From acc6eaa0effb33d5b75094f812833990c2d22e99 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 21 Mar 2017 11:50:07 +0100 Subject: [PATCH 01/82] new work manager --- diags.conf | 5 +- earthdiagnostics/datafile.py | 321 ++++++++++++++++++++++++ earthdiagnostics/diagnostic.py | 58 ++++- earthdiagnostics/earthdiags.py | 97 +------ earthdiagnostics/general/monthlymean.py | 26 +- earthdiagnostics/ocean/regionmean.py | 2 +- earthdiagnostics/work_manager.py | 143 +++++++++++ model_diags.conf | 2 +- 8 files changed, 546 insertions(+), 108 deletions(-) create mode 100644 earthdiagnostics/datafile.py create mode 100644 earthdiagnostics/work_manager.py diff --git a/diags.conf b/diags.conf index 38fc56a0..a584a84f 100644 --- a/diags.conf +++ b/diags.conf @@ -9,13 +9,14 @@ DATA_DIR = /esnas:/esarchive DATA_TYPE = exp # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 +DATA_CONVENTION = SPECS # Path to NEMO's mask and grid files needed for CDFTools CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = +DIAGS = monmean,tos,ocean # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -25,7 +26,7 @@ CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir RESTORE_MESHES = False # Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available) -MAX_CORES = 1 +MAX_CORES = 2 [CMOR] # If true, recreates CMOR files regardless of presence. 
Default = False diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py new file mode 100644 index 00000000..1e7bd384 --- /dev/null +++ b/earthdiagnostics/datafile.py @@ -0,0 +1,321 @@ +# coding: utf-8 +import csv +import shutil +from datetime import datetime + +import numpy as np +import os +from bscearth.utils.log import Log + +from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.variable import VariableManager +from earthdiagnostics.modelingrealm import ModelingRealms + + +class LocalStatus(object): + PENDING = 0 + DOWNLOADING = 1 + READY = 2 + FAILED = 3 + TO_COMPUTE = 4 + + +class StorageStatus(object): + PENDING = 0 + UPLOADING = 1 + READY = 2 + FAILED = 3 + + +class DataFile(object): + + def __init__(self, domain, var, frequency, data_convention): + self.local_path = None + self.upload_path = None + self.download_path = None + self.local_status = LocalStatus.PENDING + self.message = None + self.domain = domain + self.var = var + self.region = None + self.frequency = frequency + self.data_convention = data_convention + + self.cmor_var = VariableManager().get_variable(var, True) + + def download(self): + raise NotImplementedError() + + def upload(self): + self.storage_status = StorageStatus.UPLOADING + Utils.convert2netcdf4(self.local_path) + self._correct_metadata() + self._prepare_region() + self._rename_coordinate_variables() + Utils.move_file(self.local_path, self.upload_path) + self.storage_status = StorageStatus.READY + + def _correct_metadata(self): + if not self.cmor_var: + return + handler = Utils.openCdf(self.local_path) + var_handler = handler.variables[self.var] + self._fix_variable_name(var_handler) + handler.modeling_realm = self.cmor_var.domain.name + table = self.cmor_var.get_table(self.frequency, self.data_convention) + handler.table_id = 'Table {0} ({1})'.format(table.name, table.date) + if self.cmor_var.units: + self._fix_units(var_handler) + handler.sync() + 
self._fix_coordinate_variables_metadata(handler) + var_type = var_handler.dtype + handler.close() + self._fix_values_metadata(var_type) + + def _fix_variable_name(self, var_handler): + var_handler.standard_name = self.cmor_var.standard_name + var_handler.long_name = self.cmor_var.long_name + var_handler.short_name = self.cmor_var.short_name + + def _fix_values_metadata(self, var_type): + if self.cmor_var.valid_min != '': + valid_min = '-a valid_min,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_min) + else: + valid_min = '' + if self.cmor_var.valid_max != '': + valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_max) + else: + valid_max = '' + Utils.nco.ncatted(input=self.local_path, output=self.local_path, + options='-O -a _FillValue,{0},o,{1},"1.e20" ' + '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.var, var_type.char, + valid_min, valid_max)) + + def _fix_coordinate_variables_metadata(self, handler): + if 'lev' in handler.variables: + handler.variables['lev'].short_name = 'lev' + if self.domain == ModelingRealms.ocean: + handler.variables['lev'].standard_name = 'depth' + if 'lon' in handler.variables: + handler.variables['lon'].short_name = 'lon' + handler.variables['lon'].standard_name = 'longitude' + if 'lat' in handler.variables: + handler.variables['lat'].short_name = 'lat' + handler.variables['lat'].standard_name = 'latitude' + + def _fix_units(self, var_handler): + if 'units' not in var_handler.ncattrs(): + return + if var_handler.units == '-': + var_handler.units = '1.0' + if var_handler.units == 'PSU': + var_handler.units = 'psu' + if var_handler.units == 'C' and self.cmor_var.units == 'K': + var_handler.units = 'deg_C' + if self.cmor_var.units != var_handler.units: + self._convert_units(var_handler) + var_handler.units = self.cmor_var.units + + def _convert_units(self, var_handler): + try: + Utils.convert_units(var_handler, self.cmor_var.units) + except ValueError as ex: + 
Log.warning('Can not convert {3} from {0} to {1}: {2}', var_handler.units, self.cmor_var.units, ex, + self.cmor_var.short_name) + factor, offset = UnitConversion.get_conversion_factor_offset(var_handler.units, + self.cmor_var.units) + + var_handler[:] = var_handler[:] * factor + offset + if 'valid_min' in var_handler.ncattrs(): + var_handler.valid_min = float(var_handler.valid_min) * factor + offset + if 'valid_max' in var_handler.ncattrs(): + var_handler.valid_max = float(var_handler.valid_max) * factor + offset + + def _prepare_region(self): + if not self.region: + return + if not os.path.exists(self.upload_path): + self._add_region_dimension_to_var() + else: + self._update_var_with_region_data() + self._correct_metadata() + Utils.nco.ncks(input=self.local_path, output=self.local_path, options='-O --fix_rec_dmn region') + + def _update_var_with_region_data(self): + temp = TempFile.get() + shutil.copyfile(self.upload_path, temp) + Utils.nco.ncks(input=temp, output=temp, options='-O --mk_rec_dmn region') + handler = Utils.openCdf(temp) + handler_send = Utils.openCdf(self.local_path) + value = handler_send.variables[self.var][:] + var_region = handler.variables['region'] + basin_index = np.where(var_region[:] == self.region) + if len(basin_index[0]) == 0: + var_region[var_region.shape[0]] = self.region + basin_index = var_region.shape[0] - 1 + + else: + basin_index = basin_index[0][0] + handler.variables[self.var][..., basin_index] = value + handler.close() + handler_send.close() + Utils.move_file(temp, self.local_path) + + def _add_region_dimension_to_var(self): + handler = Utils.openCdf(self.local_path) + handler.createDimension('region') + var_region = handler.createVariable('region', str, 'region') + var_region[0] = self.region + original_var = handler.variables[self.var] + new_var = handler.createVariable('new_var', original_var.datatype, + original_var.dimensions + ('region',)) + new_var.setncatts({k: original_var.getncattr(k) for k in original_var.ncattrs()}) 
+ value = original_var[:] + new_var[..., 0] = value + handler.close() + Utils.nco.ncks(input=self.local_path, output=self.local_path, options='-O -x -v {0}'.format(self.var)) + Utils.rename_variable(self.local_path, 'new_var', self.var) + + def _rename_coordinate_variables(self): + variables = dict() + variables['x'] = 'i' + variables['y'] = 'j' + variables['nav_lat_grid_V'] = 'lat' + variables['nav_lon_grid_V'] = 'lon' + variables['nav_lat_grid_U'] = 'lat' + variables['nav_lon_grid_U'] = 'lon' + variables['nav_lat_grid_T'] = 'lat' + variables['nav_lon_grid_T'] = 'lon' + Utils.rename_variables(self.local_path, variables, False, True) + + def add_diagnostic_history(self, diagnostic): + from earthdiagnostics.earthdiags import EarthDiags + history_line = 'Diagnostic {1} calculated with EarthDiagnostics version {0}'.format(EarthDiags.version, + diagnostic) + self._add_history_line(history_line) + + def add_cmorization_history(self): + from earthdiagnostics.earthdiags import EarthDiags + history_line = 'CMORized with Earthdiagnostics version {0}'.format(EarthDiags.version) + self._add_history_line(history_line) + + def _add_history_line(self, history_line): + utc_datetime = 'UTC ' + datetime.utcnow().isoformat() + history_line = '{0}: {1};'.format(utc_datetime, history_line) + + handler = Utils.openCdf(self.local_path) + try: + history_line = history_line + handler.history + except AttributeError: + history_line = history_line + handler.history = Utils.convert_to_ASCII_if_possible(history_line) + handler.close() + + +class UnitConversion(object): + """ + Class to manage unit conversions + """ + _dict_conversions = None + + @classmethod + def load_conversions(cls): + """ + Load conversions from the configuration file + """ + cls._dict_conversions = dict() + with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'conversions.csv'), 'rb') as csvfile: + reader = csv.reader(csvfile, dialect='excel') + for line in reader: + if line[0] == 'original': + continue + 
cls.add_conversion(UnitConversion(line[0], line[1], line[2], line[3])) + + @classmethod + def add_conversion(cls, conversion): + """ + Adds a conversion to the dictionary + + :param conversion: conversion to add + :type conversion: UnitConversion + """ + cls._dict_conversions[(conversion.source, conversion.destiny)] = conversion + + def __init__(self, source, destiny, factor, offset): + self.source = source + self.destiny = destiny + self.factor = float(factor) + self.offset = float(offset) + + @classmethod + def get_conversion_factor_offset(cls, input_units, output_units): + """ + Gets the conversion factor and offset for two units . The conversion has to be done in the following way: + converted = original * factor + offset + + :param input_units: original units + :type input_units: str + :param output_units: destiny units + :type output_units: str + :return: factor and offset + :rtype: [float, float] + """ + units = input_units.split() + if len(units) == 1: + scale_unit = 1 + unit = units[0] + else: + if '^' in units[0]: + values = units[0].split('^') + scale_unit = pow(int(values[0]), int(values[1])) + else: + scale_unit = float(units[0]) + unit = units[1] + + units = output_units.split() + if len(units) == 1: + scale_new_unit = 1 + new_unit = units[0] + else: + if '^' in units[0]: + values = units[0].split('^') + scale_new_unit = pow(int(values[0]), int(values[1])) + else: + scale_new_unit = float(units[0]) + new_unit = units[1] + + factor, offset = UnitConversion._get_factor(new_unit, unit) + if factor is None: + return None, None + factor = factor * scale_unit / float(scale_new_unit) + offset /= float(scale_new_unit) + + return factor, offset + + @classmethod + def _get_factor(cls, new_unit, unit): + # Add only the conversions with a factor greater than 1 + if unit == new_unit: + return 1, 0 + elif (unit, new_unit) in cls._dict_conversions: + conversion = cls._dict_conversions[(unit, new_unit)] + return conversion.factor, conversion.offset + elif (new_unit, 
unit) in cls._dict_conversions: + conversion = cls._dict_conversions[(new_unit, unit)] + return 1 / conversion.factor, -conversion.offset + else: + return None, None + + +class THREDDSData(DataFile): + + def download(self): + self.local_path = TempFile.get() + Utils.execute_shell_command(['nccopy', '-s', '-d', '-4', self.download_path, self.local_path]) + if not Utils.check_netcdf_file(self.local_path): + self.message = 'Can not retrieve {0} from server'.format(self.download_path) + self.status = DataStatus.UNAVAILABLE + return + self.status = DataStatus.ON_LOCAL + + diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 2a235977..262c4fbd 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -6,6 +6,14 @@ from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.variable import VariableManager +class DiagnosticStatus(object): + + WAITING = 0 + READY = 1 + RUNNING = 2 + COMPLETED = 3 + FAILED = 4 + class Diagnostic(object): """ Base class for the diagnostics. 
Provides a common interface for them and also @@ -23,9 +31,8 @@ class Diagnostic(object): def __init__(self, data_manager): self.data_manager = data_manager - self.required_vars = [] - self.generated_vars = [] self.can_run_multiple_instances = True + self.status = DiagnosticStatus.WAITING def __repr__(self): return str(self) @@ -97,6 +104,53 @@ class Diagnostic(object): """ raise NotImplementedError("Class must override compute method") + def request_data(self): + """ + Calculates the diagnostic and stores the output + + Must be implemented by derived classes + """ + pass + + def declare_data_generated(self): + """ + Calculates the diagnostic and stores the output + + Must be implemented by derived classes + """ + pass + + def declare_data(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, + box=None, rename_var=None, frequency=None, year=None, date_str=None, move_old=False, + vartype=VariableType.MEAN): + """ + + :param filetosend: + :param domain: + :type domain: ModelingRealm + :param var: + :param startdate: + :param member: + :param chunk: + :param grid: + :param region: + :param box: + :param rename_var: + :param frequency: + :type frequency: Frequency + :param year: + :param date_str: + :param move_old: + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: + """ + if isinstance(region, Basin): + region = region.fullname + self.data_manager.send_file(filetosend, domain, var, startdate, member, chunk, grid, region, + box, rename_var, frequency, year, date_str, move_old, diagnostic=self, + vartype=vartype) + @classmethod def generate_jobs(cls, diags, options): """ diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index af81f259..4f11ab66 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -23,6 +23,7 @@ from earthdiagnostics.general import * from earthdiagnostics.statistics import * from earthdiagnostics.variable import 
VariableManager from earthdiagnostics.diagnostic import DiagnosticOptionError +from work_manager import WorkManager class EarthDiags(object): @@ -155,32 +156,9 @@ class EarthDiags(object): # Run diagnostics Log.info('Running diagnostics') - list_jobs = self.prepare_job_list() - self._failed_jobs = [] - - time = datetime.datetime.now() - Log.info("Starting to compute at {0}", time) - self.threads = Utils.available_cpu_count() - if 0 < self.config.max_cores < self.threads: - self.threads = self.config.max_cores - Log.info('Using {0} threads', self.threads) - threads = list() - for num_thread in range(0, self.threads): - self.time[num_thread] = dict() - t = threading.Thread(target=EarthDiags._run_jobs, args=(self, list_jobs, num_thread)) - threads.append(t) - t.start() - - for t in threads: - t.join() - - TempFile.clean() - finish_time = datetime.datetime.now() - Log.result("Diagnostics finished at {0}", finish_time) - Log.result("Elapsed time: {0}\n", finish_time - time) - self.print_errors() - self.print_stats() - return not self.had_errors + + work_manager = WorkManager(self.config, self.prepare_job_list()) + return work_manager.run() def _prepare_data_manager(self): if self.config.data_adaptor == 'CMOR': @@ -189,31 +167,8 @@ class EarthDiags(object): self.data_manager = THREDDSManager(self.config) self.data_manager.prepare() - def print_stats(self): - Log.info('Time consumed by each diagnostic class') - Log.info('--------------------------------------') - total = dict() - for num_thread in range(0, self.threads): - for key, value in self.time[num_thread].items(): - if key in total: - total[key] += value - else: - total[key] = value - for diag, time in sorted(total.items(), key=operator.itemgetter(1)): - Log.info('{0:23} {1:}', diag.__name__, time) - - def print_errors(self): - if len(self._failed_jobs) == 0: - return - self.had_errors = True - Log.error('Failed jobs') - Log.error('-----------') - for job in self._failed_jobs: - Log.error(str(job)) - Log.info('') - 
def prepare_job_list(self): - list_jobs = Queue.Queue() + list_jobs = list() for fulldiag in self.config.get_commands(): Log.info("Adding {0} to diagnostic list", fulldiag) diag_options = fulldiag.split(',') @@ -222,7 +177,7 @@ class EarthDiags(object): if diag_class: try: for job in diag_class.generate_jobs(self, diag_options): - list_jobs.put(job) + list_jobs.append(job) continue except DiagnosticOptionError as ex: Log.error('Can not configure diagnostic {0}: {1}', diag_options[0], ex) @@ -333,46 +288,6 @@ class EarthDiags(object): file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name)) file_handler.close() - def _run_jobs(self, queue, numthread): - def _run_job(current_job, retrials=1): - while retrials >= 0: - try: - Log.info('Starting {0}', current_job) - time = datetime.datetime.now() - current_job.compute() - time = datetime.datetime.now() - time - if type(current_job) in self.time[numthread]: - self.time[numthread][type(current_job)] += time - else: - self.time[numthread][type(current_job)] = time - Log.result('Finished {0}', current_job) - return True - except Exception as ex: - retrials -= 1 - Log.error('Job {0} failed: {1}', job, ex) - return False - count = 0 - failed_jobs = list() - - while not queue.empty(): - try: - job = queue.get(timeout=1) - if _run_job(job): - count += 1 - else: - failed_jobs.append(str(job)) - queue.task_done() - except Queue.Empty: - continue - - if len(failed_jobs) == 0: - Log.result('Thread {0} finished after taking care of {1} tasks', numthread, count) - else: - Log.result('Thread {0} finished after running successfully {1} of {2} tasks', numthread, count, - count + len(failed_jobs)) - self._failed_jobs += failed_jobs - return - def _prepare_mesh_files(self): Log.info('Copying mesh files') con_files = self.config.con_files diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 3052a54a..57696302 100644 --- a/earthdiagnostics/general/monthlymean.py +++ 
b/earthdiagnostics/general/monthlymean.py @@ -77,29 +77,33 @@ class MonthlyMean(Diagnostic): for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(MonthlyMean(diags.data_manager, startdate, member, chunk, options['domain'], options['variable'], options['frequency'], options['grid'])) + return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=self.frequency, grid=self.grid) + + def declare_data_generated(self): + self.monmean = self.declare_data(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=Frequencies.monthly, grid=self.grid) + def compute(self): """ Runs the diagnostic """ - monmean = TempFile.get() - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) - handler = Utils.openCdf(variable_file) + handler = Utils.openCdf(self.variable_file.local_path) if 'region' in handler.variables: noregion = TempFile.get() - Utils.nco.ncks(input=variable_file, output=noregion, options='-O -C -x -v region') - Utils.cdo.monmean(input=noregion, output=monmean) - monmean_handler = Utils.openCdf(monmean) + Utils.nco.ncks(input=self.variable_file.local_path, output=noregion, options='-O -C -x -v region') + Utils.cdo.monmean(input=noregion, output=self.monmean.local_path) + monmean_handler = Utils.openCdf(self.monmean.local_path) Utils.copy_variable(handler, monmean_handler, 'region') monmean_handler.close() else: - Utils.cdo.monmean(input=variable_file, output=monmean) + Utils.cdo.monmean(input=self.variable_file.local_path, output=self.monmean.local_path) handler.close() + self.monmean.ready() - os.remove(variable_file) - self.send_file(monmean, self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.monthly, grid=self.grid) diff --git 
a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index 0a0d0b98..e815b12c 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -75,7 +75,7 @@ class RegionMean(Diagnostic): """ options_available = (DiagnosticDomainOption('domain'), DiagnosticVariableOption('variable'), - DiagnosticOption('grid_point', ''), + DiagnosticOption('grid_point', 'T'), DiagnosticBasinOption('basin', Basins.Global), DiagnosticIntOption('min_depth', 0), DiagnosticIntOption('max_depth', 0), diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py new file mode 100644 index 00000000..c2931721 --- /dev/null +++ b/earthdiagnostics/work_manager.py @@ -0,0 +1,143 @@ +# coding=utf-8 +import datetime +import threading + +import operator +from time import sleep + +from bscearth.utils.log import Log + +from diagnostic import DiagnosticStatus +from utils import Utils, TempFile + + +class WorkManager(object): + + def __init__(self, config, job_list): + + self.pending_jobs = job_list + self.running_jobs = [] + self.finished_jobs = [] + self.config = config + + def run(self): + + time = datetime.datetime.now() + Log.info("Starting to compute at {0}", time) + self.threads = Utils.available_cpu_count() + if 0 < self.config.max_cores < self.threads: + self.threads = self.config.max_cores + Log.info('Using {0} threads', self.threads) + + self.job_runners = [JobRunner() for x in range(0, self.threads)] + + while len(self.pending_jobs) > 0 or len(self.running_jobs) > 0: + if len(self.pending_jobs) > 0: + for runner in self.job_runners: + if runner.next_job is None: + next_job = self.pending_jobs.pop() + runner.set_next_job(next_job) + self.running_jobs.append(next_job) + + if len(self.running_jobs) > 0: + for job in self.running_jobs: + if job.status in (DiagnosticStatus.COMPLETED, DiagnosticStatus.FAILED): + self.finished_jobs.append(job) + self.running_jobs.remove(job) + sleep(0.1) + + TempFile.clean() + finish_time 
= datetime.datetime.now() + Log.result("Diagnostics finished at {0}", finish_time) + Log.result("Elapsed time: {0}\n", finish_time - time) + self.print_errors() + self.print_stats() + return not self.had_errors + + def print_stats(self): + Log.info('Time consumed by each diagnostic class') + Log.info('--------------------------------------') + total = dict() + for runner in self.job_runners: + for key, value in runner.time.items(): + if key in total: + total[key] += value + else: + total[key] = value + for diag, time in sorted(total.items(), key=operator.itemgetter(1)): + Log.info('{0:23} {1:}', diag.__name__, time) + + def print_errors(self): + failed = [job for job in self.running_jobs if job.status == DiagnosticStatus.FAILED] + if len(failed) == 0: + return + self.had_errors = True + Log.error('Failed jobs') + Log.error('-----------') + for job in failed: + Log.error(str(job)) + Log.info('') + + +class JobRunner(object): + + def __init__(self): + self.next_job = None + self.current_job = None + self.time = {} + self._stop = False + self._is_running = False + self.lock = threading.Lock() + self.lock.acquire() + + def stop(self): + self._stop = True + self.lock.release() + + def set_next_job(self, job): + self.next_job = job + self.lock.release() + + def is_running(self): + return self._is_running + + def start(self): + threading.Thread(target=JobRunner._run_jobs, args=(self,)) + + def keep_running(self): + self.lock.acquire() + if self.stop: + return False + + if self.next_job is not None: + self.current_job = self.next_job + self.next_job = None + + def _run_jobs(self): + self._is_running = True + while self.keep_running(): + self._run_job() + self._is_running = False + return + + def _run_job(self): + try: + Log.info('Starting {0}', self.current_job) + self.current_job.status = DiagnosticStatus.RUNNING + time = datetime.datetime.now() + self.current_job.compute() + time = datetime.datetime.now() - time + if type(self.current_job) in self.time: + 
self.time[type(self.current_job)] += time + else: + self.time[type(self.current_job)] = time + Log.result('Finished {0}', self.current_job) + self.current_job.status = DiagnosticStatus.COMPLETED + return True + except Exception as ex: + + self.current_job.message = str(ex) + Log.error('Job {0} failed: {1}', self.current_job, ex) + + count = 0 + failed_jobs = list() diff --git a/model_diags.conf b/model_diags.conf index b55c2bc0..9967e075 100644 --- a/model_diags.conf +++ b/model_diags.conf @@ -121,7 +121,7 @@ CHUNKS = # If true, CMORizes atmosphere files. Default = True # ATMOSPHERE_FILES = True -# You can specify the variable to cmorize, in the way domain:var domain:var2 domain2:var +# You can specify the variable to cmorize, in the way domain:var domain:var2 domain2:var, i.e ocean:thetao atmos:tas # VARIABLE_LIST = # Variables to be CMORized from the grib atmospheric files, separated by comma. -- GitLab From bce735b20c13ed647e74228b3ced8d295e1071a5 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 23 Mar 2017 15:32:40 +0100 Subject: [PATCH 02/82] Work towards a better work_manager --- diags.conf | 2 +- earthdiagnostics/cmormanager.py | 14 ++--- earthdiagnostics/diagnostic.py | 2 +- earthdiagnostics/work_manager.py | 96 +++++++------------------------- setup.py | 2 +- 5 files changed, 28 insertions(+), 88 deletions(-) diff --git a/diags.conf b/diags.conf index a584a84f..0cef5f7f 100644 --- a/diags.conf +++ b/diags.conf @@ -70,7 +70,7 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard INSTITUTE = BSC -MODEL = EC-EARTH +MODEL = EC-EARTH3 # Model version: Available versions MODEL_VERSION =Ec3.2_O1L75 # Atmospheric output timestep in hours diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 10c45421..592897e6 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -470,17 +470,13 @@ class CMORManager(DataManager): 
self.experiment.model) Log.debug('Correcting double model appearance') for (dirpath, dirnames, filenames) in os.walk(bad_path, False): - for filename in filenames: - if '_S{0}_'.format(startdate) in filename: - continue - if self._get_member_str(member) in filename: - continue filepath = os.path.join(dirpath, filename) - good = filepath.replace('_{0}_output_'.format(self.experiment.model), - '_{0}_{1}_S{2}_'.format(self.experiment.model, - self.experiment.experiment_name, - startdate)) + good = filepath + good = good.replace('_{0}_output_'.format(self.experiment.model), + '_{0}_{1}_S{2}_'.format(self.experiment.model, + self.experiment.experiment_name, + startdate)) good = good.replace('/{0}/{0}'.format(self.experiment.model), '/{0}/{1}'.format(self.experiment.model, diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 262c4fbd..d40c500b 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -7,13 +7,13 @@ from earthdiagnostics.variable import VariableManager class DiagnosticStatus(object): - WAITING = 0 READY = 1 RUNNING = 2 COMPLETED = 3 FAILED = 4 + class Diagnostic(object): """ Base class for the diagnostics. 
Provides a common interface for them and also diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index c2931721..d258d780 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -1,24 +1,22 @@ # coding=utf-8 import datetime -import threading import operator -from time import sleep from bscearth.utils.log import Log from diagnostic import DiagnosticStatus from utils import Utils, TempFile +from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor class WorkManager(object): def __init__(self, config, job_list): - self.pending_jobs = job_list - self.running_jobs = [] - self.finished_jobs = [] self.config = config + self.time = {} + self.had_errors = False def run(self): @@ -29,29 +27,16 @@ class WorkManager(object): self.threads = self.config.max_cores Log.info('Using {0} threads', self.threads) - self.job_runners = [JobRunner() for x in range(0, self.threads)] - - while len(self.pending_jobs) > 0 or len(self.running_jobs) > 0: - if len(self.pending_jobs) > 0: - for runner in self.job_runners: - if runner.next_job is None: - next_job = self.pending_jobs.pop() - runner.set_next_job(next_job) - self.running_jobs.append(next_job) - - if len(self.running_jobs) > 0: - for job in self.running_jobs: - if job.status in (DiagnosticStatus.COMPLETED, DiagnosticStatus.FAILED): - self.finished_jobs.append(job) - self.running_jobs.remove(job) - sleep(0.1) + with ThreadPoolExecutor(self.threads) as executor: + for job in self.pending_jobs: + executor.submit(self._run_job, job) TempFile.clean() finish_time = datetime.datetime.now() Log.result("Diagnostics finished at {0}", finish_time) Log.result("Elapsed time: {0}\n", finish_time - time) - self.print_errors() - self.print_stats() + # self.print_errors() + # self.print_stats() return not self.had_errors def print_stats(self): @@ -78,66 +63,25 @@ class WorkManager(object): Log.error(str(job)) Log.info('') - -class JobRunner(object): - - def __init__(self): - 
self.next_job = None - self.current_job = None - self.time = {} - self._stop = False - self._is_running = False - self.lock = threading.Lock() - self.lock.acquire() - - def stop(self): - self._stop = True - self.lock.release() - - def set_next_job(self, job): - self.next_job = job - self.lock.release() - - def is_running(self): - return self._is_running - - def start(self): - threading.Thread(target=JobRunner._run_jobs, args=(self,)) - - def keep_running(self): - self.lock.acquire() - if self.stop: - return False - - if self.next_job is not None: - self.current_job = self.next_job - self.next_job = None - - def _run_jobs(self): - self._is_running = True - while self.keep_running(): - self._run_job() - self._is_running = False - return - - def _run_job(self): + def _run_job(self, job): try: - Log.info('Starting {0}', self.current_job) - self.current_job.status = DiagnosticStatus.RUNNING + Log.info('Starting {0}', job) + job.status = DiagnosticStatus.RUNNING time = datetime.datetime.now() - self.current_job.compute() + job.compute() time = datetime.datetime.now() - time - if type(self.current_job) in self.time: - self.time[type(self.current_job)] += time + if type(job) in self.time: + self.time[type(job)] += time else: - self.time[type(self.current_job)] = time - Log.result('Finished {0}', self.current_job) - self.current_job.status = DiagnosticStatus.COMPLETED + self.time[type(job)] = time + Log.result('Finished {0}', job) + job.status = DiagnosticStatus.COMPLETED return True except Exception as ex: - self.current_job.message = str(ex) - Log.error('Job {0} failed: {1}', self.current_job, ex) + job.message = str(ex) + Log.error('Job {0} failed: {1}', job, ex) + self.semaphore.release() count = 0 failed_jobs = list() diff --git a/setup.py b/setup.py index 3e079b5c..ccdb77e7 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup( keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 
'cdo', 'nco', 'cfunits>=1.1.4', 'coverage', - 'pygrib', 'openpyxl', 'mock'], + 'pygrib', 'openpyxl', 'mock', 'futures'], packages=find_packages(), include_package_data=True, scripts=['bin/earthdiags'] -- GitLab From 1c5aaeea66cea72923701c6073c1a7d9a068f4da Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 23 Mar 2017 17:03:24 +0100 Subject: [PATCH 03/82] Request data ready --- diags.conf | 2 +- earthdiagnostics/cmormanager.py | 31 ++++++++++++ earthdiagnostics/datamanager.py | 61 +++++++++++++++-------- earthdiagnostics/earthdiags.py | 2 +- earthdiagnostics/general/monthlymean.py | 16 +++--- earthdiagnostics/work_manager.py | 66 ++++++++++++++----------- 6 files changed, 119 insertions(+), 59 deletions(-) diff --git a/diags.conf b/diags.conf index 0cef5f7f..df2f238a 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = monmean,tos,ocean +DIAGS = monmean,tos,ocean,mon # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 592897e6..60ac6e24 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -90,6 +90,37 @@ class CMORManager(DataManager): Utils.copy_file(filepath, temp_path) return temp_path + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + cmor_var = self.variable_list.get_variable(var) + var = self._get_final_var_name(box, var) + filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid, None, None) + + return self._get_file_from_storage(filepath) + def get_file_path(self, startdate, member, domain, var, cmor_var, chunk, frequency, grid=None, year=None, date_str=None): """ diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index bdc06a65..2945527b 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -8,10 +8,9 @@ import numpy as np import os import re from bscearth.utils.log import Log -from cfunits import Units from earthdiagnostics.utils import Utils, TempFile -from 
earthdiagnostics.variable import Variable, VariableManager +from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms @@ -31,6 +30,7 @@ class DataManager(object): self.variable_list.load_variables(self.config.data_convention) UnitConversion.load_conversions() self.lock = threading.Lock() + self.requested_files = {} def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): @@ -60,6 +60,11 @@ class DataManager(object): """ raise NotImplementedError() + def _get_file_from_storage(self, filepath): + if filepath not in self.requested_files: + self.requested_files[filepath] = NetCDFFile.from_storage(filepath) + return self.requested_files[filepath] + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ @@ -267,26 +272,42 @@ class NetCDFFile(object): """ Class to manage netCDF file and pr - :param remote_file: - :type remote_file: str - :param local_file: - :type local_file: str - :param domain: - :type domain: Domain - :param var: - :type var: str - :param cmor_var: - :type cmor_var: Variable """ - def __init__(self, remote_file, local_file, domain, var, cmor_var, data_convention, region): - self.remote_file = remote_file - self.local_file = local_file - self.domain = domain - self.var = var - self.cmor_var = cmor_var - self.region = region + def __init__(self): + self.remote_file = None + self.local_file = None + self.domain = None + self.var = None + self.cmor_var = None + self.region = None self.frequency = None - self.data_convention = data_convention + self.data_convention = None + + @staticmethod + def from_storage(filepath): + file_object = NetCDFFile() + file_object.remote_file = filepath + return file_object + + @staticmethod + def to_storage(domain, var, cmor_var, data_convention, region): + new_object = 
NetCDFFile() + new_object.domain = domain + new_object.var = var + new_object.cmor_var = cmor_var + new_object.region = region + new_object.frequency = None + new_object.data_convention = data_convention + return new_object + + def download(self): + try: + if not self.local_file: + self.local_file = TempFile.get() + Utils.copy_file(self.remote_file, self.local_file) + except Exception as ex: + os.remove(self.local_file) + Log.error('File {0} not available: {1}', self.remote_file, ex) def send(self): Utils.convert2netcdf4(self.local_file) diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 4f11ab66..9647f5ef 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -157,7 +157,7 @@ class EarthDiags(object): # Run diagnostics Log.info('Running diagnostics') - work_manager = WorkManager(self.config, self.prepare_job_list()) + work_manager = WorkManager(self.config, self.prepare_job_list(), self.data_manager) return work_manager.run() def _prepare_data_manager(self): diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 57696302..71b5fa38 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -29,7 +29,7 @@ class MonthlyMean(Diagnostic): :param domain: variable's domain :type domain: ModelingRealm :param frequency: original frequency - :type frequency: str + :type frequency: Frequency :param grid: original data grid :type grid: str """ @@ -81,8 +81,8 @@ class MonthlyMean(Diagnostic): return job_list def request_data(self): - self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) + self.variable_file = self.data_manager.request_chunk(self.domain, self.variable, self.startdate, self.member, + self.chunk, frequency=self.frequency, grid=self.grid) def declare_data_generated(self): self.monmean = self.declare_data(self.domain, 
self.variable, self.startdate, self.member, self.chunk, @@ -92,16 +92,16 @@ class MonthlyMean(Diagnostic): """ Runs the diagnostic """ - handler = Utils.openCdf(self.variable_file.local_path) + handler = Utils.openCdf(self.variable_file.local_file) if 'region' in handler.variables: noregion = TempFile.get() - Utils.nco.ncks(input=self.variable_file.local_path, output=noregion, options='-O -C -x -v region') - Utils.cdo.monmean(input=noregion, output=self.monmean.local_path) - monmean_handler = Utils.openCdf(self.monmean.local_path) + Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options='-O -C -x -v region') + Utils.cdo.monmean(input=noregion, output=self.monmean.local_file) + monmean_handler = Utils.openCdf(self.monmean.local_file) Utils.copy_variable(handler, monmean_handler, 'region') monmean_handler.close() else: - Utils.cdo.monmean(input=self.variable_file.local_path, output=self.monmean.local_path) + Utils.cdo.monmean(input=self.variable_file.local_file, output=self.monmean.local_file) handler.close() self.monmean.ready() diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index d258d780..9af5807a 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -12,11 +12,12 @@ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor class WorkManager(object): - def __init__(self, config, job_list): + def __init__(self, config, job_list, data_manager): self.pending_jobs = job_list self.config = config self.time = {} self.had_errors = False + self.data_manager = data_manager def run(self): @@ -27,9 +28,18 @@ class WorkManager(object): self.threads = self.config.max_cores Log.info('Using {0} threads', self.threads) - with ThreadPoolExecutor(self.threads) as executor: - for job in self.pending_jobs: - executor.submit(self._run_job, job) + for job in self.pending_jobs: + job.request_data() + + self.downloader = ThreadPoolExecutor(1) + self.uploader = ThreadPoolExecutor(1) + 
self.executor = ThreadPoolExecutor(self.threads) + + for file_object in self.data_manager.requested_files.values(): + self.downloader.submit(file_object.download) + + for job in self.pending_jobs: + self.executor.submit(self._run_job, job) TempFile.clean() finish_time = datetime.datetime.now() @@ -39,29 +49,29 @@ class WorkManager(object): # self.print_stats() return not self.had_errors - def print_stats(self): - Log.info('Time consumed by each diagnostic class') - Log.info('--------------------------------------') - total = dict() - for runner in self.job_runners: - for key, value in runner.time.items(): - if key in total: - total[key] += value - else: - total[key] = value - for diag, time in sorted(total.items(), key=operator.itemgetter(1)): - Log.info('{0:23} {1:}', diag.__name__, time) - - def print_errors(self): - failed = [job for job in self.running_jobs if job.status == DiagnosticStatus.FAILED] - if len(failed) == 0: - return - self.had_errors = True - Log.error('Failed jobs') - Log.error('-----------') - for job in failed: - Log.error(str(job)) - Log.info('') + # def print_stats(self): + # Log.info('Time consumed by each diagnostic class') + # Log.info('--------------------------------------') + # total = dict() + # for runner in self.job_runners: + # for key, value in runner.time.items(): + # if key in total: + # total[key] += value + # else: + # total[key] = value + # for diag, time in sorted(total.items(), key=operator.itemgetter(1)): + # Log.info('{0:23} {1:}', diag.__name__, time) + # + # def print_errors(self): + # failed = [job for job in self.running_jobs if job.status == DiagnosticStatus.FAILED] + # if len(failed) == 0: + # return + # self.had_errors = True + # Log.error('Failed jobs') + # Log.error('-----------') + # for job in failed: + # Log.error(str(job)) + # Log.info('') def _run_job(self, job): try: @@ -78,10 +88,8 @@ class WorkManager(object): job.status = DiagnosticStatus.COMPLETED return True except Exception as ex: - job.message = 
str(ex) Log.error('Job {0} failed: {1}', job, ex) - self.semaphore.release() count = 0 failed_jobs = list() -- GitLab From 36aaa693484b860afca12f044c87306899dc4411 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 24 Mar 2017 10:56:05 +0100 Subject: [PATCH 04/82] Request and submit now working --- earthdiagnostics/cmormanager.py | 2 +- earthdiagnostics/datafile.py | 106 +++++++++++++++++------- earthdiagnostics/datamanager.py | 4 +- earthdiagnostics/diagnostic.py | 38 ++++++++- earthdiagnostics/general/monthlymean.py | 4 +- earthdiagnostics/publisher.py | 15 ++++ earthdiagnostics/work_manager.py | 32 +++++-- 7 files changed, 156 insertions(+), 45 deletions(-) create mode 100644 earthdiagnostics/publisher.py diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 60ac6e24..88e0a874 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -7,7 +7,7 @@ from bscearth.utils.log import Log from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, previous_day from earthdiagnostics.cmorizer import Cmorizer -from earthdiagnostics.datamanager import DataManager, NetCDFFile +from earthdiagnostics.datamanager import DataManager from earthdiagnostics.frequency import Frequencies, Frequency from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 1e7bd384..95b52da0 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -8,7 +8,7 @@ import os from bscearth.utils.log import Log from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.variable import VariableManager +from publisher import Publisher from earthdiagnostics.modelingrealm import ModelingRealms @@ -27,38 +27,65 @@ class StorageStatus(object): FAILED = 3 -class DataFile(object): +class DataFile(Publisher): - def __init__(self, domain, var, frequency, 
data_convention): - self.local_path = None - self.upload_path = None - self.download_path = None - self.local_status = LocalStatus.PENDING - self.message = None - self.domain = domain - self.var = var + def __init__(self): + super(DataFile, self).__init__() + self.remote_file = None + self.local_file = None + self.domain = None + self.var = None + self.cmor_var = None self.region = None - self.frequency = frequency - self.data_convention = data_convention + self.frequency = None + self.data_convention = None + self._local_status = LocalStatus.PENDING + self._storage_status = StorageStatus.READY - self.cmor_var = VariableManager().get_variable(var, True) + @property + def local_status(self): + return self._local_status + + @local_status.setter + def local_status(self, value): + self._local_status = value + self.dispatch(self, value) + + @classmethod + def from_storage(cls, filepath): + file_object = cls() + file_object.remote_file = filepath + file_object.local_status = LocalStatus.PENDING + return file_object + + @classmethod + def to_storage(cls, domain, var, cmor_var, data_convention, region): + new_object = cls() + new_object.domain = domain + new_object.var = var + new_object.cmor_var = cmor_var + new_object.region = region + new_object.frequency = None + new_object.data_convention = data_convention + self.storage_status = StorageStatus.PENDING + return new_object def download(self): raise NotImplementedError() def upload(self): self.storage_status = StorageStatus.UPLOADING - Utils.convert2netcdf4(self.local_path) + Utils.convert2netcdf4(self.local_file) self._correct_metadata() self._prepare_region() self._rename_coordinate_variables() - Utils.move_file(self.local_path, self.upload_path) + Utils.move_file(self.local_file, self.remote_file) self.storage_status = StorageStatus.READY def _correct_metadata(self): if not self.cmor_var: return - handler = Utils.openCdf(self.local_path) + handler = Utils.openCdf(self.local_file) var_handler = 
handler.variables[self.var] self._fix_variable_name(var_handler) handler.modeling_realm = self.cmor_var.domain.name @@ -86,7 +113,7 @@ class DataFile(object): valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_max) else: valid_max = '' - Utils.nco.ncatted(input=self.local_path, output=self.local_path, + Utils.nco.ncatted(input=self.local_file, output=self.local_file, options='-O -a _FillValue,{0},o,{1},"1.e20" ' '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.var, var_type.char, valid_min, valid_max)) @@ -134,19 +161,19 @@ class DataFile(object): def _prepare_region(self): if not self.region: return - if not os.path.exists(self.upload_path): + if not os.path.exists(self.remote_file): self._add_region_dimension_to_var() else: self._update_var_with_region_data() self._correct_metadata() - Utils.nco.ncks(input=self.local_path, output=self.local_path, options='-O --fix_rec_dmn region') + Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O --fix_rec_dmn region') def _update_var_with_region_data(self): temp = TempFile.get() - shutil.copyfile(self.upload_path, temp) + shutil.copyfile(self.remote_file, temp) Utils.nco.ncks(input=temp, output=temp, options='-O --mk_rec_dmn region') handler = Utils.openCdf(temp) - handler_send = Utils.openCdf(self.local_path) + handler_send = Utils.openCdf(self.local_file) value = handler_send.variables[self.var][:] var_region = handler.variables['region'] basin_index = np.where(var_region[:] == self.region) @@ -159,10 +186,10 @@ class DataFile(object): handler.variables[self.var][..., basin_index] = value handler.close() handler_send.close() - Utils.move_file(temp, self.local_path) + Utils.move_file(temp, self.local_file) def _add_region_dimension_to_var(self): - handler = Utils.openCdf(self.local_path) + handler = Utils.openCdf(self.local_file) handler.createDimension('region') var_region = handler.createVariable('region', str, 'region') var_region[0] = self.region 
@@ -173,8 +200,8 @@ class DataFile(object): value = original_var[:] new_var[..., 0] = value handler.close() - Utils.nco.ncks(input=self.local_path, output=self.local_path, options='-O -x -v {0}'.format(self.var)) - Utils.rename_variable(self.local_path, 'new_var', self.var) + Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O -x -v {0}'.format(self.var)) + Utils.rename_variable(self.local_file, 'new_var', self.var) def _rename_coordinate_variables(self): variables = dict() @@ -186,7 +213,7 @@ class DataFile(object): variables['nav_lon_grid_U'] = 'lon' variables['nav_lat_grid_T'] = 'lat' variables['nav_lon_grid_T'] = 'lon' - Utils.rename_variables(self.local_path, variables, False, True) + Utils.rename_variables(self.local_file, variables, False, True) def add_diagnostic_history(self, diagnostic): from earthdiagnostics.earthdiags import EarthDiags @@ -203,7 +230,7 @@ class DataFile(object): utc_datetime = 'UTC ' + datetime.utcnow().isoformat() history_line = '{0}: {1};'.format(utc_datetime, history_line) - handler = Utils.openCdf(self.local_path) + handler = Utils.openCdf(self.local_file) try: history_line = history_line + handler.history except AttributeError: @@ -311,11 +338,28 @@ class THREDDSData(DataFile): def download(self): self.local_path = TempFile.get() - Utils.execute_shell_command(['nccopy', '-s', '-d', '-4', self.download_path, self.local_path]) + Utils.execute_shell_command(['nccopy', '-s', '-d', '-4', self.remote_file, self.local_path]) if not Utils.check_netcdf_file(self.local_path): - self.message = 'Can not retrieve {0} from server'.format(self.download_path) - self.status = DataStatus.UNAVAILABLE + self.message = 'Can not retrieve {0} from server'.format(self.remote_file) + self.storage_status = LocalStatus.FAILED return - self.status = DataStatus.ON_LOCAL + self.status = LocalStatus.READY + + +class NetCDFFile(DataFile): + + def download(self): + try: + self.local_status = LocalStatus.DOWNLOADING + if not self.local_file: + 
self.local_file = TempFile.get() + Utils.copy_file(self.remote_file, self.local_file) + Log.info('File {0} ready!', self.remote_file) + self.local_status = LocalStatus.READY + + except Exception as ex: + os.remove(self.local_file) + Log.error('File {0} not available: {1}', self.remote_file, ex) + self.local_status = LocalStatus.FAILED diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 2945527b..fb83ee4a 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -13,6 +13,7 @@ from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.datafile import NetCDFFile as NCfile class DataManager(object): @@ -62,7 +63,7 @@ class DataManager(object): def _get_file_from_storage(self, filepath): if filepath not in self.requested_files: - self.requested_files[filepath] = NetCDFFile.from_storage(filepath) + self.requested_files[filepath] = NCfile.from_storage(filepath) return self.requested_files[filepath] def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, @@ -305,6 +306,7 @@ class NetCDFFile(object): if not self.local_file: self.local_file = TempFile.get() Utils.copy_file(self.remote_file, self.local_file) + Log.info('File {0} ready!', self.remote_file) except Exception as ex: os.remove(self.local_file) Log.error('File {0} not available: {1}', self.remote_file, ex) diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index d40c500b..e8322f33 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -1,9 +1,11 @@ # coding=utf-8 + from earthdiagnostics.constants import Basins, Basin from earthdiagnostics.frequency import Frequency from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms from 
earthdiagnostics.variable import VariableManager +from publisher import Publisher class DiagnosticStatus(object): @@ -14,7 +16,7 @@ class DiagnosticStatus(object): FAILED = 4 -class Diagnostic(object): +class Diagnostic(Publisher): """ Base class for the diagnostics. Provides a common interface for them and also has a mechanism that allows diagnostic retrieval by name. @@ -30,13 +32,23 @@ class Diagnostic(object): _diag_list = dict() def __init__(self, data_manager): + super(Diagnostic, self).__init__() self.data_manager = data_manager - self.can_run_multiple_instances = True - self.status = DiagnosticStatus.WAITING + self._status = DiagnosticStatus.WAITING + self._requests = [] def __repr__(self): return str(self) + @property + def status(self): + return self._status + + @status.setter + def status(self, value): + self._status = value + self.dispatch(self, value) + @staticmethod def register(cls): """ @@ -189,6 +201,26 @@ class Diagnostic(object): """ return 'Developer must override base class __str__ method' + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None): + request = self.data_manager.request_chunk(domain, var, startdate, member, chunk, grid, box, frequency) + self._requests.append(request) + request.subscribe(self, self._updated_request) + return request + + def _updated_request(self, request, new_status): + from datafile import LocalStatus + if new_status == LocalStatus.FAILED: + self.reason = 'Required file {0} is not available'.format(request.remote_file) + self.status = DiagnosticStatus.FAILED + return + + if new_status == LocalStatus.READY: + if all([request.local_status == LocalStatus.READY for request in self._requests]): + self.status = DiagnosticStatus.READY + return + + + class DiagnosticOption(object): diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 71b5fa38..493475dd 100644 --- a/earthdiagnostics/general/monthlymean.py +++ 
b/earthdiagnostics/general/monthlymean.py @@ -81,8 +81,8 @@ class MonthlyMean(Diagnostic): return job_list def request_data(self): - self.variable_file = self.data_manager.request_chunk(self.domain, self.variable, self.startdate, self.member, - self.chunk, frequency=self.frequency, grid=self.grid) + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=self.frequency, grid=self.grid) def declare_data_generated(self): self.monmean = self.declare_data(self.domain, self.variable, self.startdate, self.member, self.chunk, diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py new file mode 100644 index 00000000..b0ed0c88 --- /dev/null +++ b/earthdiagnostics/publisher.py @@ -0,0 +1,15 @@ +class Publisher(object): + def __init__(self): + self.subscribers = dict() + + def subscribe(self, who, callback=None): + if callback is None: + callback = getattr(who, 'update') + self.subscribers[who] = callback + + def unsubscribe(self, who): + del self.subscribers[who] + + def dispatch(self, *args): + for subscriber, callback in self.subscribers.items(): + callback(*args) \ No newline at end of file diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 9af5807a..49d229a9 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -1,19 +1,18 @@ # coding=utf-8 import datetime -import operator - from bscearth.utils.log import Log +from concurrent.futures import ThreadPoolExecutor from diagnostic import DiagnosticStatus from utils import Utils, TempFile -from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor +import threading class WorkManager(object): def __init__(self, config, job_list, data_manager): - self.pending_jobs = job_list + self.jobs = job_list self.config = config self.time = {} self.had_errors = False @@ -28,8 +27,9 @@ class WorkManager(object): self.threads = self.config.max_cores Log.info('Using {0} threads', 
self.threads) - for job in self.pending_jobs: + for job in self.jobs: job.request_data() + job.subscribe(self, self._job_status_changed) self.downloader = ThreadPoolExecutor(1) self.uploader = ThreadPoolExecutor(1) @@ -38,8 +38,13 @@ class WorkManager(object): for file_object in self.data_manager.requested_files.values(): self.downloader.submit(file_object.download) - for job in self.pending_jobs: - self.executor.submit(self._run_job, job) + self.lock = threading.Lock() + self.lock.acquire() + self.lock.acquire() + + self.downloader.shutdown(True) + self.uploader.shutdown(True) + self.executor.shutdown(True) TempFile.clean() finish_time = datetime.datetime.now() @@ -49,6 +54,16 @@ class WorkManager(object): # self.print_stats() return not self.had_errors + def _job_status_changed(self, job, status): + if status == DiagnosticStatus.READY: + self.executor.submit(self._run_job, job) + self.check_completion() + + def check_completion(self): + if any([job.status not in (DiagnosticStatus.COMPLETED, DiagnosticStatus.FAILED) for job in self.jobs]): + return + self.lock.release() + # def print_stats(self): # Log.info('Time consumed by each diagnostic class') # Log.info('--------------------------------------') @@ -90,6 +105,9 @@ class WorkManager(object): except Exception as ex: job.message = str(ex) Log.error('Job {0} failed: {1}', job, ex) + job.status = DiagnosticStatus.FAILED count = 0 failed_jobs = list() + + -- GitLab From f21044a063e3834b0cfbdf0c04c8b4d83d9be8c2 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 24 Mar 2017 11:29:47 +0100 Subject: [PATCH 05/82] Added automatic removal when file is no longer required --- earthdiagnostics/datafile.py | 8 ++- earthdiagnostics/diagnostic.py | 6 +- earthdiagnostics/general/monthlymean.py | 10 ++-- earthdiagnostics/work_manager.py | 74 +++++++++++++------------ 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 95b52da0..5a31af9e 
100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -42,6 +42,12 @@ class DataFile(Publisher): self._local_status = LocalStatus.PENDING self._storage_status = StorageStatus.READY + def unsubscribe(self, who): + super(DataFile, self).unsubscribe(who) + if self.local_status == LocalStatus.READY and len(self.subscribers) == 0: + os.remove(self.local_file) + + @property def local_status(self): return self._local_status @@ -67,7 +73,7 @@ class DataFile(Publisher): new_object.region = region new_object.frequency = None new_object.data_convention = data_convention - self.storage_status = StorageStatus.PENDING + new_object.storage_status = StorageStatus.PENDING return new_object def download(self): diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index e8322f33..6c182c49 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -47,6 +47,8 @@ class Diagnostic(Publisher): @status.setter def status(self, value): self._status = value + if self.status in (DiagnosticStatus.FAILED, DiagnosticStatus.COMPLETED): + self._unsuscribe_requests() self.dispatch(self, value) @staticmethod @@ -219,7 +221,9 @@ class Diagnostic(Publisher): self.status = DiagnosticStatus.READY return - + def _unsuscribe_requests(self): + for request in self._requests: + request.unsubscribe(self) class DiagnosticOption(object): diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 493475dd..6882f697 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -93,17 +93,19 @@ class MonthlyMean(Diagnostic): Runs the diagnostic """ handler = Utils.openCdf(self.variable_file.local_file) + temp = TempFile.get() if 'region' in handler.variables: noregion = TempFile.get() Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options='-O -C -x -v region') - Utils.cdo.monmean(input=noregion, output=self.monmean.local_file) - 
monmean_handler = Utils.openCdf(self.monmean.local_file) + Utils.cdo.monmean(input=noregion, output=temp) + os.remove(noregion) + monmean_handler = Utils.openCdf(temp) Utils.copy_variable(handler, monmean_handler, 'region') monmean_handler.close() else: - Utils.cdo.monmean(input=self.variable_file.local_file, output=self.monmean.local_file) + Utils.cdo.monmean(input=self.variable_file.local_file, output=temp) handler.close() - self.monmean.ready() + self.monmean.set_local_file(temp) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 49d229a9..8aa9c07c 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -1,6 +1,7 @@ # coding=utf-8 import datetime +import operator from bscearth.utils.log import Log from concurrent.futures import ThreadPoolExecutor @@ -50,8 +51,8 @@ class WorkManager(object): finish_time = datetime.datetime.now() Log.result("Diagnostics finished at {0}", finish_time) Log.result("Elapsed time: {0}\n", finish_time - time) - # self.print_errors() - # self.print_stats() + self.print_errors() + self.print_stats() return not self.had_errors def _job_status_changed(self, job, status): @@ -64,49 +65,50 @@ class WorkManager(object): return self.lock.release() - # def print_stats(self): - # Log.info('Time consumed by each diagnostic class') - # Log.info('--------------------------------------') - # total = dict() - # for runner in self.job_runners: - # for key, value in runner.time.items(): - # if key in total: - # total[key] += value - # else: - # total[key] = value - # for diag, time in sorted(total.items(), key=operator.itemgetter(1)): - # Log.info('{0:23} {1:}', diag.__name__, time) - # - # def print_errors(self): - # failed = [job for job in self.running_jobs if job.status == DiagnosticStatus.FAILED] - # if len(failed) == 0: - # return - # self.had_errors = True - # Log.error('Failed jobs') - # Log.error('-----------') - # for job in failed: - # Log.error(str(job)) - # Log.info('') + 
def print_stats(self): + Log.info('Time consumed by each diagnostic class') + Log.info('--------------------------------------') + + times = {} + for job in self.jobs: + job_type = job.alias + if job_type in times.keys(): + times[job_type] += job.consumed_time + else: + times[job_type] = job.consumed_time + + for diag in sorted(times, key=operator.itemgetter(1)): + Log.info('{0:23} {1:}', diag, times[diag]) + + def print_errors(self): + failed = [job for job in self.jobs if job.status == DiagnosticStatus.FAILED] + if len(failed) == 0: + return + self.had_errors = True + Log.error('Failed jobs') + Log.error('-----------') + for job in failed: + Log.error('{0}: {0.message}', job) + Log.error('Total wasted time: {0}', sum([job.consumed_time for job in failed], datetime.timedelta())) + Log.info('') def _run_job(self, job): + Log.info('Starting {0}', job) + job.status = DiagnosticStatus.RUNNING + time = datetime.datetime.now() try: - Log.info('Starting {0}', job) - job.status = DiagnosticStatus.RUNNING - time = datetime.datetime.now() job.compute() - time = datetime.datetime.now() - time - if type(job) in self.time: - self.time[type(job)] += time - else: - self.time[type(job)] = time - Log.result('Finished {0}', job) - job.status = DiagnosticStatus.COMPLETED - return True except Exception as ex: + job.consumed_time = datetime.datetime.now() - time job.message = str(ex) Log.error('Job {0} failed: {1}', job, ex) job.status = DiagnosticStatus.FAILED + return False + job.consumed_time = datetime.datetime.now() - time + Log.result('Finished {0}', job) + job.status = DiagnosticStatus.COMPLETED + return True count = 0 failed_jobs = list() -- GitLab From ae84ffa913c3615f1b528672f44f462be7782d15 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 24 Mar 2017 12:30:23 +0100 Subject: [PATCH 06/82] Added checks to avoid interlocking --- earthdiagnostics/cmormanager.py | 35 +++++++++++++++++++++++++ earthdiagnostics/datafile.py | 23 +++++++++++++--- 
earthdiagnostics/datamanager.py | 13 ++++++++- earthdiagnostics/diagnostic.py | 16 ++++++----- earthdiagnostics/general/monthlymean.py | 4 +-- earthdiagnostics/work_manager.py | 19 +++++++++++--- 6 files changed, 94 insertions(+), 16 deletions(-) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 88e0a874..97b80858 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -121,6 +121,41 @@ class CMORManager(DataManager): return self._get_file_from_storage(filepath) + def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, + vartype=VariableType.MEAN, diagnostic=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + cmor_var = self.variable_list.get_variable(var) + final_name = self._get_final_var_name(box, var) + + filepath = self.get_file_path(startdate, member, domain, final_name, cmor_var, chunk, frequency, grid) + netcdf_file = self._declare_generated_file(filepath, domain, final_name, cmor_var, self.config.data_convention, + region, diagnostic, vartype) + netcdf_file.frequency = frequency + return netcdf_file + def get_file_path(self, startdate, member, domain, 
var, cmor_var, chunk, frequency, grid=None, year=None, date_str=None): """ diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 5a31af9e..1a37711f 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -39,6 +39,7 @@ class DataFile(Publisher): self.region = None self.frequency = None self.data_convention = None + self.diagnostic = None self._local_status = LocalStatus.PENDING self._storage_status = StorageStatus.READY @@ -57,6 +58,14 @@ class DataFile(Publisher): self._local_status = value self.dispatch(self, value) + @property + def storage_status(self): + return self._storage_status + + @storage_status.setter + def storage_status(self, value): + self._storage_status = value + @classmethod def from_storage(cls, filepath): file_object = cls() @@ -65,7 +74,7 @@ class DataFile(Publisher): return file_object @classmethod - def to_storage(cls, domain, var, cmor_var, data_convention, region): + def to_storage(cls, remote_file, domain, var, cmor_var, data_convention, region): new_object = cls() new_object.domain = domain new_object.var = var @@ -73,6 +82,7 @@ class DataFile(Publisher): new_object.region = region new_object.frequency = None new_object.data_convention = data_convention + new_object.remote_file = remote_file new_object.storage_status = StorageStatus.PENDING return new_object @@ -85,9 +95,14 @@ class DataFile(Publisher): self._correct_metadata() self._prepare_region() self._rename_coordinate_variables() + self.add_diagnostic_history() Utils.move_file(self.local_file, self.remote_file) self.storage_status = StorageStatus.READY + def set_local_file(self, local_file): + self.local_file = local_file + self.local_status = LocalStatus.READY + def _correct_metadata(self): if not self.cmor_var: return @@ -221,10 +236,12 @@ class DataFile(Publisher): variables['nav_lon_grid_T'] = 'lon' Utils.rename_variables(self.local_file, variables, False, True) - def add_diagnostic_history(self, diagnostic): + def 
add_diagnostic_history(self): + if not self.diagnostic: + return from earthdiagnostics.earthdiags import EarthDiags history_line = 'Diagnostic {1} calculated with EarthDiagnostics version {0}'.format(EarthDiags.version, - diagnostic) + self.diagnostic) self._add_history_line(history_line) def add_cmorization_history(self): diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index fb83ee4a..e33bf1b3 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -13,7 +13,7 @@ from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms -from earthdiagnostics.datafile import NetCDFFile as NCfile +from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus class DataManager(object): @@ -66,6 +66,17 @@ class DataManager(object): self.requested_files[filepath] = NCfile.from_storage(filepath) return self.requested_files[filepath] + def _declare_generated_file(self, remote_file, domain, var, cmor_var, data_convention, + region, diagnostic, var_type): + if remote_file not in self.requested_files: + self.requested_files[remote_file] = NCfile.to_storage(remote_file, domain, var, cmor_var, data_convention, + region) + file_object = self.requested_files[remote_file] + file_object.diagnostic = diagnostic + file_object.var_type = var_type + file_object.storage_status = StorageStatus.PENDING + return file_object + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 6c182c49..3dadbf23 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -1,5 +1,7 @@ # coding=utf-8 +import datetime +from datafile import StorageStatus from earthdiagnostics.constants import Basins, Basin from 
earthdiagnostics.frequency import Frequency from earthdiagnostics.variable_type import VariableType @@ -36,6 +38,7 @@ class Diagnostic(Publisher): self.data_manager = data_manager self._status = DiagnosticStatus.WAITING self._requests = [] + self.consumed_time = datetime.timedelta() def __repr__(self): return str(self) @@ -134,9 +137,8 @@ class Diagnostic(Publisher): """ pass - def declare_data(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, - box=None, rename_var=None, frequency=None, year=None, date_str=None, move_old=False, - vartype=VariableType.MEAN): + def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, + vartype=VariableType.MEAN): """ :param filetosend: @@ -161,9 +163,8 @@ class Diagnostic(Publisher): """ if isinstance(region, Basin): region = region.fullname - self.data_manager.send_file(filetosend, domain, var, startdate, member, chunk, grid, region, - box, rename_var, frequency, year, date_str, move_old, diagnostic=self, - vartype=vartype) + return self.data_manager.declare_chunk(domain, var, startdate, member, chunk, grid, region, box, + diagnostic=self, vartype=vartype, frequency=frequency) @classmethod def generate_jobs(cls, diags, options): @@ -225,6 +226,9 @@ class Diagnostic(Publisher): for request in self._requests: request.unsubscribe(self) + def all_requests_in_storage(self): + return not any(request.storage_status != StorageStatus.READY for request in self._requests) + class DiagnosticOption(object): diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 6882f697..0aea6c96 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -85,8 +85,8 @@ class MonthlyMean(Diagnostic): frequency=self.frequency, grid=self.grid) def declare_data_generated(self): - self.monmean = self.declare_data(self.domain, self.variable, self.startdate, self.member, self.chunk, - 
frequency=Frequencies.monthly, grid=self.grid) + self.monmean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=Frequencies.monthly, grid=self.grid) def compute(self): """ diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 8aa9c07c..69f63a5d 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -5,6 +5,7 @@ import operator from bscearth.utils.log import Log from concurrent.futures import ThreadPoolExecutor +from datafile import StorageStatus from diagnostic import DiagnosticStatus from utils import Utils, TempFile import threading @@ -19,6 +20,7 @@ class WorkManager(object): self.had_errors = False self.data_manager = data_manager + def run(self): time = datetime.datetime.now() @@ -30,6 +32,7 @@ class WorkManager(object): for job in self.jobs: job.request_data() + job.declare_data_generated() job.subscribe(self, self._job_status_changed) self.downloader = ThreadPoolExecutor(1) @@ -37,11 +40,13 @@ class WorkManager(object): self.executor = ThreadPoolExecutor(self.threads) for file_object in self.data_manager.requested_files.values(): - self.downloader.submit(file_object.download) + if file_object.storage_status == StorageStatus.READY: + self.downloader.submit(file_object.download) self.lock = threading.Lock() self.lock.acquire() - self.lock.acquire() + if not self.check_completion(): + self.lock.acquire() self.downloader.shutdown(True) self.uploader.shutdown(True) @@ -61,9 +66,15 @@ class WorkManager(object): self.check_completion() def check_completion(self): - if any([job.status not in (DiagnosticStatus.COMPLETED, DiagnosticStatus.FAILED) for job in self.jobs]): - return + for job in self.jobs: + if job.status in (DiagnosticStatus.READY, DiagnosticStatus.RUNNING): + return False + if job.status == DiagnosticStatus.WAITING: + if job.all_requests_in_storage(): + return False + self.lock.release() + return True def print_stats(self): 
Log.info('Time consumed by each diagnostic class') -- GitLab From b4979c2dfcf65dc9d640e821155f02980c0aebf9 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 24 Mar 2017 13:04:46 +0100 Subject: [PATCH 07/82] First version of complete workmanager is working --- earthdiagnostics/datafile.py | 15 +++++++++---- earthdiagnostics/datamanager.py | 4 +++- earthdiagnostics/diagnostic.py | 6 +++--- earthdiagnostics/general/monthlymean.py | 2 +- earthdiagnostics/work_manager.py | 28 +++++++++++++++++++------ 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 1a37711f..2e6e966e 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -17,7 +17,7 @@ class LocalStatus(object): DOWNLOADING = 1 READY = 2 FAILED = 3 - TO_COMPUTE = 4 + NOT_REQUESTED = 4 class StorageStatus(object): @@ -40,14 +40,19 @@ class DataFile(Publisher): self.frequency = None self.data_convention = None self.diagnostic = None - self._local_status = LocalStatus.PENDING + self._local_status = LocalStatus.NOT_REQUESTED self._storage_status = StorageStatus.READY def unsubscribe(self, who): super(DataFile, self).unsubscribe(who) - if self.local_status == LocalStatus.READY and len(self.subscribers) == 0: + if self.local_status == LocalStatus.READY and not any(self.subscribers) == 0: os.remove(self.local_file) + def upload_required(self): + return self.local_status == LocalStatus.READY and self.storage_status == StorageStatus.PENDING + + def download_required(self): + return self.local_status == LocalStatus.PENDING and self.storage_status == StorageStatus.READY @property def local_status(self): @@ -56,7 +61,7 @@ class DataFile(Publisher): @local_status.setter def local_status(self, value): self._local_status = value - self.dispatch(self, value) + self.dispatch(self) @property def storage_status(self): @@ -65,6 +70,7 @@ class DataFile(Publisher): @storage_status.setter def storage_status(self, value): 
self._storage_status = value + self.dispatch(self) @classmethod def from_storage(cls, filepath): @@ -97,6 +103,7 @@ class DataFile(Publisher): self._rename_coordinate_variables() self.add_diagnostic_history() Utils.move_file(self.local_file, self.remote_file) + Log.info('File {0} uploaded!', self.remote_file) self.storage_status = StorageStatus.READY def set_local_file(self, local_file): diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index e33bf1b3..2e1ada26 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -13,7 +13,7 @@ from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms -from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus +from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus class DataManager(object): @@ -64,6 +64,8 @@ class DataManager(object): def _get_file_from_storage(self, filepath): if filepath not in self.requested_files: self.requested_files[filepath] = NCfile.from_storage(filepath) + file_object = self.requested_files[filepath] + file_object.local_satatus = LocalStatus.PENDING return self.requested_files[filepath] def _declare_generated_file(self, remote_file, domain, var, cmor_var, data_convention, diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 3dadbf23..9a6ca4ef 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -210,14 +210,14 @@ class Diagnostic(Publisher): request.subscribe(self, self._updated_request) return request - def _updated_request(self, request, new_status): + def _updated_request(self, request): from datafile import LocalStatus - if new_status == LocalStatus.FAILED: + if request.local_status == LocalStatus.FAILED: self.reason = 'Required file {0} is not 
available'.format(request.remote_file) self.status = DiagnosticStatus.FAILED return - if new_status == LocalStatus.READY: + if request.local_status == LocalStatus.READY: if all([request.local_status == LocalStatus.READY for request in self._requests]): self.status = DiagnosticStatus.READY return diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 0aea6c96..c77b642c 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -86,7 +86,7 @@ class MonthlyMean(Diagnostic): def declare_data_generated(self): self.monmean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.monthly, grid=self.grid) + frequency=Frequencies.yearly, grid=self.grid) def compute(self): """ diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 69f63a5d..8af7fb1e 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -22,7 +22,6 @@ class WorkManager(object): def run(self): - time = datetime.datetime.now() Log.info("Starting to compute at {0}", time) self.threads = Utils.available_cpu_count() @@ -40,17 +39,19 @@ class WorkManager(object): self.executor = ThreadPoolExecutor(self.threads) for file_object in self.data_manager.requested_files.values(): - if file_object.storage_status == StorageStatus.READY: + file_object.subscribe(self, self._file_object_status_changed) + if file_object.download_required(): self.downloader.submit(file_object.download) self.lock = threading.Lock() self.lock.acquire() - if not self.check_completion(): - self.lock.acquire() - self.downloader.shutdown(True) + self.check_completion() + self.lock.acquire() + + self.downloader.shutdown() + self.executor.shutdown() self.uploader.shutdown(True) - self.executor.shutdown(True) TempFile.clean() finish_time = datetime.datetime.now() @@ -65,6 +66,15 @@ class WorkManager(object): self.executor.submit(self._run_job, 
job) self.check_completion() + def _file_object_status_changed(self, file_object): + if file_object.download_required(): + self.downloader.submit(file_object.download) + return + if file_object.upload_required(): + self.uploader.submit(file_object.upload) + return + self.check_completion() + def check_completion(self): for job in self.jobs: if job.status in (DiagnosticStatus.READY, DiagnosticStatus.RUNNING): @@ -73,6 +83,12 @@ class WorkManager(object): if job.all_requests_in_storage(): return False + for request in self.data_manager.requested_files.values(): + if request.upload_required(): + return + if request.download_required(): + return + self.lock.release() return True -- GitLab From 74e27f5ad89f07b7678ed3dd90f1faa9923ab38e Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 24 Mar 2017 14:52:51 +0100 Subject: [PATCH 08/82] All time mean diagnostics adapted to new strategy --- diags.conf | 2 +- earthdiagnostics/general/dailymean.py | 28 ++++++++++++++----------- earthdiagnostics/general/monthlymean.py | 2 +- earthdiagnostics/general/yearlymean.py | 28 +++++++++++++++---------- earthdiagnostics/work_manager.py | 1 + 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/diags.conf b/diags.conf index df2f238a..42bb4f3c 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = monmean,tos,ocean,mon +DIAGS = yearmean,tos,ocean,mon # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/general/dailymean.py b/earthdiagnostics/general/dailymean.py index 983609e1..984c6862 100644 --- a/earthdiagnostics/general/dailymean.py +++ b/earthdiagnostics/general/dailymean.py @@ -81,25 +81,29 @@ class DailyMean(Diagnostic): options['domain'], options['variable'], options['frequency'], options['grid'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=self.frequency, grid=self.grid) + + def declare_data_generated(self): + self.daymean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=Frequencies.daily, grid=self.grid) + def compute(self): """ Runs the diagnostic """ - day_mean = TempFile.get() - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) - handler = Utils.openCdf(variable_file) + temp = TempFile.get() + handler = Utils.openCdf(self.variable_file.local_file) if 'region' in handler.variables: noregion = TempFile.get() - Utils.nco.ncks(input=variable_file, output=noregion, options='-O -C -x -v region') - Utils.cdo.daymean(input=noregion, output=day_mean) - monmean_handler = Utils.openCdf(day_mean) + Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options='-O -C -x -v region') + Utils.cdo.daymean(input=noregion, output=temp) + os.remove(noregion) + monmean_handler = Utils.openCdf(temp) Utils.copy_variable(handler, monmean_handler, 'region') monmean_handler.close() else: - Utils.cdo.daymean(input=variable_file, output=day_mean) - os.remove(variable_file) - - self.send_file(day_mean, self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.daily, grid=self.grid) + Utils.cdo.daymean(input=self.variable_file.local_file, output=temp) + self.daymean.set_local_file(temp) diff --git 
a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index c77b642c..0aea6c96 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -86,7 +86,7 @@ class MonthlyMean(Diagnostic): def declare_data_generated(self): self.monmean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.yearly, grid=self.grid) + frequency=Frequencies.monthly, grid=self.grid) def compute(self): """ diff --git a/earthdiagnostics/general/yearlymean.py b/earthdiagnostics/general/yearlymean.py index 99ee87d7..8906e591 100644 --- a/earthdiagnostics/general/yearlymean.py +++ b/earthdiagnostics/general/yearlymean.py @@ -81,25 +81,31 @@ class YearlyMean(Diagnostic): options['domain'], options['variable'], options['frequency'], options['grid'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=self.frequency, grid=self.grid) + + def declare_data_generated(self): + self.yearmean = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + frequency=Frequencies.yearly, grid=self.grid) + def compute(self): """ Runs the diagnostic """ - year_mean = TempFile.get() - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=self.frequency, grid=self.grid) - handler = Utils.openCdf(variable_file) + temp = TempFile.get() + + handler = Utils.openCdf(self.variable_file.local_file) if 'region' in handler.variables: noregion = TempFile.get() - Utils.nco.ncks(input=variable_file, output=noregion, options='-O -C -x -v region') - Utils.cdo.yearmean(input=noregion, output=year_mean) - monmean_handler = Utils.openCdf(year_mean) + Utils.nco.ncks(input=self.variable_file.local_file, output=noregion, options='-O -C -x -v region') + Utils.cdo.yearmean(input=noregion, 
output=temp) + monmean_handler = Utils.openCdf(temp) Utils.copy_variable(handler, monmean_handler, 'region') monmean_handler.close() else: - Utils.cdo.yearmean(input=variable_file, output=year_mean) - os.remove(variable_file) + Utils.cdo.yearmean(input=self.variable_file.local_file, output=temp) + os.remove(self.variable_file.local_file) - self.send_file(year_mean, self.domain, self.variable, self.startdate, self.member, self.chunk, - frequency=Frequencies.yearly, grid=self.grid) + self.yearmean.set_local_file(temp) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 8af7fb1e..9d8e9348 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -79,6 +79,7 @@ class WorkManager(object): for job in self.jobs: if job.status in (DiagnosticStatus.READY, DiagnosticStatus.RUNNING): return False + if job.status == DiagnosticStatus.WAITING: if job.all_requests_in_storage(): return False -- GitLab From e42fbd9e40fdb665cf08aba7a199752a15ffd467 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 24 Mar 2017 17:43:06 +0100 Subject: [PATCH 09/82] Last friday changes --- earthdiagnostics/datafile.py | 6 ++++++ earthdiagnostics/work_manager.py | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 2e6e966e..00c3c3da 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -110,6 +110,9 @@ class DataFile(Publisher): self.local_file = local_file self.local_status = LocalStatus.READY + def create_link(self): + pass + def _correct_metadata(self): if not self.cmor_var: return @@ -392,4 +395,7 @@ class NetCDFFile(DataFile): Log.error('File {0} not available: {1}', self.remote_file, ex) self.local_status = LocalStatus.FAILED + def create_link(self): + self.data_manager_create_link(self.domain, self.remote_file, self.frequency, self.var, self.grid, move_old, vartype) + diff --git a/earthdiagnostics/work_manager.py 
b/earthdiagnostics/work_manager.py index 9d8e9348..1cc06625 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -20,7 +20,6 @@ class WorkManager(object): self.had_errors = False self.data_manager = data_manager - def run(self): time = datetime.datetime.now() Log.info("Starting to compute at {0}", time) -- GitLab From e7bcbeabd1331e69c48108ecca1a5afe3b69de92 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 27 Mar 2017 10:49:58 +0200 Subject: [PATCH 10/82] First attempt to allow diagnostic that modify existing files --- diags.conf | 2 +- earthdiagnostics/datafile.py | 16 +++++++++++++++- earthdiagnostics/diagnostic.py | 6 ++++-- earthdiagnostics/general/attribute.py | 15 +++++++++++---- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/diags.conf b/diags.conf index 42bb4f3c..d6e3367c 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = yearmean,tos,ocean,mon +DIAGS = att,tos,ocean,test,value yearmean,tos,ocean,mon # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 00c3c3da..606fe7cd 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -42,6 +42,7 @@ class DataFile(Publisher): self.diagnostic = None self._local_status = LocalStatus.NOT_REQUESTED self._storage_status = StorageStatus.READY + self._modifiers = [] def unsubscribe(self, who): super(DataFile, self).unsubscribe(who) @@ -54,6 +55,16 @@ class DataFile(Publisher): def download_required(self): return self.local_status == LocalStatus.PENDING and self.storage_status == StorageStatus.READY + def add_modifier(self, diagnostic): + self._modifiers.append(diagnostic) + + def ready_to_run(self, diagnostic): + if not self.local_status == LocalStatus.READY: + return False + if len(self._modifiers) == 0: + return True + return self._modifiers[0] is diagnostic + @property def local_status(self): return self._local_status @@ -106,10 +117,13 @@ class DataFile(Publisher): Log.info('File {0} uploaded!', self.remote_file) self.storage_status = StorageStatus.READY - def set_local_file(self, local_file): + def set_local_file(self, local_file, diagnostic=None): + if diagnostic in self._modifiers: + self._modifiers.remove(diagnostic) self.local_file = local_file self.local_status = LocalStatus.READY + def create_link(self): pass diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 9a6ca4ef..5dfe4d60 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -204,8 +204,10 @@ class Diagnostic(Publisher): """ return 'Developer must override base class __str__ method' - def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None): + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, to_modify=False): request = self.data_manager.request_chunk(domain, var, startdate, member, chunk, grid, box, frequency) + if to_modify: + request.add_modifier(self) 
self._requests.append(request) request.subscribe(self, self._updated_request) return request @@ -218,7 +220,7 @@ class Diagnostic(Publisher): return if request.local_status == LocalStatus.READY: - if all([request.local_status == LocalStatus.READY for request in self._requests]): + if all([request.ready_to_run(self) for request in self._requests]): self.status = DiagnosticStatus.READY return diff --git a/earthdiagnostics/general/attribute.py b/earthdiagnostics/general/attribute.py index 178dae42..4af93e1c 100644 --- a/earthdiagnostics/general/attribute.py +++ b/earthdiagnostics/general/attribute.py @@ -78,18 +78,25 @@ class Attribute(Diagnostic): options['value'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid, to_modify=True) + + def declare_data_generated(self): + self.corrected = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + variable_file = self.variable_file.local_file handler = Utils.openCdf(variable_file) handler.setncattr(self.attributte_name, self.attributte_value) handler.close() if not Utils.check_netcdf_file(variable_file): raise Exception('Attribute {0} can not be set correctly to {1}'.format(self.attributte_name, self.attributte_value)) - self.send_file(variable_file, self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + + self.corrected.set_local_file(variable_file) -- GitLab From c1f44547a6e8342e267941a7bacc094db31aa319 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 27 Mar 2017 12:52:17 +0200 Subject: [PATCH 11/82] Attributte diagnostic ready --- diags.conf | 2 +- earthdiagnostics/datafile.py | 34 +++++++++++++++++++++------ 
earthdiagnostics/diagnostic.py | 16 ++++++++++--- earthdiagnostics/general/attribute.py | 2 +- earthdiagnostics/work_manager.py | 10 +++++--- 5 files changed, 49 insertions(+), 15 deletions(-) diff --git a/diags.conf b/diags.conf index d6e3367c..54b83c3a 100644 --- a/diags.conf +++ b/diags.conf @@ -91,7 +91,7 @@ STARTDATES = 19900101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 12 -CHUNKS = 2 +CHUNKS = 1 # CHUNKS = 1 diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 606fe7cd..8dcfcf3a 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -18,6 +18,7 @@ class LocalStatus(object): READY = 2 FAILED = 3 NOT_REQUESTED = 4 + COMPUTING = 5 class StorageStatus(object): @@ -46,18 +47,30 @@ class DataFile(Publisher): def unsubscribe(self, who): super(DataFile, self).unsubscribe(who) - if self.local_status == LocalStatus.READY and not any(self.subscribers) == 0: + if self.local_status == LocalStatus.READY and len(self.subscribers) == 0: os.remove(self.local_file) def upload_required(self): return self.local_status == LocalStatus.READY and self.storage_status == StorageStatus.PENDING def download_required(self): - return self.local_status == LocalStatus.PENDING and self.storage_status == StorageStatus.READY + + if not self.local_status == LocalStatus.PENDING: + return False + + if self.storage_status == StorageStatus.READY: + return True + + if self.has_modifiers(): + print 'Scheduling because has modifiers' + return True def add_modifier(self, diagnostic): self._modifiers.append(diagnostic) + def has_modifiers(self): + return len(self._modifiers) > 0 + def ready_to_run(self, diagnostic): if not self.local_status == LocalStatus.READY: return False @@ -71,6 +84,8 @@ class DataFile(Publisher): @local_status.setter def local_status(self, value): + if self._local_status == value: + return self._local_status = value self.dispatch(self) @@ -80,6 +95,8 @@ class DataFile(Publisher): @storage_status.setter def storage_status(self, 
value): + if self._storage_status == value: + return self._storage_status = value self.dispatch(self) @@ -106,14 +123,16 @@ class DataFile(Publisher): def download(self): raise NotImplementedError() - def upload(self): - self.storage_status = StorageStatus.UPLOADING + def prepare_to_upload(self): Utils.convert2netcdf4(self.local_file) self._correct_metadata() self._prepare_region() self._rename_coordinate_variables() self.add_diagnostic_history() - Utils.move_file(self.local_file, self.remote_file) + + def upload(self): + self.storage_status = StorageStatus.UPLOADING + Utils.copy_file(self.local_file, self.remote_file) Log.info('File {0} uploaded!', self.remote_file) self.storage_status = StorageStatus.READY @@ -121,9 +140,9 @@ class DataFile(Publisher): if diagnostic in self._modifiers: self._modifiers.remove(diagnostic) self.local_file = local_file + self.prepare_to_upload() self.local_status = LocalStatus.READY - def create_link(self): pass @@ -405,7 +424,8 @@ class NetCDFFile(DataFile): self.local_status = LocalStatus.READY except Exception as ex: - os.remove(self.local_file) + if os.path.isfile(self.local_file): + os.remove(self.local_file) Log.error('File {0} not available: {1}', self.remote_file, ex) self.local_status = LocalStatus.FAILED diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 5dfe4d60..6bd8fece 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -1,7 +1,7 @@ # coding=utf-8 import datetime -from datafile import StorageStatus +from datafile import StorageStatus, LocalStatus from earthdiagnostics.constants import Basins, Basin from earthdiagnostics.frequency import Frequency from earthdiagnostics.variable_type import VariableType @@ -35,6 +35,7 @@ class Diagnostic(Publisher): def __init__(self, data_manager): super(Diagnostic, self).__init__() + self._generated_files = [] self.data_manager = data_manager self._status = DiagnosticStatus.WAITING self._requests = [] @@ -49,10 +50,15 @@ 
class Diagnostic(Publisher): @status.setter def status(self, value): + if self._status == value: + return self._status = value + if self.status == DiagnosticStatus.RUNNING: + for generated_file in self._generated_files: + generated_file.local_status = LocalStatus.COMPUTING if self.status in (DiagnosticStatus.FAILED, DiagnosticStatus.COMPLETED): self._unsuscribe_requests() - self.dispatch(self, value) + self.dispatch(self) @staticmethod def register(cls): @@ -163,8 +169,10 @@ class Diagnostic(Publisher): """ if isinstance(region, Basin): region = region.fullname - return self.data_manager.declare_chunk(domain, var, startdate, member, chunk, grid, region, box, + generated_chunk = self.data_manager.declare_chunk(domain, var, startdate, member, chunk, grid, region, box, diagnostic=self, vartype=vartype, frequency=frequency) + self._generated_files.append(generated_chunk) + return generated_chunk @classmethod def generate_jobs(cls, diags, options): @@ -213,6 +221,8 @@ class Diagnostic(Publisher): return request def _updated_request(self, request): + if self.status != DiagnosticStatus.WAITING: + return from datafile import LocalStatus if request.local_status == LocalStatus.FAILED: self.reason = 'Required file {0} is not available'.format(request.remote_file) diff --git a/earthdiagnostics/general/attribute.py b/earthdiagnostics/general/attribute.py index 4af93e1c..d4dc308b 100644 --- a/earthdiagnostics/general/attribute.py +++ b/earthdiagnostics/general/attribute.py @@ -98,5 +98,5 @@ class Attribute(Diagnostic): raise Exception('Attribute {0} can not be set correctly to {1}'.format(self.attributte_name, self.attributte_value)) - self.corrected.set_local_file(variable_file) + self.corrected.set_local_file(variable_file, self) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 1cc06625..1544a4c5 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -5,7 +5,7 @@ import operator from bscearth.utils.log 
import Log from concurrent.futures import ThreadPoolExecutor -from datafile import StorageStatus +from datafile import StorageStatus, LocalStatus from diagnostic import DiagnosticStatus from utils import Utils, TempFile import threading @@ -60,8 +60,8 @@ class WorkManager(object): self.print_stats() return not self.had_errors - def _job_status_changed(self, job, status): - if status == DiagnosticStatus.READY: + def _job_status_changed(self, job): + if job.status == DiagnosticStatus.READY: self.executor.submit(self._run_job, job) self.check_completion() @@ -84,6 +84,10 @@ class WorkManager(object): return False for request in self.data_manager.requested_files.values(): + if request.storage_status == StorageStatus.UPLOADING: + return + if request.local_status == LocalStatus.DOWNLOADING: + return if request.upload_required(): return if request.download_required(): -- GitLab From e53c392644c2dac900e62d118f7c3d2e353f2a32 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 27 Mar 2017 13:13:47 +0200 Subject: [PATCH 12/82] Changing task --- earthdiagnostics/cmormanager.py | 2 +- earthdiagnostics/datafile.py | 19 +++++++++++-------- earthdiagnostics/datamanager.py | 13 ++++++++++--- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 97b80858..d006e596 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -152,7 +152,7 @@ class CMORManager(DataManager): filepath = self.get_file_path(startdate, member, domain, final_name, cmor_var, chunk, frequency, grid) netcdf_file = self._declare_generated_file(filepath, domain, final_name, cmor_var, self.config.data_convention, - region, diagnostic, vartype) + region, diagnostic, grid, vartype) netcdf_file.frequency = frequency return netcdf_file diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 8dcfcf3a..0aa4a903 100644 --- a/earthdiagnostics/datafile.py +++ 
b/earthdiagnostics/datafile.py @@ -10,6 +10,7 @@ from bscearth.utils.log import Log from earthdiagnostics.utils import Utils, TempFile from publisher import Publisher from earthdiagnostics.modelingrealm import ModelingRealms +from variable_type import VariableType class LocalStatus(object): @@ -41,6 +42,9 @@ class DataFile(Publisher): self.frequency = None self.data_convention = None self.diagnostic = None + self.grid = None + self.data_manager = None + self.var_type = VariableType.MEAN self._local_status = LocalStatus.NOT_REQUESTED self._storage_status = StorageStatus.READY self._modifiers = [] @@ -108,14 +112,8 @@ class DataFile(Publisher): return file_object @classmethod - def to_storage(cls, remote_file, domain, var, cmor_var, data_convention, region): + def to_storage(cls, remote_file): new_object = cls() - new_object.domain = domain - new_object.var = var - new_object.cmor_var = cmor_var - new_object.region = region - new_object.frequency = None - new_object.data_convention = data_convention new_object.remote_file = remote_file new_object.storage_status = StorageStatus.PENDING return new_object @@ -134,6 +132,7 @@ class DataFile(Publisher): self.storage_status = StorageStatus.UPLOADING Utils.copy_file(self.local_file, self.remote_file) Log.info('File {0} uploaded!', self.remote_file) + self.create_link() self.storage_status = StorageStatus.READY def set_local_file(self, local_file, diagnostic=None): @@ -430,6 +429,10 @@ class NetCDFFile(DataFile): self.local_status = LocalStatus.FAILED def create_link(self): - self.data_manager_create_link(self.domain, self.remote_file, self.frequency, self.var, self.grid, move_old, vartype) + try: + self.data_manager._create_link(self.domain, self.remote_file, self.frequency, self.var, + self.grid, False, self.var_type) + except Exception as ex: + Log.error('Can not create link to {1}: {0}'.format(ex, self.remote_file)) diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 2e1ada26..2e4a66fe 
100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -69,13 +69,20 @@ class DataManager(object): return self.requested_files[filepath] def _declare_generated_file(self, remote_file, domain, var, cmor_var, data_convention, - region, diagnostic, var_type): + region, diagnostic, grid, var_type): if remote_file not in self.requested_files: - self.requested_files[remote_file] = NCfile.to_storage(remote_file, domain, var, cmor_var, data_convention, - region) + self.requested_files[remote_file] = NCfile.to_storage(remote_file) file_object = self.requested_files[remote_file] file_object.diagnostic = diagnostic file_object.var_type = var_type + file_object.grid = grid + file_object.data_manager = self + file_object.domain = domain + file_object.var = var + file_object.cmor_var = cmor_var + file_object.region = region + file_object.frequency = None + file_object.data_convention = data_convention file_object.storage_status = StorageStatus.PENDING return file_object -- GitLab From e26f39d3a5f8c1e8aac0a7b5e92db1904ab4d57f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 28 Mar 2017 17:25:19 +0200 Subject: [PATCH 13/82] Fixed link creation --- earthdiagnostics/cmormanager.py | 2 ++ earthdiagnostics/datamanager.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index d006e596..ec5750d6 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -147,6 +147,8 @@ class CMORManager(DataManager): :return: path to the copy created on the scratch folder :rtype: str """ + if not frequency: + frequency = self.config.frequency cmor_var = self.variable_list.get_variable(var) final_name = self._get_final_var_name(box, var) diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 2e4a66fe..933fc486 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -81,7 +81,6 @@ class 
DataManager(object): file_object.var = var file_object.cmor_var = cmor_var file_object.region = region - file_object.frequency = None file_object.data_convention = data_convention file_object.storage_status = StorageStatus.PENDING return file_object -- GitLab From c08dbdc31868f86c858beefc43fad0d68b1ca194 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 28 Mar 2017 17:34:21 +0200 Subject: [PATCH 14/82] Added support for jobs that use no data (relink, relinkall) --- diags.conf | 2 +- earthdiagnostics/diagnostic.py | 8 +++++--- earthdiagnostics/work_manager.py | 9 +++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/diags.conf b/diags.conf index 54b83c3a..0561ac9b 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = att,tos,ocean,test,value yearmean,tos,ocean,mon +DIAGS = relinkall # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 6bd8fece..04f2d68e 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -230,9 +230,11 @@ class Diagnostic(Publisher): return if request.local_status == LocalStatus.READY: - if all([request.ready_to_run(self) for request in self._requests]): - self.status = DiagnosticStatus.READY - return + self.check_is_ready() + + def check_is_ready(self): + if all([request.ready_to_run(self) for request in self._requests]): + self.status = DiagnosticStatus.READY def _unsuscribe_requests(self): for request in self._requests: diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 1544a4c5..e1f3a19a 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -28,14 +28,15 @@ class WorkManager(object): self.threads = self.config.max_cores Log.info('Using {0} threads', self.threads) + self.downloader = ThreadPoolExecutor(1) + self.uploader = ThreadPoolExecutor(1) + self.executor = ThreadPoolExecutor(self.threads) + for job in self.jobs: job.request_data() job.declare_data_generated() job.subscribe(self, self._job_status_changed) - - self.downloader = ThreadPoolExecutor(1) - self.uploader = ThreadPoolExecutor(1) - self.executor = ThreadPoolExecutor(self.threads) + job.check_is_ready() for file_object in self.data_manager.requested_files.values(): file_object.subscribe(self, self._file_object_status_changed) -- GitLab From 5dabf0796908c549afecda601317f1305a9c71cd Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 28 Mar 2017 17:58:51 +0200 Subject: [PATCH 15/82] Adapted scale and rewrite --- diags.conf | 2 +- earthdiagnostics/diagnostic.py | 5 +++-- earthdiagnostics/general/rewrite.py | 13 +++++++++---- earthdiagnostics/general/scale.py | 14 ++++++++++---- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/diags.conf b/diags.conf index 0561ac9b..8c538e27 100644 --- a/diags.conf +++ 
b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = relinkall +DIAGS = scale,tos,ocean,1,0 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 04f2d68e..66766353 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -165,12 +165,13 @@ class Diagnostic(Publisher): :param move_old: :param vartype: Variable type (mean, statistic) :type vartype: VariableType - :return: + :return: datafile object + :rtype: DataFile """ if isinstance(region, Basin): region = region.fullname generated_chunk = self.data_manager.declare_chunk(domain, var, startdate, member, chunk, grid, region, box, - diagnostic=self, vartype=vartype, frequency=frequency) + diagnostic=self, vartype=vartype, frequency=frequency) self._generated_files.append(generated_chunk) return generated_chunk diff --git a/earthdiagnostics/general/rewrite.py b/earthdiagnostics/general/rewrite.py index 6b82a1dd..a51e363b 100644 --- a/earthdiagnostics/general/rewrite.py +++ b/earthdiagnostics/general/rewrite.py @@ -68,12 +68,17 @@ class Rewrite(Diagnostic): options['domain'], options['variable'], options['grid'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid, to_modify=True) + + def declare_data_generated(self): + self.corrected = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def 
compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) - self.send_file(variable_file, self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + self.corrected.set_local_file(self.variable_file.local_file, self) diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index 59fc369d..4e162d4b 100644 --- a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -88,12 +88,19 @@ class Scale(Diagnostic): options['grid'], options['min_limit'], options['max_limit'], frequency)) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid, frequency=self.frequency, to_modify=True) + + def declare_data_generated(self): + self.corrected = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid, frequency=self.frequency) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid, frequency=self.frequency) + variable_file = self.variable_file.local_file handler = Utils.openCdf(variable_file) var_handler = handler.variables[self.variable] @@ -101,8 +108,7 @@ class Scale(Diagnostic): if self._check_limits(): var_handler[:] = self.original_values * self.value + self.offset handler.close() - self.send_file(variable_file, self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid, frequency=self.frequency) + self.corrected.set_local_file(self.variable_file.local_file, self) def _check_limits(self): if not math.isnan(self.min_limit) and (self.original_values.min() < self.min_limit): -- GitLab From 78ae762c532a03e197362de847fd01253034ebff Mon Sep 17 00:00:00 2001 From: Javier 
Vegas-Regidor Date: Wed, 29 Mar 2017 10:23:37 +0200 Subject: [PATCH 16/82] Moc and AreaMoc adapted. MaxMoc started --- diags.conf | 2 +- earthdiagnostics/cmormanager.py | 141 +++++++++++++++++++++++++++++- earthdiagnostics/datafile.py | 37 ++++---- earthdiagnostics/datamanager.py | 7 +- earthdiagnostics/diagnostic.py | 38 ++++++++ earthdiagnostics/ocean/areamoc.py | 17 +++- earthdiagnostics/ocean/maxmoc.py | 45 +++++++--- earthdiagnostics/ocean/moc.py | 16 ++-- earthdiagnostics/work_manager.py | 1 + 9 files changed, 263 insertions(+), 41 deletions(-) diff --git a/diags.conf b/diags.conf index 8c538e27..2832a73a 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = scale,tos,ocean,1,0 +DIAGS = moc area_moc max_moc # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index ec5750d6..32244fa8 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -6,6 +6,8 @@ import os from bscearth.utils.log import Log from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, previous_day +from datafile import StorageStatus +from diagnostic import Diagnostic from earthdiagnostics.cmorizer import Cmorizer from earthdiagnostics.datamanager import DataManager from earthdiagnostics.frequency import Frequencies, Frequency @@ -121,6 +123,36 @@ class CMORManager(DataManager): return self._get_file_from_storage(filepath) + def request_year(self, domain, var, startdate, member, year, grid=None, box=None, frequency=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + + job = MergeYear(self, domain, var, startdate, member, year, grid, box, frequency) + + return job.year_file + def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN, diagnostic=None): """ @@ -149,15 +181,57 @@ class CMORManager(DataManager): """ if not frequency: frequency = self.config.frequency + 
original_name = var cmor_var = self.variable_list.get_variable(var) + if cmor_var: + var = cmor_var.short_name final_name = self._get_final_var_name(box, var) filepath = self.get_file_path(startdate, member, domain, final_name, cmor_var, chunk, frequency, grid) netcdf_file = self._declare_generated_file(filepath, domain, final_name, cmor_var, self.config.data_convention, - region, diagnostic, grid, vartype) + region, diagnostic, grid, vartype, original_name) netcdf_file.frequency = frequency return netcdf_file + def declare_year(self, domain, var, startdate, member, year, grid=None, box=None, + vartype=VariableType.MEAN, diagnostic=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + original_name = var + cmor_var = self.variable_list.get_variable(var) + if cmor_var: + var = cmor_var.short_name + final_name = self._get_final_var_name(box, var) + + filepath = self.get_file_path(startdate, member, domain, final_name, cmor_var, None, Frequencies.yearly, grid, + year=year) + netcdf_file = self._declare_generated_file(filepath, domain, final_name, cmor_var, self.config.data_convention, + None, diagnostic, grid, vartype, original_name) + netcdf_file.frequency = Frequencies.yearly + return 
netcdf_file + def get_file_path(self, startdate, member, domain, var, cmor_var, chunk, frequency, grid=None, year=None, date_str=None): """ @@ -600,3 +674,68 @@ class CMORManager(DataManager): def _get_member_str(self, member): return 'r{0}i1p1'.format(member + 1 - self.experiment.member_count_start) + +class MergeYear(Diagnostic): + def __init__(self, data_manager, domain, var, startdate, member, year, grid=None, box=None, frequency=None): + super(MergeYear, self).__init__(data_manager) + self.chunk_files = [] + self.experiment = self.data_manager.experiment + self.domain = domain + self.var = var + self.startdate = startdate + self.member = member + self.year = year + self.grid = grid + self.box = box + self.frequency = frequency + + def request_data(self): + for chunk in self.experiment.get_year_chunks(self.startdate, self.year): + self.chunk_files.append(self.request_chunk(self.domain, self.var, self.startdate, self.member, chunk, + grid=self.grid, box=self.box, frequency=self.frequency)) + + def declare_data_generated(self): + self.year_file = self.declare_year(self.domain, self.var, self.startdate, self.member, self.year, + grid=self.grid, box=self.box, frequency=self.frequency) + self.year_file.storage_status = StorageStatus.NO_STORE + + def compute(self): + temp = self._merge_chunk_files() + temp2 = self._select_data_of_given_year(temp) + return temp2 + + def _select_data_of_given_year(self, data_file): + temp2 = TempFile.get() + handler = Utils.openCdf(data_file) + times = Utils.get_datetime_from_netcdf(handler) + x = 0 + first_index = None + last_index = None + while x < times.size: + if times[x].year == self.year: + first_index = x + break + else: + x += 1 + + while x < times.size: + if times[x].year != self.year: + last_index = x + break + else: + x += 1 + if last_index is None: + last_index = times.size + Utils.nco.ncks(input=data_file, output=temp2, options=['-d time,{0},{1}'.format(first_index, last_index - 1)]) + return temp2 + + def 
_merge_chunk_files(self): + temp = TempFile.get() + if len(self.chunk_files) == 1: + Utils.copy_file(self.chunk_files[0].local_file, temp) + return temp + + Utils.nco.ncrcat(input=' '.join(self.chunk_files), output=temp) + for chunk_file in self.chunk_files: + os.remove(chunk_file) + return temp diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 0aa4a903..0863f677 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -27,6 +27,7 @@ class StorageStatus(object): UPLOADING = 1 READY = 2 FAILED = 3 + NO_STORE = 4 class DataFile(Publisher): @@ -44,6 +45,7 @@ class DataFile(Publisher): self.diagnostic = None self.grid = None self.data_manager = None + self.final_name = None self.var_type = VariableType.MEAN self._local_status = LocalStatus.NOT_REQUESTED self._storage_status = StorageStatus.READY @@ -66,7 +68,6 @@ class DataFile(Publisher): return True if self.has_modifiers(): - print 'Scheduling because has modifiers' return True def add_modifier(self, diagnostic): @@ -121,8 +122,14 @@ class DataFile(Publisher): def download(self): raise NotImplementedError() - def prepare_to_upload(self): + def prepare_to_upload(self, rename_var): Utils.convert2netcdf4(self.local_file) + if rename_var: + original_name = rename_var + else: + original_name = self.var + if self.final_name != original_name: + Utils.rename_variable(self.local_file, original_name, self.final_name) self._correct_metadata() self._prepare_region() self._rename_coordinate_variables() @@ -135,11 +142,11 @@ class DataFile(Publisher): self.create_link() self.storage_status = StorageStatus.READY - def set_local_file(self, local_file, diagnostic=None): + def set_local_file(self, local_file, diagnostic=None, rename_var=''): if diagnostic in self._modifiers: self._modifiers.remove(diagnostic) self.local_file = local_file - self.prepare_to_upload() + self.prepare_to_upload(rename_var) self.local_status = LocalStatus.READY def create_link(self): @@ -149,7 +156,7 @@ 
class DataFile(Publisher): if not self.cmor_var: return handler = Utils.openCdf(self.local_file) - var_handler = handler.variables[self.var] + var_handler = handler.variables[self.final_name] self._fix_variable_name(var_handler) handler.modeling_realm = self.cmor_var.domain.name table = self.cmor_var.get_table(self.frequency, self.data_convention) @@ -165,20 +172,20 @@ class DataFile(Publisher): def _fix_variable_name(self, var_handler): var_handler.standard_name = self.cmor_var.standard_name var_handler.long_name = self.cmor_var.long_name - var_handler.short_name = self.cmor_var.short_name + # var_handler.short_name = self.cmor_var.short_name def _fix_values_metadata(self, var_type): if self.cmor_var.valid_min != '': - valid_min = '-a valid_min,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_min) + valid_min = '-a valid_min,{0},o,{1},"{2}" '.format(self.final_name, var_type.char, self.cmor_var.valid_min) else: valid_min = '' if self.cmor_var.valid_max != '': - valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_max) + valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.final_name, var_type.char, self.cmor_var.valid_max) else: valid_max = '' Utils.nco.ncatted(input=self.local_file, output=self.local_file, options='-O -a _FillValue,{0},o,{1},"1.e20" ' - '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.var, var_type.char, + '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.final_name, var_type.char, valid_min, valid_max)) def _fix_coordinate_variables_metadata(self, handler): @@ -237,7 +244,7 @@ class DataFile(Publisher): Utils.nco.ncks(input=temp, output=temp, options='-O --mk_rec_dmn region') handler = Utils.openCdf(temp) handler_send = Utils.openCdf(self.local_file) - value = handler_send.variables[self.var][:] + value = handler_send.variables[self.final_name][:] var_region = handler.variables['region'] basin_index = np.where(var_region[:] == self.region) if len(basin_index[0]) == 0: 
@@ -246,7 +253,7 @@ class DataFile(Publisher): else: basin_index = basin_index[0][0] - handler.variables[self.var][..., basin_index] = value + handler.variables[self.final_name][..., basin_index] = value handler.close() handler_send.close() Utils.move_file(temp, self.local_file) @@ -256,15 +263,15 @@ class DataFile(Publisher): handler.createDimension('region') var_region = handler.createVariable('region', str, 'region') var_region[0] = self.region - original_var = handler.variables[self.var] + original_var = handler.variables[self.final_name] new_var = handler.createVariable('new_var', original_var.datatype, original_var.dimensions + ('region',)) new_var.setncatts({k: original_var.getncattr(k) for k in original_var.ncattrs()}) value = original_var[:] new_var[..., 0] = value handler.close() - Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O -x -v {0}'.format(self.var)) - Utils.rename_variable(self.local_file, 'new_var', self.var) + Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O -x -v {0}'.format(self.final_name)) + Utils.rename_variable(self.local_file, 'new_var', self.final_name) def _rename_coordinate_variables(self): variables = dict() @@ -430,7 +437,7 @@ class NetCDFFile(DataFile): def create_link(self): try: - self.data_manager._create_link(self.domain, self.remote_file, self.frequency, self.var, + self.data_manager._create_link(self.domain, self.remote_file, self.frequency, self.final_name, self.grid, False, self.var_type) except Exception as ex: Log.error('Can not create link to {1}: {0}'.format(ex, self.remote_file)) diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 933fc486..c69fb6c9 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -68,8 +68,8 @@ class DataManager(object): file_object.local_satatus = LocalStatus.PENDING return self.requested_files[filepath] - def _declare_generated_file(self, remote_file, domain, var, cmor_var, 
data_convention, - region, diagnostic, grid, var_type): + def _declare_generated_file(self, remote_file, domain, final_var, cmor_var, data_convention, + region, diagnostic, grid, var_type, original_var): if remote_file not in self.requested_files: self.requested_files[remote_file] = NCfile.to_storage(remote_file) file_object = self.requested_files[remote_file] @@ -78,7 +78,8 @@ class DataManager(object): file_object.grid = grid file_object.data_manager = self file_object.domain = domain - file_object.var = var + file_object.var = original_var + file_object.final_name = final_var file_object.cmor_var = cmor_var file_object.region = region file_object.data_convention = data_convention diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 66766353..36d53e82 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -175,6 +175,36 @@ class Diagnostic(Publisher): self._generated_files.append(generated_chunk) return generated_chunk + def declare_year(self, domain, var, startdate, member, year, grid=None, box=None, + vartype=VariableType.MEAN): + """ + + :param filetosend: + :param domain: + :type domain: ModelingRealm + :param var: + :param startdate: + :param member: + :param chunk: + :param grid: + :param region: + :param box: + :param rename_var: + :param frequency: + :type frequency: Frequency + :param year: + :param date_str: + :param move_old: + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: datafile object + :rtype: DataFile + """ + generated_year = self.data_manager.declare_year(domain, var, startdate, member, year, grid, box, + diagnostic=self, vartype=vartype) + self._generated_files.append(generated_year) + return generated_year + @classmethod def generate_jobs(cls, diags, options): """ @@ -221,6 +251,14 @@ class Diagnostic(Publisher): request.subscribe(self, self._updated_request) return request + def request_year(self, domain, var, startdate, member, year, 
grid=None, box=None, frequency=None, to_modify=False): + request = self.data_manager.request_year(domain, var, startdate, member, year, grid, box, frequency) + if to_modify: + request.add_modifier(self) + self._requests.append(request) + request.subscribe(self, self._updated_request) + return request + def _updated_request(self, request): if self.status != DiagnosticStatus.WAITING: return diff --git a/earthdiagnostics/ocean/areamoc.py b/earthdiagnostics/ocean/areamoc.py index e65fe622..0239953e 100644 --- a/earthdiagnostics/ocean/areamoc.py +++ b/earthdiagnostics/ocean/areamoc.py @@ -39,6 +39,8 @@ class AreaMoc(Diagnostic): alias = 'mocarea' "Diagnostic alias for the configuration file" + vsftmyz = 'vsftmyz' + def __init__(self, data_manager, startdate, member, chunk, basin, box): Diagnostic.__init__(self, data_manager) self.basin = basin @@ -86,15 +88,26 @@ class AreaMoc(Diagnostic): job_list.append(AreaMoc(diags.data_manager, startdate, member, chunk, options['basin'], box)) return job_list + def request_data(self): + self.variable_file = self.request_chunk(ModelingRealms.ocean, AreaMoc.vsftmyz, + self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.results = self.declare_chunk(ModelingRealms.ocean, AreaMoc.vsftmyz, + self.startdate, self.member, self.chunk, + box=self.box) + def compute(self): """ Runs the diagnostic """ nco = Utils.nco cdo = Utils.cdo + + temp = TempFile.get() temp2 = TempFile.get() - temp = self.data_manager.get_file(ModelingRealms.ocean, 'vsftmyz', self.startdate, self.member, self.chunk) + Utils.copy_file(self.variable_file.local_file, temp) handler = Utils.openCdf(temp) if 'i' in handler.dimensions: @@ -146,4 +159,4 @@ class AreaMoc(Diagnostic): nco.ncap2(input=temp2, output=temp2, options='-O -s "coslat[lat]=cos(lat[lat]*3.141592657/180.0)"') nco.ncwa(input=temp2, output=temp2, options='-w coslat -a lat') nco.ncks(input=temp2, output=temp2, options='-O -v vsftmyz,time') - self.send_file(temp2, 
ModelingRealms.ocean, 'vsftmyz', self.startdate, self.member, self.chunk, box=self.box) + self.results.set_local_file(temp2) diff --git a/earthdiagnostics/ocean/maxmoc.py b/earthdiagnostics/ocean/maxmoc.py index 2717d413..6906bf33 100644 --- a/earthdiagnostics/ocean/maxmoc.py +++ b/earthdiagnostics/ocean/maxmoc.py @@ -39,14 +39,14 @@ class MaxMoc(Diagnostic): alias = 'mocmax' "Diagnostic alias for the configuration file" + vsftmyz = 'vsftmyz' + def __init__(self, data_manager, startdate, member, year, basin, box): Diagnostic.__init__(self, data_manager) self.basin = basin self.startdate = startdate self.member = member self.year = year - self.required_vars = ['vo'] - self.generated_vars = ['vsftmyz'] self.box = box def __str__(self): @@ -91,6 +91,29 @@ class MaxMoc(Diagnostic): job_list.append(MaxMoc(diags.data_manager, startdate, member, year, options['basin'], box)) return job_list + def request_data(self): + self.variable_file = self.request_year(ModelingRealms.ocean, MaxMoc.vsftmyz, + self.startdate, self.member, self.year) + + def declare_data_generated(self): + + self.results = {'vsftmyzmax': self.declare_year(ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, + self.year, box=self.box, vartype=VariableType.STATISTIC), + 'vsftmyzmaxlev': self.declare_year(ModelingRealms.ocean, 'vsftmyzmaxlev', self.startdate, + self.member, self.year, box=self.box, + vartype=VariableType.STATISTIC), + 'vsftmyzmaxlat': self.declare_year(ModelingRealms.ocean, 'vsftmyzmaxlat', self.startdate, + self.member, self.year, box=self.box, + vartype=VariableType.STATISTIC), + 'vsftmyzmin': self.declare_year(ModelingRealms.ocean, 'vsftmyzmin', self.startdate, self.member, + self.year, box=self.box, vartype=VariableType.STATISTIC), + 'vsftmyzminlev': self.declare_year(ModelingRealms.ocean, 'vsftmyzminlev', self.startdate, + self.member, self.year, box=self.box, + vartype=VariableType.STATISTIC), + 'vsftmyzminlat': self.declare_year(ModelingRealms.ocean, 'vsftmyzminlat', 
self.startdate, + self.member, self.year, box=self.box, + vartype=VariableType.STATISTIC)} + def compute(self): """ Runs the diagnostic @@ -156,8 +179,7 @@ class MaxMoc(Diagnostic): var.valid_max = 1000. var[0] = maximum handler.close() - self.send_file(temp, ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, box=self.box, - frequency=Frequencies.yearly, year=self.year, vartype=VariableType.STATISTIC) + self.results['vsftmyzmax'].set_local_file(temp) handler = self._create_output_file(temp) var = handler.createVariable('vsftmyzmaxlat', float, ('time',)) @@ -167,8 +189,7 @@ class MaxMoc(Diagnostic): var.valid_max = 90. var[0] = max_lat handler.close() - self.send_file(temp, ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, box=self.box, - frequency=Frequencies.yearly, year=self.year, vartype=VariableType.STATISTIC) + self.results['vsftmyzmaxlat'].set_local_file(temp) handler = self._create_output_file(temp) var = handler.createVariable('vsftmyzmaxlev', float, ('time',)) @@ -178,8 +199,7 @@ class MaxMoc(Diagnostic): var.valid_max = 10000. var[0] = max_lev handler.close() - self.send_file(temp, ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, box=self.box, - frequency=Frequencies.yearly, year=self.year, vartype=VariableType.STATISTIC) + self.results['vsftmyzmaxlev'].set_local_file(temp) handler = self._create_output_file(temp) var = handler.createVariable('vsftmyzmin', float, ('time',)) @@ -189,8 +209,7 @@ class MaxMoc(Diagnostic): var.valid_max = 1000. var[0] = minimum handler.close() - self.send_file(temp, ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, box=self.box, - frequency=Frequencies.yearly, year=self.year, vartype=VariableType.STATISTIC) + self.results['vsftmyzmin'].set_local_file(temp) handler = self._create_output_file(temp) var = handler.createVariable('vsftmyzminlat', float, ('time',)) @@ -200,8 +219,7 @@ class MaxMoc(Diagnostic): var.valid_max = 90. 
var[0] = min_lat handler.close() - self.send_file(temp, ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, box=self.box, - frequency=Frequencies.yearly, year=self.year, vartype=VariableType.STATISTIC) + self.results['vsftmyzminlat'].set_local_file(temp) handler = self._create_output_file(temp) var = handler.createVariable('vsftmyzminlev', float, ('time',)) @@ -211,8 +229,7 @@ class MaxMoc(Diagnostic): var.valid_max = 10000. var[0] = min_lev handler.close() - self.send_file(temp, ModelingRealms.ocean, 'vsftmyzmax', self.startdate, self.member, box=self.box, - frequency=Frequencies.yearly, year=self.year, vartype=VariableType.STATISTIC) + self.results['vsftmyzminlev'].set_local_file(temp) def _create_output_file(self, temp): handler = netCDF4.Dataset(temp, 'w') diff --git a/earthdiagnostics/ocean/moc.py b/earthdiagnostics/ocean/moc.py index 52bbc018..6f4754e0 100644 --- a/earthdiagnostics/ocean/moc.py +++ b/earthdiagnostics/ocean/moc.py @@ -32,6 +32,8 @@ class Moc(Diagnostic): alias = 'moc' "Diagnostic alias for the configuration file" + vsftmyz = 'vsftmyz' + def __init__(self, data_manager, startdate, member, chunk): Diagnostic.__init__(self, data_manager) self.startdate = startdate @@ -64,17 +66,21 @@ class Moc(Diagnostic): job_list.append(Moc(diags.data_manager, startdate, member, chunk)) return job_list + def request_data(self): + self.variable_file = self.request_chunk(ModelingRealms.ocean, 'vo', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.results = self.declare_chunk(ModelingRealms.ocean, Moc.vsftmyz, self.startdate, self.member, self.chunk) + def compute(self): """ Runs the diagnostic """ temp = TempFile.get() - input_file = self.data_manager.get_file(ModelingRealms.ocean, 'vo', self.startdate, self.member, self.chunk) - Log.debug('Computing MOC') - cdftools.run('cdfmoc', input=input_file, output=temp) - Utils.nco.ncks(input=input_file, output=temp, options='-A -v lev') + cdftools.run('cdfmoc', 
input=self.variable_file.local_file, output=temp) + Utils.nco.ncks(input=self.variable_file.local_file, output=temp, options='-A -v lev') Utils.convert2netcdf4(temp) Log.debug('Reformatting variables') @@ -112,4 +118,4 @@ class Moc(Diagnostic): options='-O -x -v zomsfglo,zomsfatl,zomsfpac,zomsfinp,zomsfind,zomsfinp0') Utils.setminmax(temp, 'vsftmyz') - self.send_file(temp, ModelingRealms.ocean, 'vsftmyz', self.startdate, self.member, self.chunk) + self.results.set_local_file(temp) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index e1f3a19a..ebeea233 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -135,6 +135,7 @@ class WorkManager(object): job.message = str(ex) Log.error('Job {0} failed: {1}', job, ex) job.status = DiagnosticStatus.FAILED + raise return False job.consumed_time = datetime.datetime.now() - time -- GitLab From 6b032b92234822b8e542c817c0036b49d5dc5d2c Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 29 Mar 2017 12:25:21 +0200 Subject: [PATCH 17/82] MaxMoc working --- earthdiagnostics/cmormanager.py | 21 ++++++-- earthdiagnostics/config.py | 2 +- earthdiagnostics/datafile.py | 14 +++++- earthdiagnostics/diagnostic.py | 3 +- earthdiagnostics/earthdiags.py | 79 ++---------------------------- earthdiagnostics/utils.py | 3 +- earthdiagnostics/work_manager.py | 84 +++++++++++++++++++++++++++++--- 7 files changed, 114 insertions(+), 92 deletions(-) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 32244fa8..f2c4077e 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -123,7 +123,7 @@ class CMORManager(DataManager): return self._get_file_from_storage(filepath) - def request_year(self, domain, var, startdate, member, year, grid=None, box=None, frequency=None): + def request_year(self, diagnostic, domain, var, startdate, member, year, grid=None, box=None, frequency=None): """ Copies a given file from the 
CMOR repository to the scratch folder and returns the path to the scratch's copy @@ -150,7 +150,11 @@ class CMORManager(DataManager): """ job = MergeYear(self, domain, var, startdate, member, year, grid, box, frequency) - + job.request_data() + job.declare_data_generated() + if not job.year_file.job_added: + diagnostic.subjobs.append(job) + job.year_file.job_added = True return job.year_file def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, @@ -696,13 +700,13 @@ class MergeYear(Diagnostic): def declare_data_generated(self): self.year_file = self.declare_year(self.domain, self.var, self.startdate, self.member, self.year, - grid=self.grid, box=self.box, frequency=self.frequency) + grid=self.grid, box=self.box) self.year_file.storage_status = StorageStatus.NO_STORE def compute(self): temp = self._merge_chunk_files() temp2 = self._select_data_of_given_year(temp) - return temp2 + self.year_file.set_local_file(temp2) def _select_data_of_given_year(self, data_file): temp2 = TempFile.get() @@ -739,3 +743,12 @@ class MergeYear(Diagnostic): for chunk_file in self.chunk_files: os.remove(chunk_file) return temp + + def __str__(self): + return 'Create year CMOR file Startdate: {0.startdate} Member: {0.member} Year: {0.year} ' \ + 'Variable: {0.domain}:{0.var} Grid: {0.grid} Box: {0.box}'.format(self) + + def __eq__(self, other): + return self.startdate == other.startdate and self.member == other.member and self.year == other.year and\ + self.domain == other.domain and self.var == other.var and self.grid == other.grid and \ + self.box == other.box \ No newline at end of file diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index ba1efb7a..4f88f3de 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -297,7 +297,7 @@ class ExperimentConfig(object): first_january += 1 years = list() - for chunk in range(first_january, self.num_chunks - chunks_per_year, chunks_per_year): + for 
chunk in range(first_january, chunks_per_year, self.num_chunks): years.append(first_year) first_year += 1 return years diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 0863f677..8828bc64 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -49,8 +49,12 @@ class DataFile(Publisher): self.var_type = VariableType.MEAN self._local_status = LocalStatus.NOT_REQUESTED self._storage_status = StorageStatus.READY + self.job_added = False self._modifiers = [] + def __str__(self): + return 'Data file for {0}'.format(self.remote_file) + def unsubscribe(self, who): super(DataFile, self).unsubscribe(who) if self.local_status == LocalStatus.READY and len(self.subscribers) == 0: @@ -137,8 +141,14 @@ class DataFile(Publisher): def upload(self): self.storage_status = StorageStatus.UPLOADING - Utils.copy_file(self.local_file, self.remote_file) - Log.info('File {0} uploaded!', self.remote_file) + try: + Utils.copy_file(self.local_file, self.remote_file) + except Exception as ex: + Log.error('File {0} can not be uploaded: {1}', self.remote_file, ex) + self.storage_status = StorageStatus.FAILED + return + + Log.result('File {0} uploaded!', self.remote_file) self.create_link() self.storage_status = StorageStatus.READY diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 36d53e82..746ecbaa 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -40,6 +40,7 @@ class Diagnostic(Publisher): self._status = DiagnosticStatus.WAITING self._requests = [] self.consumed_time = datetime.timedelta() + self.subjobs = [] def __repr__(self): return str(self) @@ -252,7 +253,7 @@ class Diagnostic(Publisher): return request def request_year(self, domain, var, startdate, member, year, grid=None, box=None, frequency=None, to_modify=False): - request = self.data_manager.request_year(domain, var, startdate, member, year, grid, box, frequency) + request = self.data_manager.request_year(self, domain, 
var, startdate, member, year, grid, box, frequency) if to_modify: request.add_modifier(self) self._requests.append(request) diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 9647f5ef..18b63f10 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -1,13 +1,10 @@ #!/usr/bin/env python # coding=utf-8 -import Queue import argparse import shutil -import threading import pkg_resources import netCDF4 -import operator import os from bscearth.utils.date import * import bscearth.utils.path @@ -17,12 +14,8 @@ from earthdiagnostics.cmormanager import CMORManager from earthdiagnostics.threddsmanager import THREDDSManager from earthdiagnostics import cdftools from earthdiagnostics.utils import TempFile, Utils -from earthdiagnostics.diagnostic import Diagnostic -from earthdiagnostics.ocean import * -from earthdiagnostics.general import * -from earthdiagnostics.statistics import * + from earthdiagnostics.variable import VariableManager -from earthdiagnostics.diagnostic import DiagnosticOptionError from work_manager import WorkManager @@ -149,15 +142,13 @@ class EarthDiags(object): os.chdir(self.config.scratch_dir) self._prepare_mesh_files() - - self._register_diagnostics() - self._prepare_data_manager() # Run diagnostics Log.info('Running diagnostics') - work_manager = WorkManager(self.config, self.prepare_job_list(), self.data_manager) + work_manager = WorkManager(self.config, self.data_manager) + work_manager.prepare_job_list() return work_manager.run() def _prepare_data_manager(self): @@ -167,70 +158,6 @@ class EarthDiags(object): self.data_manager = THREDDSManager(self.config) self.data_manager.prepare() - def prepare_job_list(self): - list_jobs = list() - for fulldiag in self.config.get_commands(): - Log.info("Adding {0} to diagnostic list", fulldiag) - diag_options = fulldiag.split(',') - - diag_class = Diagnostic.get_diagnostic(diag_options[0]) - if diag_class: - try: - for job in diag_class.generate_jobs(self, 
diag_options): - list_jobs.append(job) - continue - except DiagnosticOptionError as ex: - Log.error('Can not configure diagnostic {0}: {1}', diag_options[0], ex) - self.had_errors = True - else: - Log.error('{0} is not an available diagnostic', diag_options[0]) - self.had_errors = True - return list_jobs - - @staticmethod - def _register_diagnostics(): - EarthDiags._register_ocean_diagnostics() - EarthDiags._register_general_diagnostics() - EarthDiags._register_stats_diagnostics() - - @staticmethod - def _register_stats_diagnostics(): - Diagnostic.register(MonthlyPercentile) - Diagnostic.register(ClimatologicalPercentile) - - @staticmethod - def _register_general_diagnostics(): - Diagnostic.register(DailyMean) - Diagnostic.register(MonthlyMean) - Diagnostic.register(YearlyMean) - Diagnostic.register(Rewrite) - Diagnostic.register(Relink) - Diagnostic.register(RelinkAll) - Diagnostic.register(Scale) - Diagnostic.register(Attribute) - - @staticmethod - def _register_ocean_diagnostics(): - Diagnostic.register(MixedLayerSaltContent) - Diagnostic.register(Siasiesiv) - Diagnostic.register(VerticalMean) - Diagnostic.register(VerticalMeanMeters) - Diagnostic.register(Interpolate) - Diagnostic.register(InterpolateCDO) - Diagnostic.register(Moc) - Diagnostic.register(AreaMoc) - Diagnostic.register(MaxMoc) - Diagnostic.register(Psi) - Diagnostic.register(Gyres) - Diagnostic.register(ConvectionSites) - Diagnostic.register(CutSection) - Diagnostic.register(AverageSection) - Diagnostic.register(MixedLayerHeatContent) - Diagnostic.register(HeatContentLayer) - Diagnostic.register(HeatContent) - Diagnostic.register(RegionMean) - Diagnostic.register(Rotation) - def clean(self): Log.info('Removing scratch folder...') if os.path.exists(self.config.scratch_dir): diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index bb39bf42..d4aceeee 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -231,12 +231,13 @@ class Utils(object): hash_destiny = None 
hash_original = Utils.get_file_hash(source) - retrials = 5 + retrials = 3 while hash_original != hash_destiny: if retrials == 0: raise Exception('Can not move {0} to {1}'.format(source, destiny)) shutil.copyfile(source, destiny) hash_destiny = Utils.get_file_hash(destiny) + retrials -= 1 @staticmethod def move_file(source, destiny): diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index ebeea233..8e413fa3 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -5,21 +5,46 @@ import operator from bscearth.utils.log import Log from concurrent.futures import ThreadPoolExecutor -from datafile import StorageStatus, LocalStatus -from diagnostic import DiagnosticStatus -from utils import Utils, TempFile +from earthdiagnostics.datafile import StorageStatus, LocalStatus +from earthdiagnostics.diagnostic import DiagnosticStatus, Diagnostic, DiagnosticOptionError +from earthdiagnostics.utils import Utils, TempFile import threading +from earthdiagnostics.ocean import * +from earthdiagnostics.general import * +from earthdiagnostics.statistics import * + class WorkManager(object): - def __init__(self, config, job_list, data_manager): - self.jobs = job_list + def __init__(self, config, data_manager): + self.jobs = None self.config = config self.time = {} self.had_errors = False self.data_manager = data_manager + def prepare_job_list(self): + self._register_diagnostics() + list_jobs = list() + for fulldiag in self.config.get_commands(): + Log.info("Adding {0} to diagnostic list", fulldiag) + diag_options = fulldiag.split(',') + + diag_class = Diagnostic.get_diagnostic(diag_options[0]) + if diag_class: + try: + for job in diag_class.generate_jobs(self, diag_options): + list_jobs.append(job) + continue + except DiagnosticOptionError as ex: + Log.error('Can not configure diagnostic {0}: {1}', diag_options[0], ex) + self.had_errors = True + else: + Log.error('{0} is not an available diagnostic', diag_options[0]) + 
self.had_errors = True + self.jobs = list_jobs + def run(self): time = datetime.datetime.now() Log.info("Starting to compute at {0}", time) @@ -36,6 +61,8 @@ class WorkManager(object): job.request_data() job.declare_data_generated() job.subscribe(self, self._job_status_changed) + for subjob in job.subjobs: + subjob.subscribe(self, self._job_status_changed) job.check_is_ready() for file_object in self.data_manager.requested_files.values(): @@ -71,6 +98,7 @@ class WorkManager(object): self.downloader.submit(file_object.download) return if file_object.upload_required(): + Log.user_warning('Adding {0} Uploading', file_object) self.uploader.submit(file_object.upload) return self.check_completion() @@ -93,7 +121,6 @@ class WorkManager(object): return if request.download_required(): return - self.lock.release() return True @@ -135,7 +162,6 @@ class WorkManager(object): job.message = str(ex) Log.error('Job {0} failed: {1}', job, ex) job.status = DiagnosticStatus.FAILED - raise return False job.consumed_time = datetime.datetime.now() - time @@ -145,4 +171,48 @@ class WorkManager(object): count = 0 failed_jobs = list() + @staticmethod + def _register_diagnostics(): + WorkManager._register_ocean_diagnostics() + WorkManager._register_general_diagnostics() + WorkManager._register_stats_diagnostics() + + @staticmethod + def _register_stats_diagnostics(): + Diagnostic.register(MonthlyPercentile) + Diagnostic.register(ClimatologicalPercentile) + + @staticmethod + def _register_general_diagnostics(): + Diagnostic.register(DailyMean) + Diagnostic.register(MonthlyMean) + Diagnostic.register(YearlyMean) + Diagnostic.register(Rewrite) + Diagnostic.register(Relink) + Diagnostic.register(RelinkAll) + Diagnostic.register(Scale) + Diagnostic.register(Attribute) + + @staticmethod + def _register_ocean_diagnostics(): + Diagnostic.register(MixedLayerSaltContent) + Diagnostic.register(Siasiesiv) + Diagnostic.register(VerticalMean) + Diagnostic.register(VerticalMeanMeters) + 
Diagnostic.register(Interpolate) + Diagnostic.register(InterpolateCDO) + Diagnostic.register(Moc) + Diagnostic.register(AreaMoc) + Diagnostic.register(MaxMoc) + Diagnostic.register(Psi) + Diagnostic.register(Gyres) + Diagnostic.register(ConvectionSites) + Diagnostic.register(CutSection) + Diagnostic.register(AverageSection) + Diagnostic.register(MixedLayerHeatContent) + Diagnostic.register(HeatContentLayer) + Diagnostic.register(HeatContent) + Diagnostic.register(RegionMean) + Diagnostic.register(Rotation) + -- GitLab From 4a15e0c7a52039c56a8c967be428dba310129fc9 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 29 Mar 2017 12:50:27 +0200 Subject: [PATCH 18/82] Fixed bug in MaxMoc --- earthdiagnostics/datafile.py | 7 +++++-- earthdiagnostics/ocean/maxmoc.py | 23 ++++++++++++----------- earthdiagnostics/utils.py | 2 +- earthdiagnostics/work_manager.py | 1 - 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 8828bc64..b41f1ca5 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -148,8 +148,11 @@ class DataFile(Publisher): self.storage_status = StorageStatus.FAILED return - Log.result('File {0} uploaded!', self.remote_file) - self.create_link() + Log.info('File {0} uploaded!', self.remote_file) + try: + self.create_link() + except Exception as ex: + Log.warning('Link for file {0} can not be created: {1}', self.remote_file, ex) self.storage_status = StorageStatus.READY def set_local_file(self, local_file, diagnostic=None, rename_var=''): diff --git a/earthdiagnostics/ocean/maxmoc.py b/earthdiagnostics/ocean/maxmoc.py index 6906bf33..ab905b15 100644 --- a/earthdiagnostics/ocean/maxmoc.py +++ b/earthdiagnostics/ocean/maxmoc.py @@ -7,7 +7,7 @@ from earthdiagnostics.constants import Basins from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticBasinOption, DiagnosticFloatOption from earthdiagnostics.frequency 
import Frequencies -from earthdiagnostics.utils import Utils +from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.variable_type import VariableType @@ -120,7 +120,8 @@ class MaxMoc(Diagnostic): """ nco = Utils.nco - temp = self.data_manager.get_year(ModelingRealms.ocean, 'vsftmyz', self.startdate, self.member, self.year) + temp = TempFile.get() + Utils.copy_file(self.variable_file.local_file, temp) handler = Utils.openCdf(temp) if 'i' in handler.dimensions: @@ -150,7 +151,6 @@ class MaxMoc(Diagnostic): Log.info('Computing year {0}', str(self.year)) moc = handler.variables['vsftmyz'][:, lev_inds, lat_inds, basin_index] handler.close() - os.remove(temp) moc = np.mean(moc, 0) @@ -171,7 +171,7 @@ class MaxMoc(Diagnostic): Log.info('Maximum {0} Sv, latitude {1} depth {2} m', maximum, max_lat, max_lev) Log.info('Minimum {0} Sv, latitude {1} depth {2} m', minimum, min_lat, min_lev) - handler = self._create_output_file(temp) + handler, temp = self._create_output_file() var = handler.createVariable('vsftmyzmax', float, ('time',)) var.long_name = 'Maximum_Overturning' var.units = 'Sverdrup' @@ -181,7 +181,7 @@ class MaxMoc(Diagnostic): handler.close() self.results['vsftmyzmax'].set_local_file(temp) - handler = self._create_output_file(temp) + handler, temp = self._create_output_file() var = handler.createVariable('vsftmyzmaxlat', float, ('time',)) var.long_name = 'Latitude_of_Maximum_Overturning' var.units = 'Degrees' @@ -191,7 +191,7 @@ class MaxMoc(Diagnostic): handler.close() self.results['vsftmyzmaxlat'].set_local_file(temp) - handler = self._create_output_file(temp) + handler, temp = self._create_output_file() var = handler.createVariable('vsftmyzmaxlev', float, ('time',)) var.long_name = 'Depth_of_Maximum_Overturning' var.units = 'Meters' @@ -201,7 +201,7 @@ class MaxMoc(Diagnostic): handler.close() self.results['vsftmyzmaxlev'].set_local_file(temp) - handler = self._create_output_file(temp) 
+ handler, temp = self._create_output_file() var = handler.createVariable('vsftmyzmin', float, ('time',)) var.long_name = 'Minimum_Overturning' var.units = 'Sverdrup' @@ -211,7 +211,7 @@ class MaxMoc(Diagnostic): handler.close() self.results['vsftmyzmin'].set_local_file(temp) - handler = self._create_output_file(temp) + handler, temp = self._create_output_file() var = handler.createVariable('vsftmyzminlat', float, ('time',)) var.long_name = 'Latitude_of_Minimum_Overturning' var.units = 'Degrees' @@ -221,7 +221,7 @@ class MaxMoc(Diagnostic): handler.close() self.results['vsftmyzminlat'].set_local_file(temp) - handler = self._create_output_file(temp) + handler, temp = self._create_output_file() var = handler.createVariable('vsftmyzminlev', float, ('time',)) var.long_name = 'Depth_of_Minimum_Overturning' var.units = 'Meters' @@ -231,11 +231,12 @@ class MaxMoc(Diagnostic): handler.close() self.results['vsftmyzminlev'].set_local_file(temp) - def _create_output_file(self, temp): + def _create_output_file(self): + temp = TempFile.get() handler = netCDF4.Dataset(temp, 'w') handler.createDimension('time') time = handler.createVariable('time', 'i2', ('time',)) time.calendar = 'gregorian' time.units = 'days since January 1, {0}'.format(self.year) - return handler + return handler, temp diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index d4aceeee..4dc9abfd 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -234,7 +234,7 @@ class Utils(object): retrials = 3 while hash_original != hash_destiny: if retrials == 0: - raise Exception('Can not move {0} to {1}'.format(source, destiny)) + raise Exception('Can not copy {0} to {1}'.format(source, destiny)) shutil.copyfile(source, destiny) hash_destiny = Utils.get_file_hash(destiny) retrials -= 1 diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 8e413fa3..439db356 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -98,7 
+98,6 @@ class WorkManager(object): self.downloader.submit(file_object.download) return if file_object.upload_required(): - Log.user_warning('Adding {0} Uploading', file_object) self.uploader.submit(file_object.upload) return self.check_completion() -- GitLab From bf5c8493ef23220da99f56ac4e4e15f0e6b520b5 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 29 Mar 2017 15:14:38 +0200 Subject: [PATCH 19/82] Sqitching to auto-eceearth --- diags.conf | 2 +- earthdiagnostics/constants.py | 2 ++ earthdiagnostics/earthdiags.py | 2 +- earthdiagnostics/ocean/convectionsites.py | 2 -- earthdiagnostics/ocean/gyres.py | 20 ++++++++++++-------- earthdiagnostics/ocean/psi.py | 21 +++++++++++++-------- 6 files changed, 29 insertions(+), 20 deletions(-) diff --git a/diags.conf b/diags.conf index 2832a73a..a2a871ce 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = moc area_moc max_moc +DIAGS = gyres # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/constants.py b/earthdiagnostics/constants.py index 971e36a4..31e160ff 100644 --- a/earthdiagnostics/constants.py +++ b/earthdiagnostics/constants.py @@ -240,6 +240,8 @@ class Models(object): """ EC-Earth 3.1 ORCA0.25 L75 """ ECEARTH_3_2_O1L75 = 'Ec3.2_O1L75' """ EC-Earth 3.2 ORCA1 L75 """ + ECEARTH_3_2_O25L75 = 'Ec3.2_O25L75' + """ EC-Earth 3.2 ORCA0.25 L75 """ NEMO_3_2_O1L42 = 'N3.2_O1L42' """ NEMO 3.2 ORCA1 L42 """ diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 18b63f10..30474739 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -276,7 +276,7 @@ class EarthDiags(object): Log.info('File {0} already exists', destiny) return True - Log.info('Creating file {0}', destiny) + Log.info('Copying file {0}', destiny) shutil.copy(source, destiny) Log.info('File {0} ready', destiny) Utils.rename_variables(destiny, self.dic_variables, False, True) diff --git a/earthdiagnostics/ocean/convectionsites.py b/earthdiagnostics/ocean/convectionsites.py index e2bcddf6..081a7739 100644 --- a/earthdiagnostics/ocean/convectionsites.py +++ b/earthdiagnostics/ocean/convectionsites.py @@ -38,8 +38,6 @@ class ConvectionSites(Diagnostic): self.member = member self.chunk = chunk self.model_version = model_version - self.required_vars = ['vsftbarot'] - self.generated_vars = ['gyres'] self.mlotst_handler = None def __str__(self): diff --git a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index adecfb0c..7008f833 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -39,8 +39,6 @@ class Gyres(Diagnostic): self.member = member self.chunk = chunk self.model_version = model_version - self.required_vars = ['vsftbarot'] - self.generated_vars = ['gyres'] self.var_vsftbarot = None def __eq__(self, other): @@ -64,10 +62,17 @@ class Gyres(Diagnostic): if len(options) > 1: raise Exception('The gyres diagnostic has no options') job_list = list() + 
model_version = diags.config.experiment.model_version for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(Gyres(diags.data_manager, startdate, member, chunk, diags.model_version)) + job_list.append(Gyres(diags.data_manager, startdate, member, chunk, model_version)) return job_list + def request_data(self): + self.vsftbarot = self.request_chunk(ModelingRealms.ocean, 'vsftbarot', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.gyre = self.declare_chunk(ModelingRealms.ocean, 'gyre', self.startdate, self.member, self.chunk) + # noinspection PyPep8Naming def compute(self): """ @@ -86,15 +91,14 @@ class Gyres(Diagnostic): subtropInd = [320, 30, 110, 180] ACC = [1, 361, 1, 65] - elif self in [Models.ECEARTH_3_0_O25L46, Models.ECEARTH_3_0_O25L75, - Models.GLORYS2_V1_O25L75]: + elif self.model_version in [Models.ECEARTH_3_0_O25L46, Models.ECEARTH_3_0_O25L75, Models.GLORYS2_V1_O25L75, + Models.ECEARTH_3_2_O1L75, Models.ECEARTH_3_2_O25L75]: raise Exception("Option gyres not available yet for {0}".format(self.model_version)) else: raise Exception("Input grid {0} not recognized".format(self.model_version)) output = TempFile.get() - vsftbarot_file = self.data_manager.get_file(ModelingRealms.ocean, 'vsftbarot', self.startdate, - self.member, self.chunk) + vsftbarot_file = self.vsftbarot.local_file handler_original = Utils.openCdf(vsftbarot_file) self.var_vsftbarot = handler_original.variables['vsftbarot'] @@ -145,7 +149,7 @@ class Gyres(Diagnostic): handler.close() handler_original.close() - self.send_file(output, ModelingRealms.ocean, 'gyre', self.startdate, self.member, self.chunk) + self.gyre.set_file_ready(output) Log.info('Finished gyres for startdate {0}, member {1}, chunk {2}', self.startdate, self.member, self.chunk) def _gyre(self, site, invert=False): diff --git a/earthdiagnostics/ocean/psi.py b/earthdiagnostics/ocean/psi.py index 072bfb87..fd1ee553 100644 --- a/earthdiagnostics/ocean/psi.py 
+++ b/earthdiagnostics/ocean/psi.py @@ -28,13 +28,13 @@ class Psi(Diagnostic): alias = 'psi' "Diagnostic alias for the configuration file" + vsftbarot = 'vsftbarot' + def __init__(self, data_manager, startdate, member, chunk): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member self.chunk = chunk - self.required_vars = ['vo', 'uo'] - self.generated_vars = ['vsftbarot'] def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk @@ -60,14 +60,19 @@ class Psi(Diagnostic): job_list.append(Psi(diags.data_manager, startdate, member, chunk)) return job_list + def request_data(self): + self.uo = self.request_chunk(ModelingRealms.ocean, 'uo', self.startdate, self.member, self.chunk) + self.vo = self.request_chunk(ModelingRealms.ocean, 'vo', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.psi = self.declare_chunk(ModelingRealms.ocean, Psi.vsftbarot, self.startdate, self.member, self.chunk) + def compute(self): """ Runs the diagnostic """ temp = TempFile.get() - input_file_u = self.data_manager.get_file(ModelingRealms.ocean, 'uo', self.startdate, self.member, self.chunk) - input_file_v = self.data_manager.get_file(ModelingRealms.ocean, 'vo', self.startdate, self.member, self.chunk) - cdftools.run('cdfpsi', input=[input_file_u, input_file_v], output=temp, options='-mean -mask') - Utils.rename_variable(temp, 'sobarstf', 'vsftbarot') - Utils.setminmax(temp, 'vsftbarot') - self.send_file(temp, ModelingRealms.ocean, 'vsftbarot', self.startdate, self.member, self.chunk) + cdftools.run('cdfpsi', input=[self.uo.local_file, self.vo.local_file], output=temp, options='-mean -mask') + Utils.rename_variable(temp, 'sobarstf', Psi.vsftbarot) + Utils.setminmax(temp, Psi.vsftbarot) + self.psi.set_local_file(temp) -- GitLab From ec96e7aeb85a808d8d49dbb10719e34ad0f0d573 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 29 Mar 2017 17:06:43 
+0200 Subject: [PATCH 20/82] Applied improvements on interpcdo --- diags.conf | 6 +- earthdiagnostics/ocean/averagesection.py | 15 ++-- earthdiagnostics/ocean/interpolatecdo.py | 104 ++++++++++++++++++++--- 3 files changed, 104 insertions(+), 21 deletions(-) diff --git a/diags.conf b/diags.conf index a2a871ce..32d56eb6 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = gyres +DIAGS = avgsection,atmos,tas,190,220,-90,90 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -115,8 +115,8 @@ UOHC = ohc,glob,0,1,17 OHC_SPECIFIED_LAYER = ohclayer,0,300 ohclayer,300,800 3DTEMP = interp,thetao 3DSAL = interp,so -TSEC_AVE190-220E =avgsection,thetao,190,220,-90,90 -SSEC_AVE190-220E =avgsection,so,190,220,-90,90 +TSEC_AVE190-220E =avgsection,ocean,thetao,190,220,-90,90,regular +SSEC_AVE190-220E =avgsection,ocean,so,190,220,-90,90,regular VERT_SSECTIONS = cutsection,so,Z,0 cutsection,so,Z,45 cutsection,so,Z,-45 cutsection,so,M,-30 cutsection,so,M,180 cutsection,so,M,80 VERT_TSECTIONS = cutsection,thetao,Z,0 cutsection,thetao,Z,45 cutsection,thetao,Z,-45 cutsection,thetao,M,-30 cutsection,thetao,M,180 cutsection,thetao,M,80 SIASIESIV = siasiesiv,glob diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py index 96c1162f..e558395e 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -1,8 +1,7 @@ # coding=utf-8 import os from earthdiagnostics.box import Box -from earthdiagnostics.diagnostic import Diagnostic, 
DiagnosticIntOption, DiagnosticDomainOption, \ - DiagnosticVariableOption +from earthdiagnostics.diagnostic import * from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms @@ -37,7 +36,7 @@ class AverageSection(Diagnostic): alias = 'avgsection' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, variable, box): + def __init__(self, data_manager, startdate, member, chunk, domain, variable, box, grid): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -45,6 +44,7 @@ class AverageSection(Diagnostic): self.variable = variable self.domain = domain self.box = box + self.grid = grid def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ @@ -65,12 +65,13 @@ class AverageSection(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), + options_available = (DiagnosticDomainOption('domain'), + DiagnosticVariableOption('variable'), DiagnosticIntOption('min_lon'), DiagnosticIntOption('max_lon'), DiagnosticIntOption('min_lat'), DiagnosticIntOption('max_lat'), - DiagnosticDomainOption('domain', ModelingRealms.ocean)) + DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) box = Box() box.min_lon = options['min_lon'] @@ -80,7 +81,7 @@ class AverageSection(Diagnostic): job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(AverageSection(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], box)) + options['domain'], options['variable'], box, options['grid'])) return job_list def compute(self): @@ -89,7 +90,7 @@ class AverageSection(Diagnostic): """ temp = TempFile.get() variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, 
self.member, self.chunk, - grid='regular') + grid=self.grid) Utils.cdo.zonmean(input='-sellonlatbox,{0},{1},{2},{3} {4}'.format(self.box.min_lon, self.box.max_lon, self.box.min_lat, self.box.max_lat, variable_file), diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index bff647ad..d46d3615 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -1,11 +1,13 @@ # coding=utf-8 +from bscearth.utils.log import Log + from earthdiagnostics.constants import Basins -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticBoolOption, \ - DiagnosticVariableOption +from earthdiagnostics.diagnostic import * from earthdiagnostics.utils import Utils, TempFile import numpy as np from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms +import numpy as np class InterpolateCDO(Diagnostic): @@ -36,8 +38,15 @@ class InterpolateCDO(Diagnostic): alias = 'interpcdo' "Diagnostic alias for the configuration file" + BILINEAR = 'bilinear' + BICUBIC = 'bicubic' + CONSERVATIVE = 'conservative' + CONSERVATIVE2 = 'conservative2' + + METHODS = [BILINEAR, BICUBIC, CONSERVATIVE, CONSERVATIVE2] + def __init__(self, data_manager, startdate, member, chunk, domain, variable, target_grid, model_version, - mask_oceans, original_grid): + mask_oceans, original_grid, weights): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -51,6 +60,7 @@ class InterpolateCDO(Diagnostic): self.grid = target_grid self.mask_oceans = mask_oceans self.original_grid = original_grid + self.weights = weights def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ @@ -77,18 +87,81 @@ class InterpolateCDO(Diagnostic): options_available = (DiagnosticVariableOption('variable'), DiagnosticOption('target_grid', diags.config.experiment.atmos_grid.lower()), 
DiagnosticDomainOption('domain', ModelingRealms.ocean), + DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), DiagnosticBoolOption('mask_oceans', True), - DiagnosticOption('original_grid')) + DiagnosticOption('original_grid', '')) options = cls.process_options(options, options_available) target_grid = cls._translate_ifs_grids_to_cdo_names(options['target_grid']) job_list = list() + weights = TempFile.get() + method = options['method'].lower() + temp = cls.get_sample_grid_file() + + if method == InterpolateCDO.BILINEAR: + Utils.cdo.genbil(target_grid, input=temp, output=weights) + elif method == InterpolateCDO.BICUBIC: + Utils.cdo.genbic(target_grid, input=temp, output=weights) + elif method == InterpolateCDO.CONSERVATIVE: + Utils.cdo.gencon(target_grid, input=temp, output=weights) + elif method == InterpolateCDO.CONSERVATIVE2: + Utils.cdo.gencon2(target_grid, input=temp, output=weights) + + target_grid = '{0}_{1}'.format(target_grid, method) + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(InterpolateCDO(diags.data_manager, startdate, member, chunk, options['domain'], options['variable'], target_grid, diags.config.experiment.model_version, options['mask_oceans'], - options['original_grid'])) + options['original_grid'], weights)) return job_list + @classmethod + def get_sample_grid_file(cls): + temp = TempFile.get() + Utils.nco.ncks(input='mask.nc', output=temp, options='-O -v tmask,lat,lon,gphif,glamf') + handler = Utils.openCdf(temp) + lon = handler.variables['lon'] + lon.units = "degrees_east" + lon.long_name = "Longitude" + lon.nav_model = "Default grid" + lon.standard_name = "longitude" + lon.short_name = "lon" + lon.bounds = 'lon_bnds' + + lat = handler.variables['lat'] + lat.units = "degrees_north" + lat.long_name = "Latitude" + lat.nav_model = "Default grid" + lat.standard_name = "latitude" + lat.short_name = "lat" + lat.bounds = 'lat_bnds' + + handler.createDimension('bounds', 4) + + 
lon_bnds = handler.createVariable('lon_bnds', lon.datatype, ('j', 'i', 'bounds')) + corner_lat = handler.variables['glamf'][0, ...] + lon_bnds[:, :, 0] = corner_lat + lon_bnds[:, :, 1] = np.roll(corner_lat, 1, 0) + lon_bnds[:, :, 2] = np.roll(corner_lat, -1, 1) + lon_bnds[:, :, 3] = np.roll(lon_bnds[:, :, 1], -1, 1) + + lat_bnds = handler.createVariable('lat_bnds', lat.datatype, ('j', 'i', 'bounds')) + corner_lat = handler.variables['gphif'][0, ...] + lat_bnds[:, :, 0] = corner_lat + lat_bnds[:, :, 1] = np.roll(corner_lat, 1, 0) + lat_bnds[:, :, 2] = np.roll(corner_lat, 1, 1) + lat_bnds[:, :, 3] = np.roll(lat_bnds[:, :, 1], 1, 1) + lat_bnds[0, :, 1] = lat_bnds[1, 0, 1] - 1 + lat_bnds[0, :, 3] = lat_bnds[1, 0, 3] - 1 + + tmask = handler.variables['tmask'] + tmask.coordinates = 'time lev lat lon' + + handler.close() + + Utils.nco.ncks(input=temp, output=temp, options='-O -x -v gphif,glamf') + return temp + @classmethod def _translate_ifs_grids_to_cdo_names(cls, target_grid): if target_grid.upper().startswith('T159L'): @@ -105,19 +178,28 @@ class InterpolateCDO(Diagnostic): """ variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.original_grid) + handler = Utils.openCdf(variable_file) + var = handler.variables[self.variable] + + coordinates = list() + for dim in var.dimensions: + if dim == 'i': + coordinates.append('lat') + elif dim == 'j': + coordinates.append('lon') + else: + coordinates.append(dim) + var.coordinates = ' '.join(coordinates) if self.mask_oceans: - handler = Utils.openCdf(variable_file) - var = handler.variables[self.variable] + mask = Utils.get_mask(Basins.Global).astype(float) mask[mask == 0] = np.nan var[:] = mask * var[:] - handler.close() + handler.close() - cdo = Utils.cdo temp = TempFile.get() - cdo.remapbil(self.grid, input=variable_file, output=temp) - Utils.setminmax(temp, self.variable) + Utils.cdo.remap(','.join((self.grid.split('_')[0], self.weights)), 
input=variable_file, output=temp) self.send_file(temp, self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) -- GitLab From 335ab94ccaf9cbd39a3d9297afd2a60e73357af8 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 09:27:29 +0200 Subject: [PATCH 21/82] Adapted averagesection and interpolatecdo --- diags.conf | 2 +- earthdiagnostics/ocean/averagesection.py | 14 +++++++++---- earthdiagnostics/ocean/interpolatecdo.py | 25 +++++++++++++++++------- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/diags.conf b/diags.conf index 32d56eb6..fc976430 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = avgsection,atmos,tas,190,220,-90,90 +DIAGS = interpcdo,ocean,tos,t170grid avgsection,ocean,tos,190,220,-90,90,t170grid # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py index e558395e..544e2f73 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -84,17 +84,23 @@ class AverageSection(Diagnostic): options['domain'], options['variable'], box, options['grid'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + + def declare_data_generated(self): + self.mean = self.declare_chunk(self.domain, self.variable + 'mean', self.startdate, self.member, self.chunk, + box=self.box, grid=self.grid) + def compute(self): """ Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + variable_file = self.variable_file.local_file Utils.cdo.zonmean(input='-sellonlatbox,{0},{1},{2},{3} {4}'.format(self.box.min_lon, self.box.max_lon, self.box.min_lat, self.box.max_lat, variable_file), output=temp) os.remove(variable_file) - self.send_file(temp, self.domain, self.variable + 'mean', self.startdate, self.member, self.chunk, - box=self.box, grid='regular') + self.mean.set_local_file(temp, rename_var='tos') \ No newline at end of file diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index d46d3615..d298aa51 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -84,9 +84,9 @@ class InterpolateCDO(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), + options_available = (DiagnosticDomainOption('domain', ModelingRealms.ocean), + DiagnosticVariableOption('variable'), DiagnosticOption('target_grid', diags.config.experiment.atmos_grid.lower()), - DiagnosticDomainOption('domain', ModelingRealms.ocean), 
DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), DiagnosticBoolOption('mask_oceans', True), DiagnosticOption('original_grid', '')) @@ -97,6 +97,10 @@ class InterpolateCDO(Diagnostic): method = options['method'].lower() temp = cls.get_sample_grid_file() + if not target_grid: + raise Exception('Target grid not defined for interpcdo {0}:{1}'.format(options['domain'], + options['variable'])) + if method == InterpolateCDO.BILINEAR: Utils.cdo.genbil(target_grid, input=temp, output=weights) elif method == InterpolateCDO.BICUBIC: @@ -106,8 +110,6 @@ class InterpolateCDO(Diagnostic): elif method == InterpolateCDO.CONSERVATIVE2: Utils.cdo.gencon2(target_grid, input=temp, output=weights) - target_grid = '{0}_{1}'.format(target_grid, method) - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(InterpolateCDO(diags.data_manager, startdate, member, chunk, options['domain'], options['variable'], target_grid, @@ -172,12 +174,20 @@ class InterpolateCDO(Diagnostic): target_grid = 't340grid' return target_grid + def request_data(self): + self.original = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.original_grid) + + def declare_data_generated(self): + self.regridded = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.original_grid) + variable_file = TempFile.get() + Utils.copy_file(self.original.local_file, variable_file) handler = Utils.openCdf(variable_file) var = handler.variables[self.variable] @@ -200,7 +210,8 @@ class InterpolateCDO(Diagnostic): temp = TempFile.get() Utils.cdo.remap(','.join((self.grid.split('_')[0], self.weights)), input=variable_file, output=temp) - self.send_file(temp, self.domain, self.variable, 
self.startdate, self.member, self.chunk, grid=self.grid) + self.regridded.set_local_file(temp) + -- GitLab From ff28693d8adeeddfd258817a95a1fca6d9387f3f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 09:31:05 +0200 Subject: [PATCH 22/82] Convectionsites updated --- earthdiagnostics/ocean/convectionsites.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/earthdiagnostics/ocean/convectionsites.py b/earthdiagnostics/ocean/convectionsites.py index 081a7739..74271682 100644 --- a/earthdiagnostics/ocean/convectionsites.py +++ b/earthdiagnostics/ocean/convectionsites.py @@ -65,6 +65,12 @@ class ConvectionSites(Diagnostic): job_list.append(ConvectionSites(diags.data_manager, startdate, member, chunk, diags.model_version)) return job_list + def request_data(self): + self.mixed_layer = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.sites = self.declare_chunk(ModelingRealms.ocean, 'site', self.startdate, self.member, self.chunk) + def compute(self): """ Runs the diagnostic @@ -78,13 +84,13 @@ class ConvectionSites(Diagnostic): wedell = [225, 280, 1, 50] elif self.model_version in [Models.ECEARTH_3_0_O25L46, Models.ECEARTH_3_0_O25L75, + Models.ECEARTH_3_2_O25L75, Models.ECEARTH_3_2_O1L75, Models.GLORYS2_V1_O25L75]: raise Exception("Option convection not available yet for {0}".format(self.model_version)) else: raise Exception("Input grid {0} not recognized".format(self.model_version)) - mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, - self.chunk) + mlotst_file = self.mixed_layer.local_file output = TempFile.get() self.mlotst_handler = Utils.openCdf(mlotst_file) @@ -113,7 +119,7 @@ class ConvectionSites(Diagnostic): self.mlotst_handler.close() handler.close() - self.send_file(output, ModelingRealms.ocean, 'site', self.startdate, self.member, self.chunk) + 
self.sites.set_local_file(output) Log.info('Finished convection sites for startdate {0}, member {1}, chunk {2}', self.startdate, self.member, self.chunk) -- GitLab From 3becfa34bb71b2ab0c5a1f608fc9746355f57e35 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 10:00:52 +0200 Subject: [PATCH 23/82] Cutsection updated --- diags.conf | 4 ++-- earthdiagnostics/ocean/cutsection.py | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/diags.conf b/diags.conf index fc976430..e9ef0a37 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = interpcdo,ocean,tos,t170grid avgsection,ocean,tos,190,220,-90,90,t170grid +DIAGS = VERT_SSECTIONS # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
@@ -117,7 +117,7 @@ OHC_SPECIFIED_LAYER = ohclayer,0,300 ohclayer,300,800 3DSAL = interp,so TSEC_AVE190-220E =avgsection,ocean,thetao,190,220,-90,90,regular SSEC_AVE190-220E =avgsection,ocean,so,190,220,-90,90,regular -VERT_SSECTIONS = cutsection,so,Z,0 cutsection,so,Z,45 cutsection,so,Z,-45 cutsection,so,M,-30 cutsection,so,M,180 cutsection,so,M,80 +VERT_SSECTIONS = cutsection,so,Z,0 cutsection,so,Z,45 cutsection,so,Z,-45 cutsection,so,M,-30 cutsection,so,M,80 VERT_TSECTIONS = cutsection,thetao,Z,0 cutsection,thetao,Z,45 cutsection,thetao,Z,-45 cutsection,thetao,M,-30 cutsection,thetao,M,180 cutsection,thetao,M,80 SIASIESIV = siasiesiv,glob diff --git a/earthdiagnostics/ocean/cutsection.py b/earthdiagnostics/ocean/cutsection.py index 282c0f5d..a616285b 100644 --- a/earthdiagnostics/ocean/cutsection.py +++ b/earthdiagnostics/ocean/cutsection.py @@ -50,6 +50,14 @@ class CutSection(Diagnostic): self.zonal = zonal self.value = value + self.box = Box() + if self.zonal: + self.box.max_lon = self.value + self.box.min_lon = self.value + else: + self.box.max_lat = self.value + self.box.min_lat = self.value + def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ self.domain == other.domain and self.variable == other.variable and self.zonal == other.zonal and \ @@ -83,6 +91,13 @@ class CutSection(Diagnostic): options['domain'], options['variable'], options['zonal'], options['value'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.section = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + box=self.box) + def compute(self): """ Runs the diagnostic @@ -170,14 +185,6 @@ class CutSection(Diagnostic): file_var.missing_value = 1e20 handler.close() - box = Box() - if self.zonal: - box.max_lon = self.value - 
box.min_lon = self.value - else: - box.max_lat = self.value - box.min_lat = self.value - - self.send_file(temp, self.domain, self.variable, self.startdate, self.member, self.chunk, box=box) + self.section.set_local_file(temp) Log.info('Finished cut section for startdate {0}, member {1}, chunk {2}', self.startdate, self.member, self.chunk) -- GitLab From 24816d2b036e62ac15ff1b399cdbfd76dbf1cdaa Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 10:07:20 +0200 Subject: [PATCH 24/82] HeatContent updated --- diags.conf | 2 +- earthdiagnostics/ocean/heatcontent.py | 37 ++++++++++++++++----------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/diags.conf b/diags.conf index e9ef0a37..e1b0431f 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = VERT_SSECTIONS +DIAGS = ohc uohc lohc mohc xohc # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index c6755582..77495bf2 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -83,17 +83,32 @@ class HeatContent(Diagnostic): options['basin'], options['mixed_layer'], box)) return job_list + def request_data(self): + self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) + if self.mxloption != 0: + self.mlotst = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + if self.box.min_depth == 0: + # For cdftools, this is all levels + box_save = None + else: + box_save = self.box + + self.heatcsum = self.declare_chunk(ModelingRealms.ocean, 'heatcsum', self.startdate, self.member, self.chunk, + box=box_save, region=self.basin.fullname) + self.heatcmean = self.declare_chunk(ModelingRealms.ocean, 'heatcvmean', self.startdate, self.member, self.chunk, + box=box_save, region=self.basin.fullname) + def compute(self): """ Runs the diagnostic """ nco = Utils.nco - temperature_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', self.startdate, - self.member, self.chunk) + temperature_file = TempFile.get() + Utils.copy_file(self.thetao.local_file, temperature_file) if self.mxloption != 0: - mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, - self.member, self.chunk) - nco.ncks(input=mlotst_file, output=temperature_file, options='-A -v mlotst') + nco.ncks(input=self.mlotst.local_file, output=temperature_file, options='-A -v mlotst') para = list() if self.box.min_depth != 0: @@ -145,15 +160,7 @@ class HeatContent(Diagnostic): results.close() - if self.box.min_depth == 0: - # For cdftools, this is all levels - box_save = None - else: - box_save = self.box - Utils.setminmax(heatcsum_temp, 'heatcsum') - self.send_file(heatcsum_temp, ModelingRealms.ocean, 'heatcsum', 
self.startdate, self.member, self.chunk, - box=box_save, region=self.basin.fullname) + self.heatcsum.set_local_file(heatcsum_temp) Utils.setminmax(heatcvmean_temp, 'heatcvmean') - self.send_file(heatcvmean_temp, ModelingRealms.ocean, 'heatcvmean', self.startdate, self.member, self.chunk, - box=box_save, region=self.basin.fullname) + self.heatcmean.set_local_file(heatcvmean_temp) -- GitLab From 75249dd090dfd00ae7568357beb2e442429d302b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 10:10:29 +0200 Subject: [PATCH 25/82] HeatContentLayer updated --- diags.conf | 2 +- earthdiagnostics/ocean/heatcontentlayer.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/diags.conf b/diags.conf index e1b0431f..c2a95be8 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = ohc uohc lohc mohc xohc +DIAGS = OHC_SPECIFIED_LAYER # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/ocean/heatcontentlayer.py b/earthdiagnostics/ocean/heatcontentlayer.py index cf398972..6d48b27b 100644 --- a/earthdiagnostics/ocean/heatcontentlayer.py +++ b/earthdiagnostics/ocean/heatcontentlayer.py @@ -150,15 +150,22 @@ class HeatContentLayer(Diagnostic): weight, min_level, max_level)) return job_list + def request_data(self): + self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao',self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.heatc = self.declare_chunk(ModelingRealms.ocean, 'heatc', self.startdate, self.member, self.chunk, + box=self.box) + def compute(self): """ Runs the diagnostic """ nco = Utils.nco + thetao_file = TempFile.get() results = TempFile.get() - thetao_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', - self.startdate, self.member, self.chunk) + Utils.copy_file(self.thetao.local_file, thetao_file) handler = Utils.openCdf(thetao_file) heatc_sl = np.sum(handler.variables['thetao'][:, self.min_level:self.max_level, :] * self.weight, 1) @@ -175,4 +182,4 @@ class HeatContentLayer(Diagnostic): handler_results.close() Utils.setminmax(results, 'heatc') - self.send_file(results, ModelingRealms.ocean, 'heatc', self.startdate, self.member, self.chunk, box=self.box) + self.heatc.set_local_file(results) -- GitLab From 26985534edfd49c0cf77cf1eac2a8d2fbb7a71a2 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 10:22:36 +0200 Subject: [PATCH 26/82] Interpolate updated --- earthdiagnostics/ocean/interpolate.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index 35bbcb4c..4c77e004 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -98,12 +98,20 @@ class Interpolate(Diagnostic): diags.config.experiment.model_version, options['invert_lat'], options['original_grid'])) return job_list + def 
request_data(self): + self.original = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.original_grid) + + def declare_data_generated(self): + self.regridded = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.original_grid) + variable_file = TempFile.get() + Utils.copy_file(self.original.local_file, variable_file) Utils.rename_variables(variable_file, {'i': 'x', 'j': 'y'}, must_exist=False, rename_dimension=True) cdo = Utils.cdo nco = Utils.nco @@ -147,7 +155,7 @@ class Interpolate(Diagnostic): if not has_levels: nco.ncks(input=temp2, output=temp2, options='-O -v {0},lat,lon,time'.format(self.variable)) - self.send_file(temp2, self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) + self.regridded.set_local_file(temp2) def _get_level_file(self, lev): if not self.tempTemplate: -- GitLab From f201b05c52002a0aab9937d5e506d2aff2e0db2b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 10:35:28 +0200 Subject: [PATCH 27/82] Mixed layer salt and heat content updated --- diags.conf | 2 +- .../ocean/mixedlayerheatcontent.py | 18 +++++++++++------- .../ocean/mixedlayersaltcontent.py | 17 +++++++++++------ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/diags.conf b/diags.conf index c2a95be8..129e96bb 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). 
If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = OHC_SPECIFIED_LAYER +DIAGS = HEAT_SAL_MXL # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/ocean/mixedlayerheatcontent.py b/earthdiagnostics/ocean/mixedlayerheatcontent.py index e8800901..2d8de794 100644 --- a/earthdiagnostics/ocean/mixedlayerheatcontent.py +++ b/earthdiagnostics/ocean/mixedlayerheatcontent.py @@ -63,16 +63,20 @@ class MixedLayerHeatContent(Diagnostic): job_list.append(MixedLayerHeatContent(diags.data_manager, startdate, member, chunk)) return job_list + def request_data(self): + self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) + self.mlotst = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.ohcsum = self.declare_chunk(ModelingRealms.ocean, 'ohcvsumlotst', self.startdate, self.member, self.chunk) + def compute(self): """ Runs the diagnostic """ - temperature_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', - self.startdate, self.member, self.chunk) - mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', - self.startdate, self.member, self.chunk) - - Utils.nco.ncks(input=mlotst_file, output=temperature_file, options='-A -v mlotst') + temperature_file = TempFile.get() + Utils.copy_file(self.thetao.local_file, temperature_file) + Utils.nco.ncks(input=self.mlotst.local_file, output=temperature_file, options='-A -v mlotst') temp = TempFile.get() cdftools.run('cdfmxlheatc', input=temperature_file, output=temp) @@ -81,4 +85,4 @@ class MixedLayerHeatContent(Diagnostic): Utils.rename_variables(temp, {'x': 'i', 'y': 'j', 'somxlheatc': 'ohcvsumlotst'}, False, True) Utils.setminmax(temp, 'ohcvsumlotst') 
- self.send_file(temp, ModelingRealms.ocean, 'ohcvsumlotst', self.startdate, self.member, self.chunk) + self.ohcsum.set_local_file(temp) diff --git a/earthdiagnostics/ocean/mixedlayersaltcontent.py b/earthdiagnostics/ocean/mixedlayersaltcontent.py index 0127fe64..05c0854b 100644 --- a/earthdiagnostics/ocean/mixedlayersaltcontent.py +++ b/earthdiagnostics/ocean/mixedlayersaltcontent.py @@ -61,15 +61,20 @@ class MixedLayerSaltContent(Diagnostic): job_list.append(MixedLayerSaltContent(diags.data_manager, startdate, member, chunk)) return job_list + def request_data(self): + self.so = self.request_chunk(ModelingRealms.ocean, 'so', self.startdate, self.member, self.chunk) + self.mlotst = self.request_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.sosum = self.declare_chunk(ModelingRealms.ocean, 'scvsummlotst', self.startdate, self.member, self.chunk) + def compute(self): """ Runs the diagnostic """ - salinity_file = self.data_manager.get_file(ModelingRealms.ocean, 'so', self.startdate, self.member, self.chunk) - mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', - self.startdate, self.member, self.chunk) - - Utils.nco.ncks(input=mlotst_file, output=salinity_file, options='-A -v mlotst') + salinity_file = TempFile.get() + Utils.copy_file(self.so.local_file, salinity_file) + Utils.nco.ncks(input=self.mlotst.local_file, output=salinity_file, options='-A -v mlotst') temp = TempFile.get() cdftools.run('cdfmxlsaltc', input=salinity_file, output=temp) @@ -77,4 +82,4 @@ class MixedLayerSaltContent(Diagnostic): Utils.rename_variables(temp, {'x': 'i', 'y': 'j', 'somxlsaltc': 'scvsummlotst'}, False, True) Utils.setminmax(temp, 'scvsummlotst') - self.send_file(temp, ModelingRealms.ocean, 'scvsummlotst', self.startdate, self.member, self.chunk) + self.sosum.set_local_file(temp) -- GitLab From 9ef9304afb5374af4b6b9b35eeef1d7044beb5b4 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor 
Date: Thu, 30 Mar 2017 10:42:34 +0200 Subject: [PATCH 28/82] Region mean updated --- diags.conf | 2 +- earthdiagnostics/ocean/regionmean.py | 53 +++++++++++++++++++--------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/diags.conf b/diags.conf index 129e96bb..dedc0f90 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = HEAT_SAL_MXL +DIAGS = regmean,ocean,tos # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index e815b12c..daa96f8a 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -51,8 +51,7 @@ class RegionMean(Diagnostic): self.basin = basin self.variance = variance self.grid = grid - self.required_vars = [variable] - self.generated_vars = [variable + 'vmean'] + self.declared = {} def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ @@ -95,14 +94,30 @@ class RegionMean(Diagnostic): options['save3D'], options['basin'], options['variance'], options['grid'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def declare_data_generated(self): + if self.box.min_depth == 0: + # To cdftools, this means all levels + box_save = None + else: + box_save = self.box + + self.declare_var('mean', False, box_save) + self.declare_var('mean', True, box_save) + + 
if self.variance: + self.declare_var('var', False, box_save) + self.declare_var('var', True, box_save) + def compute(self): """ Runs the diagnostic """ mean_file = TempFile.get() - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + variable_file = self.variable_file.local_file handler = Utils.openCdf(variable_file) self.save3d &= 'lev' in handler.dimensions @@ -119,22 +134,16 @@ class RegionMean(Diagnostic): cdftools.run('cdfmean', input=variable_file, output=mean_file, options=cdfmean_options) Utils.rename_variables(mean_file, {'gdept': 'lev', 'gdepw': 'lev'}, must_exist=False, rename_dimension=True) - if self.box.min_depth == 0: - # To cdftools, this means all levels - box_save = None - else: - box_save = self.box - - self.send_var('mean', False, box_save, mean_file) - self.send_var('mean', True, box_save, mean_file) + self.send_var('mean', False, mean_file) + self.send_var('mean', True, mean_file) if self.variance: - self.send_var('var', False, box_save, mean_file) - self.send_var('var', True, box_save, mean_file) + self.send_var('var', False, mean_file) + self.send_var('var', True, mean_file) os.remove(mean_file) - def send_var(self, var, threed, box_save, mean_file): + def send_var(self, var, threed, mean_file): if threed: if not self.save3d: return False @@ -148,6 +157,16 @@ class RegionMean(Diagnostic): temp2 = TempFile.get() Utils.nco.ncks(input=mean_file, output=temp2, options='-O -v {0},lat,lon{1}'.format(original_name, levels)) - self.send_file(temp2, ModelingRealms.ocean, final_name, self.startdate, self.member, self.chunk, - box=box_save, rename_var=original_name, region=self.basin, grid=self.grid) + self.declared[final_name].set_local_file(temp2, rename_var=original_name) + + def declare_var(self, var, threed, box_save): + if threed: + if not self.save3d: + return False + final_name = '{1}3d{0}'.format(var, self.variable) + else: + final_name = '{1}{0}'.format(var, 
self.variable) + + self.declared[final_name] = self.declare_chunk(ModelingRealms.ocean, final_name, self.startdate, self.member, + self.chunk, box=box_save, region=self.basin, grid=self.grid) -- GitLab From c266fc2c2a89e52c82ea46ad89c7e38f4be3009e Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 12:39:03 +0200 Subject: [PATCH 29/82] Siasiesiv updated --- diags.conf | 2 +- earthdiagnostics/diagnostic.py | 2 +- earthdiagnostics/ocean/regionmean.py | 3 +- earthdiagnostics/ocean/siasiesiv.py | 60 ++++++++++++++-------------- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/diags.conf b/diags.conf index dedc0f90..db366016 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = regmean,ocean,tos +DIAGS = siasiesiv # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 746ecbaa..0e799e1d 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -265,7 +265,7 @@ class Diagnostic(Publisher): return from datafile import LocalStatus if request.local_status == LocalStatus.FAILED: - self.reason = 'Required file {0} is not available'.format(request.remote_file) + self.message = 'Required file {0} is not available'.format(request.remote_file) self.status = DiagnosticStatus.FAILED return diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index daa96f8a..59012044 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -96,7 +96,8 @@ class RegionMean(Diagnostic): def request_data(self): self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + grid=self.grid) + def declare_data_generated(self): if self.box.min_depth == 0: # To cdftools, this means all levels diff --git a/earthdiagnostics/ocean/siasiesiv.py b/earthdiagnostics/ocean/siasiesiv.py index f38b7c20..1dd8a604 100644 --- a/earthdiagnostics/ocean/siasiesiv.py +++ b/earthdiagnostics/ocean/siasiesiv.py @@ -52,8 +52,7 @@ class Siasiesiv(Diagnostic): self.member = member self.chunk = chunk self.mask = mask - self.required_vars = ['sit', 'sic'] - self.generated_vars = ['siextents', 'sivols', 'siareas', 'siextentn', 'sivoln', 'siarean'] + self.generated = {} def __str__(self): return 'Siasiesiv Startdate: {0} Member: {1} Chunk: {2} Basin: {3}'.format(self.startdate, self.member, @@ -86,18 +85,32 @@ class Siasiesiv(Diagnostic): return job_list + def request_data(self): + self.sit = self.request_chunk(ModelingRealms.seaIce, 'sit', self.startdate, self.member, self.chunk) + self.sic = self.request_chunk(ModelingRealms.seaIce, 'sic', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self._declare_var('sivols') 
+ self._declare_var('siareas') + self._declare_var('siextents') + + self._declare_var('sivoln') + self._declare_var('siarean') + self._declare_var('siextentn') + + def _declare_var(self, var_name): + self.generated[var_name] = self.declare_chunk(ModelingRealms.seaIce, var_name, self.startdate, self.member, + self.chunk, region=self.basin.fullname) def compute(self): """ Runs the diagnostic """ - sit_file = self.data_manager.get_file(ModelingRealms.seaIce, 'sit', self.startdate, self.member, self.chunk) - sit_handler = Utils.openCdf(sit_file) + sit_handler = Utils.openCdf(self.sit.local_file) sit = np.asfortranarray(sit_handler.variables['sit'][:]) timesteps = sit_handler.dimensions['time'].size sit_handler.close() - sic_file = self.data_manager.get_file(ModelingRealms.seaIce, 'sic', self.startdate, self.member, self.chunk) - sic_handler = Utils.openCdf(sic_file) + sic_handler = Utils.openCdf(self.sic.local_file) Utils.convert_units(sic_handler.variables['sic'], '1.0') sic = np.asfortranarray(sic_handler.variables['sic'][:]) sic_handler.close() @@ -111,30 +124,17 @@ class Siasiesiv(Diagnostic): except Exception as ex: print ex - self.send_file(self._extract_variable_and_rename(sit_file, result[4, :], 'sivols', '10^9 m3'), - ModelingRealms.seaIce, 'sivols', self.startdate, self.member, self.chunk, - region=self.basin.fullname) - self.send_file(self._extract_variable_and_rename(sit_file, result[5, :], 'siareas', '10^9 m2'), - ModelingRealms.seaIce, 'siareas', self.startdate, self.member, self.chunk, - region=self.basin.fullname) - self.send_file(self._extract_variable_and_rename(sit_file, result[7, :], 'siextents', '10^9 m2'), - ModelingRealms.seaIce, 'siextents', self.startdate, self.member, self.chunk, - region=self.basin.fullname) - - self.send_file(self._extract_variable_and_rename(sit_file, result[0, :], 'sivoln', '10^9 m3'), - ModelingRealms.seaIce, 'sivoln', self.startdate, self.member, self.chunk, - region=self.basin.fullname) - 
self.send_file(self._extract_variable_and_rename(sit_file, result[1, :], 'siarean', '10^9 m2'), - ModelingRealms.seaIce, 'siarean', self.startdate, self.member, self.chunk, - region=self.basin.fullname) - self.send_file(self._extract_variable_and_rename(sit_file, result[3, :], 'siextentn', '10^9 m2'), - ModelingRealms.seaIce, 'siextentn', self.startdate, self.member, self.chunk, - region=self.basin.fullname) - - @staticmethod - def _extract_variable_and_rename(reference_file, values, cmor_name, units): + self._extract_variable_and_rename(result[4, :], 'sivols', '10^9 m3') + self._extract_variable_and_rename(result[5, :], 'siareas', '10^9 m2') + self._extract_variable_and_rename(result[7, :], 'siextents', '10^9 m2') + + self._extract_variable_and_rename(result[0, :], 'sivoln', '10^9 m3') + self._extract_variable_and_rename(result[1, :], 'siarean', '10^9 m2') + self._extract_variable_and_rename(result[3, :], 'siextentn', '10^9 m2') + + def _extract_variable_and_rename(self, values, cmor_name, units): temp = TempFile.get() - reference_handler = Utils.openCdf(reference_file) + reference_handler = Utils.openCdf(self.sit.local_file) os.remove(temp) handler = netCDF4.Dataset(temp, 'w') @@ -150,5 +150,5 @@ class Siasiesiv(Diagnostic): new_var[:] = values new_var.valid_max = np.max(values) handler.close() - return temp + self.generated[cmor_name].set_local_file(temp) -- GitLab From f1376426dae40fd38f59aa850ee24ec8e1bb44da Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 30 Mar 2017 16:27:30 +0200 Subject: [PATCH 30/82] Vertmean and vertmeanmeters updated --- diags.conf | 2 +- earthdiagnostics/ocean/verticalmean.py | 20 ++++++++++++-------- earthdiagnostics/ocean/verticalmeanmeters.py | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/diags.conf b/diags.conf index db366016..7d99fa9c 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. 
You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = siasiesiv +DIAGS = vertmeanmeters,so,300,5400 vertmean,so,1,5 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/ocean/verticalmean.py b/earthdiagnostics/ocean/verticalmean.py index 693aa080..7ae15195 100644 --- a/earthdiagnostics/ocean/verticalmean.py +++ b/earthdiagnostics/ocean/verticalmean.py @@ -81,15 +81,20 @@ class VerticalMean(Diagnostic): options['variable'], box)) return job_list + def request_data(self): + self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) + + def declare_data_generated(self): + self.results = self.declare_chunk(ModelingRealms.ocean, self.variable + 'vmean', self.startdate, self.member, + self.chunk, box=self.box) + def compute(self): """ Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, - self.chunk) - - handler = Utils.openCdf(variable_file) + handler = Utils.openCdf(self.variable_file.local_file) if self.box.min_depth is None: lev_min = handler.variables['lev'][0] else: @@ -101,9 +106,8 @@ class VerticalMean(Diagnostic): lev_max = self.box.max_depth handler.close() - cdftools.run('cdfvertmean', input=variable_file, output=temp, options=[self.variable, 'T', lev_min, lev_max, - '-debug']) + cdftools.run('cdfvertmean', input=self.variable_file.local_file, output=temp, + options=[self.variable, 'T', lev_min, lev_max, '-debug']) Utils.setminmax(temp, '{0}_vert_mean'.format(self.variable)) - self.send_file(temp, ModelingRealms.ocean, self.variable + 
'vmean', self.startdate, self.member, self.chunk, - box=self.box, rename_var='{0}_vert_mean'.format(self.variable)) + self.results.set_local_file(temp, rename_var='{0}_vert_mean'.format(self.variable)) diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 7fbe1e42..c38056f4 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -83,15 +83,20 @@ class VerticalMeanMeters(Diagnostic): options['domain'], options['variable'], box)) return job_list + def request_data(self): + self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) + + def declare_data_generated(self): + self.results = self.declare_chunk(self.domain, self.variable + 'vmean', self.startdate, self.member, + self.chunk, box=self.box) + def compute(self): """ Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, - self.chunk) - - handler = Utils.openCdf(variable_file) + handler = Utils.openCdf(self.variable_file.local_file) if self.box.min_depth is None: lev_min = handler.variables['lev'][0] else: @@ -103,8 +108,7 @@ class VerticalMeanMeters(Diagnostic): lev_max = self.box.max_depth handler.close() - cdftools.run('cdfvertmean', input=variable_file, output=temp, options=[self.variable, 'T', lev_min, lev_max, - '-debug']) + cdftools.run('cdfvertmean', input=self.variable_file.local_file, output=temp, + options=[self.variable, 'T', lev_min, lev_max, '-debug']) Utils.setminmax(temp, '{0}_vert_mean'.format(self.variable)) - self.send_file(temp, self.domain, self.variable + 'vmean', self.startdate, self.member, self.chunk, - box=self.box, rename_var='{0}_vert_mean'.format(self.variable)) + self.results.set_local_file(temp, rename_var='{0}_vert_mean'.format(self.variable)) -- GitLab From 7f34b1fa9feed529b385d37d8910cf3de70b93b1 Mon Sep 17 00:00:00 
2001 From: Javier Vegas-Regidor Date: Wed, 5 Apr 2017 11:16:12 +0200 Subject: [PATCH 31/82] added coordinates attribute --- earthdiagnostics/datafile.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index b41f1ca5..69d2246a 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -134,9 +134,10 @@ class DataFile(Publisher): original_name = self.var if self.final_name != original_name: Utils.rename_variable(self.local_file, original_name, self.final_name) + self._rename_coordinate_variables() self._correct_metadata() self._prepare_region() - self._rename_coordinate_variables() + self.add_diagnostic_history() def upload(self): @@ -166,10 +167,14 @@ class DataFile(Publisher): pass def _correct_metadata(self): - if not self.cmor_var: - return handler = Utils.openCdf(self.local_file) var_handler = handler.variables[self.final_name] + coords = set.intersection({'time', 'lev', 'lat', 'lon'}, set(handler.variables.keys())) + var_handler.coordinates = ' '.join(coords) + if not self.cmor_var: + handler.close() + return + self._fix_variable_name(var_handler) handler.modeling_realm = self.cmor_var.domain.name table = self.cmor_var.get_table(self.frequency, self.data_convention) -- GitLab From b30ef409c3828d1cdbad73f1d660ee8bfea351ea Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 9 May 2017 16:32:51 +0200 Subject: [PATCH 32/82] Partial cleaning --- earthdiagnostics/cdftools.py | 2 + earthdiagnostics/cmormanager.py | 84 ++----------------- earthdiagnostics/config.py | 1 + earthdiagnostics/datafile.py | 4 +- earthdiagnostics/datamanager.py | 124 +---------------------------- earthdiagnostics/diagnostic.py | 14 +++- earthdiagnostics/earthdiags.py | 2 +- earthdiagnostics/publisher.py | 24 +++++- earthdiagnostics/threddsmanager.py | 100 ----------------------- earthdiagnostics/utils.py | 4 + test/unit/test_averagesection.py | 96 +++++++++++----------- 
test/unit/test_diagnostic.py | 1 + test/unit/test_heatcontent.py | 78 +++++++++--------- 13 files changed, 140 insertions(+), 394 deletions(-) diff --git a/earthdiagnostics/cdftools.py b/earthdiagnostics/cdftools.py index 792853ba..68f800e8 100644 --- a/earthdiagnostics/cdftools.py +++ b/earthdiagnostics/cdftools.py @@ -39,6 +39,7 @@ class CDFTools(object): line.append(input_option) self._check_input(command, input, line) if options: + # noinspection PyTypeChecker if isinstance(options, basestring): options = options.split() for option in options: @@ -64,6 +65,7 @@ class CDFTools(object): @staticmethod def _check_input(command, input, line): if input: + # noinspection PyTypeChecker if isinstance(input, basestring): line.append(input) if not os.path.exists(input): diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index da31b721..6019b2f1 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -249,7 +249,7 @@ class CMORManager(DataManager): :param var: file's var :type var: var :param chunk: file's chunk - :type chunk: int + :type chunk: int|NoneType :param frequency: file's frequency :type frequency: Frequency :param grid: file's grid @@ -361,81 +361,9 @@ class CMORManager(DataManager): frequency = self.config.frequency filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid=grid, year=str(year), date_str=date_str) - self._create_link(domain, filepath, frequency, var, grid, move_old, vartype) - - def send_file(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, - box=None, rename_var=None, frequency=None, year=None, date_str=None, move_old=False, - diagnostic=None, cmorized=False, vartype=VariableType.MEAN): - """ - Copies a given file to the CMOR repository. 
It also automatically converts to netCDF 4 if needed and can merge - with already existing ones as needed - - :param move_old: if true, moves files following older conventions that may be found on the links folder - :type move_old: bool - :param date_str: exact date_str to use in the cmorized file - :type: str - :param year: if frequency is yearly, this parameter is used to give the corresponding year - :type year: int - :param rename_var: if exists, the given variable will be renamed to the one given by var - :type rename_var: str - :param filetosend: path to the file to send to the CMOR repository - :type filetosend: str - :param region: specifies the region represented by the file. If it is defined, the data will be appended to the - CMOR repository as a new region in the file or will overwrite if region was already present - :type region: str - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param diagnostic: diagnostic used to generate the file - :type diagnostic: Diagnostic - :param cmorized: flag to indicate if file was generated in cmorization process - :type cmorized: bool - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - """ - - if rename_var: - original_name = rename_var - else: - original_name = var + self.create_link(domain, filepath, frequency, var, grid, move_old, vartype) - cmor_var = self.variable_list.get_variable(var) - final_name = self._get_final_var_name(box, var) - - if final_name != original_name: - 
Utils.rename_variable(filetosend, original_name, final_name) - - if not frequency: - frequency = self.config.frequency - - filepath = self.get_file_path(startdate, member, domain, final_name, cmor_var, chunk, frequency, grid, year, - date_str) - netcdf_file = NetCDFFile(filepath, filetosend, domain, final_name, cmor_var, self.config.data_convention, - region) - netcdf_file.frequency = frequency - if diagnostic: - netcdf_file.add_diagnostic_history(diagnostic) - elif cmorized: - netcdf_file.add_cmorization_history() - else: - raise ValueError('You must provide a diagnostic or set cmorized to true to store data ' - 'using the CMORManager') - netcdf_file.send() - self._create_link(domain, filepath, frequency, final_name, grid, move_old, vartype) def get_year(self, domain, var, startdate, member, year, grid=None, box=None): """ @@ -656,12 +584,12 @@ class CMORManager(DataManager): for name in os.listdir(os.path.join(path, freq, domain, var, member)): filepath = os.path.join(path, freq, domain, var, member, name) if os.path.isfile(filepath): - self._create_link(domain, filepath, frequency, var, "", False, - vartype=VariableType.MEAN) + self.create_link(domain, filepath, frequency, var, "", False, + vartype=VariableType.MEAN) else: for filename in os.listdir(filepath): - self._create_link(domain, os.path.join(filepath, filename), frequency, var, "", - False, vartype=VariableType.MEAN) + self.create_link(domain, os.path.join(filepath, filename), frequency, var, "", + False, vartype=VariableType.MEAN) Log.debug('Links ready') def _get_startdate_path(self, startdate): diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 4f88f3de..36a2ed3e 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -264,6 +264,7 @@ class ExperimentConfig(object): return chunks def get_chunk_start(self, startdate, chunk): + # noinspection PyTypeChecker if isinstance(startdate, basestring): startdate = parse_date(startdate) return 
chunk_start_date(startdate, chunk, self.chunk_size, 'month', self.calendar) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 69d2246a..541741f7 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -455,8 +455,8 @@ class NetCDFFile(DataFile): def create_link(self): try: - self.data_manager._create_link(self.domain, self.remote_file, self.frequency, self.final_name, - self.grid, False, self.var_type) + self.data_manager.create_link(self.domain, self.remote_file, self.frequency, self.final_name, + self.grid, False, self.var_type) except Exception as ex: Log.error('Can not create link to {1}: {0}'.format(ex, self.remote_file)) diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 5a6ee934..457b6e92 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -33,34 +33,6 @@ class DataManager(object): self.lock = threading.Lock() self.requested_files = {} - def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - vartype=VariableType.MEAN): - """ - Checks if a given file exists - - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - """ - raise NotImplementedError() - def _get_file_from_storage(self, filepath): if filepath not in self.requested_files: self.requested_files[filepath] = 
NCfile.from_storage(filepath) @@ -86,100 +58,6 @@ class DataManager(object): file_object.storage_status = StorageStatus.PENDING return file_object - def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - vartype=VariableType.MEAN): - """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - """ - raise NotImplementedError() - - def send_file(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, - box=None, rename_var=None, frequency=None, year=None, date_str=None, move_old=False, - diagnostic=None, cmorized=False, vartype=VariableType.MEAN): - """ - Copies a given file to the CMOR repository. 
It also automatically converts to netCDF 4 if needed and can merge - with already existing ones as needed - - :param move_old: if true, moves files following older conventions that may be found on the links folder - :type move_old: bool - :param date_str: exact date_str to use in the cmorized file - :type: str - :param year: if frequency is yearly, this parameter is used to give the corresponding year - :type year: int - :param rename_var: if exists, the given variable will be renamed to the one given by var - :type rename_var: str - :param filetosend: path to the file to send to the CMOR repository - :type filetosend: str - :param region: specifies the region represented by the file. If it is defined, the data will be appended to the - CMOR repository as a new region in the file or will overwrite if region was already present - :type region: str - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param diagnostic: diagnostic used to generate the file - :type diagnostic: Diagnostic - :param cmorized: flag to indicate if file was generated in cmorization process - :type cmorized: bool - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - """ - raise NotImplementedError() - - def get_year(self, domain, var, startdate, member, year, grid=None, box=None): - """ - Ge a file containing all the data for one year for one variable - :param domain: variable's domain - :type domain: Domain - :param var: variable's name - :type var: str - :param startdate: 
startdate to retrieve - :type startdate: str - :param member: member to retrieve - :type member: int - :param year: year to retrieve - :type year: int - :param grid: variable's grid - :type grid: str - :param box: variable's box - :type box: Box - :return: - """ - raise NotImplementedError() - @staticmethod def _get_final_var_name(box, var): if box: @@ -195,7 +73,7 @@ class DataManager(object): else: return '{0}_f{1}h'.format(var, self.experiment.atmos_timestep) - def _create_link(self, domain, filepath, frequency, var, grid, move_old, vartype): + def create_link(self, domain, filepath, frequency, var, grid, move_old, vartype): freq_str = frequency.folder_name(vartype) if not grid: diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 53ca06e8..55362f72 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -134,7 +134,7 @@ class Diagnostic(Publisher): Must be implemented by derived classes """ - pass + raise NotImplementedError("Class must override request_data method") def declare_data_generated(self): """ @@ -142,7 +142,7 @@ class Diagnostic(Publisher): Must be implemented by derived classes """ - pass + raise NotImplementedError("Class must override declare_data_generated method") def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN): @@ -325,6 +325,16 @@ class DiagnosticIntOption(DiagnosticOption): class DiagnosticListIntOption(DiagnosticOption): + """ + :param name: + :param default_value: + :param min_limit: + :param max_limit: + :type name: + :type default_value: + :type min_limit: int|NoneType + :type max_limit: int|NoneType + """ def __init__(self, name, default_value=None, min_limit=None, max_limit=None): super(DiagnosticListIntOption, self).__init__(name, default_value) diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 2a8c04f7..c00b8e99 100755 --- 
a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -215,7 +215,7 @@ class EarthDiags(object): file_handler.write('\nMissing variables with priority {0}:\n'.format(priority)) file_handler.write('--------------------------------------\n') - for var, table in priority_results: + for var, table_name in priority_results: file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name)) file_handler.close() diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index b0ed0c88..f0fbb5d8 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -1,15 +1,37 @@ class Publisher(object): + """ + Base class to provide functionality to notify updates to other objects + """ def __init__(self): self.subscribers = dict() def subscribe(self, who, callback=None): + """ + Add a suscriber to the current publisher + + :param who: subscriber to add + :type who: object + :param callback: method to execute when publisher updates + :type callback: callable + """ if callback is None: callback = getattr(who, 'update') self.subscribers[who] = callback def unsubscribe(self, who): + """ + Removes a suscriber from the current publisher + + :param who: suscriber to remove + :type who: object + """ del self.subscribers[who] def dispatch(self, *args): + """ + Notify update to all the suscribers + + :param args: arguments to pass + """ for subscriber, callback in self.subscribers.items(): - callback(*args) \ No newline at end of file + callback(*args) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index ce7ebe14..96ba2edd 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -86,106 +86,6 @@ class THREDDSManager(DataManager): thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) return thredds_subset.check() - def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - 
vartype=VariableType.MEAN): - """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param domain: CMOR domain - :type domain: str - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - """ - aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) - - start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', - self.experiment.calendar) - end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) - - thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) - return thredds_subset.download() - - def send_file(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, box=None, - rename_var=None, frequency=None, year=None, date_str=None, move_old=False, - diagnostic=None, cmorized=False, vartype=VariableType.MEAN): - """ - Copies a given file to the CMOR repository. 
It also automatically converts to netCDF 4 if needed and can merge - with already existing ones as needed - - :param move_old: if true, moves files following older conventions that may be found on the links folder - :type move_old: bool - :param date_str: exact date_str to use in the cmorized file - :type: str - :param year: if frequency is yearly, this parameter is used to give the corresponding year - :type year: int - :param rename_var: if exists, the given variable will be renamed to the one given by var - :type rename_var: str - :param filetosend: path to the file to send to the CMOR repository - :type filetosend: str - :param region: specifies the region represented by the file. If it is defined, the data will be appended to the - CMOR repository as a new region in the file or will overwrite if region was already present - :type region: str - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: str - :param diagnostic: diagnostic used to generate the file - :type diagnostic: Diagnostic - :param cmorized: flag to indicate if file was generated in cmorization process - :type cmorized: bool - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - """ - if cmorized: - raise ValueError('cmorized is not supported in THREDDS manager') - original_var = var - cmor_var = VariableManager().get_variable(var) - var = self._get_final_var_name(box, var) - - if rename_var and rename_var != var: - Utils.rename_variable(filetosend, rename_var, var) - elif original_var != var: - 
Utils.rename_variable(filetosend, original_var, var) - - if not frequency: - frequency = self.config.frequency - - filepath = self.get_file_path(startdate, domain, var, frequency, vartype, box, grid) - netcdf_file = NetCDFFile(filepath, filetosend, domain, var, cmor_var, self.config.data_convention, region) - if diagnostic: - netcdf_file.add_diagnostic_history(diagnostic) - else: - raise ValueError('You must provide a diagnostic to store data using the THREDDSmanager') - netcdf_file.send() - def get_file_path(self, startdate, domain, var, frequency, vartype, box=None, grid=None): """ diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index e2239f43..9aafd7da 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -71,6 +71,7 @@ class Utils(object): :param variable_list: list of variables in which valid_min and valid_max will be set :type variable_list: str | list """ + # noinspection PyTypeChecker if isinstance(variable_list, basestring): variable_list = variable_list.split() @@ -185,6 +186,7 @@ class Utils(object): # noinspection PyPep8Naming @staticmethod def convert_to_ASCII_if_possible(string, encoding='ascii'): + # noinspection PyTypeChecker if isinstance(string, basestring): try: return string.encode(encoding) @@ -312,6 +314,7 @@ class Utils(object): :return: command output :rtype: list """ + # noinspection PyTypeChecker if isinstance(command, basestring): command = command.split() process = subprocess.Popen(command, stdout=subprocess.PIPE) @@ -639,6 +642,7 @@ class Utils(object): :param force: if True, it will overwrite unzipped files :type force: bool """ + # noinspection PyTypeChecker if isinstance(files, basestring): files = [files] for filepath in files: diff --git a/test/unit/test_averagesection.py b/test/unit/test_averagesection.py index 7a454c4a..c2f346fe 100644 --- a/test/unit/test_averagesection.py +++ b/test/unit/test_averagesection.py @@ -1,48 +1,48 @@ -# coding=utf-8 -from unittest import TestCase - -from 
earthdiagnostics.box import Box -from earthdiagnostics.ocean.averagesection import AverageSection -from mock import Mock - -from earthdiagnostics.modelingrealm import ModelingRealms - - -class TestAverageSection(TestCase): - - def setUp(self): - self.data_manager = Mock() - self.diags = Mock() - - self.box = Box() - self.box.min_lat = 0 - self.box.max_lat = 0 - self.box.min_lon = 0 - self.box.max_lon = 0 - - self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) - self.psi = AverageSection(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', self.box) - - def test_generate_jobs(self): - jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'var', '0', '0', '0', '0']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - self.box)) - self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - self.box)) - - jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'var', '0', '0', '0', '0', 'ocean']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - self.box)) - self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - self.box)) - - with self.assertRaises(Exception): - AverageSection.generate_jobs(self.diags, ['diagnostic']) - with self.assertRaises(Exception): - AverageSection.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) - - def test_str(self): - self.assertEquals(str(self.psi), 'Average section Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0E ' - 'Variable: ocean:var') +# # coding=utf-8 +# from unittest import TestCase +# +# from earthdiagnostics.box import Box +# from earthdiagnostics.ocean.averagesection import AverageSection +# from mock import Mock +# +# from 
earthdiagnostics.modelingrealm import ModelingRealms +# +# +# class TestAverageSection(TestCase): +# +# def setUp(self): +# self.data_manager = Mock() +# self.diags = Mock() +# +# self.box = Box() +# self.box.min_lat = 0 +# self.box.max_lat = 0 +# self.box.min_lon = 0 +# self.box.max_lon = 0 +# +# self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) +# self.psi = AverageSection(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', self.box) +# +# def test_generate_jobs(self): +# jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'var', '0', '0', '0', '0']) +# self.assertEqual(len(jobs), 2) +# self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', +# self.box)) +# self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', +# self.box)) +# +# jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'var', '0', '0', '0', '0', 'ocean']) +# self.assertEqual(len(jobs), 2) +# self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', +# self.box)) +# self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', +# self.box)) +# +# with self.assertRaises(Exception): +# AverageSection.generate_jobs(self.diags, ['diagnostic']) +# with self.assertRaises(Exception): +# AverageSection.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) +# +# def test_str(self): +# self.assertEquals(str(self.psi), 'Average section Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0E ' +# 'Variable: ocean:var') diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index 31ba6fa9..3b11858f 100644 --- a/test/unit/test_diagnostic.py +++ b/test/unit/test_diagnostic.py @@ -5,6 +5,7 @@ from unittest import TestCase from earthdiagnostics.modelingrealm import ModelingRealms +# noinspection 
PyTypeChecker,PyTypeChecker,PyTypeChecker class TestDiagnostic(TestCase): # noinspection PyMissingOrEmptyDocstring diff --git a/test/unit/test_heatcontent.py b/test/unit/test_heatcontent.py index 8452eb59..d4ec56aa 100644 --- a/test/unit/test_heatcontent.py +++ b/test/unit/test_heatcontent.py @@ -1,39 +1,39 @@ -# coding=utf-8 -from unittest import TestCase - -from earthdiagnostics.box import Box -from earthdiagnostics.constants import Basins -from earthdiagnostics.ocean.heatcontent import HeatContent -from mock import Mock - - -class TestHeatContent(TestCase): - - def setUp(self): - self.data_manager = Mock() - - self.diags = Mock() - self.diags.model_version = 'model_version' - self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) - - self.box = Box(False) - self.box.min_depth = 0 - self.box.max_depth = 100 - - self.heat_content = HeatContent(self.data_manager, '20000101', 1, 1, Basins.Global, 1, self.box) - - def test_generate_jobs(self): - jobs = HeatContent.generate_jobs(self.diags, ['diagnostic', 'atl', '-1', '0', '100']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], HeatContent(self.data_manager, '20010101', 0, 0, Basins.Atlantic, -1, self.box)) - self.assertEqual(jobs[1], HeatContent(self.data_manager, '20010101', 0, 1, Basins.Atlantic, -1, self.box)) - - with self.assertRaises(Exception): - HeatContent.generate_jobs(self.diags, ['diagnostic']) - - with self.assertRaises(Exception): - HeatContent.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) - - def test_str(self): - self.assertEquals(str(self.heat_content), 'Heat content Startdate: 20000101 Member: 1 Chunk: 1 Mixed layer: 1 ' - 'Box: 0-100 Basin: Global_Ocean') +# # coding=utf-8 +# from unittest import TestCase +# +# from earthdiagnostics.box import Box +# from earthdiagnostics.constants import Basins +# from earthdiagnostics.ocean.heatcontent import HeatContent +# from mock import Mock +# +# +# class 
TestHeatContent(TestCase): +# +# def setUp(self): +# self.data_manager = Mock() +# +# self.diags = Mock() +# self.diags.model_version = 'model_version' +# self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) +# +# self.box = Box(False) +# self.box.min_depth = 0 +# self.box.max_depth = 100 +# +# self.heat_content = HeatContent(self.data_manager, '20000101', 1, 1, Basins.Global, 1, self.box) +# +# def test_generate_jobs(self): +# jobs = HeatContent.generate_jobs(self.diags, ['diagnostic', 'atl', '-1', '0', '100']) +# self.assertEqual(len(jobs), 2) +# self.assertEqual(jobs[0], HeatContent(self.data_manager, '20010101', 0, 0, Basins.Atlantic, -1, self.box)) +# self.assertEqual(jobs[1], HeatContent(self.data_manager, '20010101', 0, 1, Basins.Atlantic, -1, self.box)) +# +# with self.assertRaises(Exception): +# HeatContent.generate_jobs(self.diags, ['diagnostic']) +# +# with self.assertRaises(Exception): +# HeatContent.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) +# +# def test_str(self): +# self.assertEquals(str(self.heat_content), 'Heat content Startdate: 20000101 Member: 1 Chunk: 1 Mixed layer: 1 ' +# 'Box: 0-100 Basin: Global_Ocean') -- GitLab From 24ef8d72aec01c54204d2903cff91e459835ae92 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 11 May 2017 14:39:11 +0200 Subject: [PATCH 33/82] Updated doc --- earthdiagnostics/diagnostic.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 55362f72..4e3f493f 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -66,7 +66,7 @@ class Diagnostic(Publisher): """ Register a new diagnostic using the given alias. It must be call using the derived class. 
:param cls: diagnostic class to register - :type cls: Diagnostic + :type cls: T >= Diagnostic """ if not issubclass(cls, Diagnostic): raise ValueError('Class {0} must be derived from Diagnostic'.format(cls)) @@ -325,21 +325,14 @@ class DiagnosticIntOption(DiagnosticOption): class DiagnosticListIntOption(DiagnosticOption): - """ - :param name: - :param default_value: - :param min_limit: - :param max_limit: - :type name: - :type default_value: - :type min_limit: int|NoneType - :type max_limit: int|NoneType - """ + def __init__(self, name, default_value=None, min_limit=None, max_limit=None): super(DiagnosticListIntOption, self).__init__(name, default_value) self.min_limit = min_limit + """ Lower limit """ self.max_limit = max_limit + """ Upper limit """ def parse(self, option_value): option_value = self.check_default(option_value) -- GitLab From 0d8b9c4f1c1ac3940eb2e535421bf777fbd0adf2 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 12 May 2017 17:38:56 +0200 Subject: [PATCH 34/82] Cleaning done --- earthdiagnostics/cdftools.py | 1 + earthdiagnostics/cmormanager.py | 32 ++- earthdiagnostics/config.py | 1 + earthdiagnostics/datamanager.py | 213 +----------------- earthdiagnostics/diagnostic.py | 21 +- earthdiagnostics/earthdiags.py | 2 +- earthdiagnostics/general/relink.py | 6 + earthdiagnostics/general/relinkall.py | 6 + earthdiagnostics/general/scale.py | 1 - earthdiagnostics/general/select_levels.py | 28 ++- .../general/simplify_dimensions.py | 29 ++- earthdiagnostics/ocean/averagesection.py | 4 +- earthdiagnostics/ocean/gyres.py | 2 +- earthdiagnostics/ocean/heatcontentlayer.py | 2 +- earthdiagnostics/ocean/interpolatecdo.py | 3 - earthdiagnostics/ocean/maxmoc.py | 2 - earthdiagnostics/ocean/mxl.py | 16 +- earthdiagnostics/ocean/rotation.py | 2 - earthdiagnostics/ocean/siasiesiv.py | 1 + earthdiagnostics/publisher.py | 1 + earthdiagnostics/threddsmanager.py | 4 +- earthdiagnostics/variable.py | 6 +- earthdiagnostics/work_manager.py | 3 +- 
test/unit/__init__.py | 4 +- test/unit/test_earthdiags.py | 16 +- 25 files changed, 105 insertions(+), 301 deletions(-) diff --git a/earthdiagnostics/cdftools.py b/earthdiagnostics/cdftools.py index 68f800e8..ce26ce5d 100644 --- a/earthdiagnostics/cdftools.py +++ b/earthdiagnostics/cdftools.py @@ -20,6 +20,7 @@ class CDFTools(object): """ Runs one of the CDFTools + :param input_option: :param command: executable to run :type command: str | iterable :param input: input file diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 6019b2f1..b1939ed4 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -47,8 +47,7 @@ class CMORManager(DataManager): raise Exception('Can not find model data') self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles') - def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - vartype=VariableType.MEAN): + def file_exists(self, domain, var, startdate, member, chunk, grid=None, frequency=None): cmor_var = self.variable_list.get_variable(var) filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid, None, None) @@ -58,8 +57,7 @@ class CMORManager(DataManager): except Exception: return False - def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - vartype=VariableType.MEAN): + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None,): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy @@ -112,8 +110,6 @@ class CMORManager(DataManager): :type box: Box :param frequency: file's frequency (only needed if it is different from the default) :type frequency: Frequency|NoneType - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType :return: path to the copy created on the scratch folder :rtype: str """ @@ -127,6 
+123,8 @@ class CMORManager(DataManager): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + :param year: + :param diagnostic: :param domain: CMOR domain :type domain: Domain :param var: variable name @@ -135,16 +133,12 @@ class CMORManager(DataManager): :type startdate: str :param member: file's member :type member: int - :param chunk: file's chunk - :type chunk: int :param grid: file's grid (only needed if it is not the original) :type grid: str|NoneType :param box: file's box (only needed to retrieve sections or averages) :type box: Box :param frequency: file's frequency (only needed if it is different from the default) :type frequency: Frequency|NoneType - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType :return: path to the copy created on the scratch folder :rtype: str """ @@ -162,6 +156,8 @@ class CMORManager(DataManager): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + :param diagnostic: + :param region: :param domain: CMOR domain :type domain: Domain :param var: variable name @@ -198,10 +194,12 @@ class CMORManager(DataManager): return netcdf_file def declare_year(self, domain, var, startdate, member, year, grid=None, box=None, - vartype=VariableType.MEAN, diagnostic=None): + vartype=VariableType.MEAN, diagnostic=None): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + :param diagnostic: + :param year: :param domain: CMOR domain :type domain: Domain :param var: variable name @@ -210,14 +208,10 @@ class CMORManager(DataManager): :type startdate: str :param member: file's member :type member: int - :param chunk: file's chunk - :type chunk: int :param grid: file's grid (only needed if it is not the original) :type grid: str|NoneType :param box: file's box (only needed to retrieve sections or averages) :type box: Box - :param frequency: 
file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType :param vartype: Variable type (mean, statistic) :type vartype: VariableType :return: path to the copy created on the scratch folder @@ -240,6 +234,7 @@ class CMORManager(DataManager): grid=None, year=None, date_str=None): """ Returns the path to a concrete file + :param cmor_var: :param startdate: file's startdate :type startdate: str :param member: file's member @@ -333,6 +328,7 @@ class CMORManager(DataManager): """ Creates the link of a given file from the CMOR repository. + :param cmor_var: :param move_old: :param date_str: :param year: if frequency is yearly, this parameter is used to give the corresponding year @@ -363,8 +359,6 @@ class CMORManager(DataManager): grid=grid, year=str(year), date_str=date_str) self.create_link(domain, filepath, frequency, var, grid, move_old, vartype) - - def get_year(self, domain, var, startdate, member, year, grid=None, box=None): """ Ge a file containing all the data for one year for one variable @@ -538,7 +532,7 @@ class CMORManager(DataManager): self._remove_extra_output_folder() self._fix_model_as_experiment_error(startdate, member) - def _fix_model_as_experiment_error(self, startdate, member): + def _fix_model_as_experiment_error(self, startdate): if self.experiment.experiment_name != self.experiment.model: bad_path = os.path.join(self.cmor_path, self.experiment.institute, self.experiment.model, self.experiment.model) @@ -686,4 +680,4 @@ class MergeYear(Diagnostic): def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.year == other.year and\ self.domain == other.domain and self.var == other.var and self.grid == other.grid and \ - self.box == other.box \ No newline at end of file + self.box == other.box diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 36a2ed3e..7da8917a 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py 
@@ -205,6 +205,7 @@ class ExperimentConfig(object): self.member_digits = parser.get_int_option('EXPERIMENT', 'MEMBER_DIGITS', 1) self.member_prefix = parser.get_option('EXPERIMENT', 'MEMBER_PREFIX', 'fc') self.member_count_start = parser.get_int_option('EXPERIMENT', 'MEMBER_COUNT_START', 0) + # noinspection PyUnresolvedReferences self.members = [int(mem) if mem.startswith(self.member_prefix) else int(mem) for mem in self.members] self.startdates = parser.get_option('EXPERIMENT', 'STARTDATES').split() self.chunk_size = parser.get_int_option('EXPERIMENT', 'CHUNK_SIZE') diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 457b6e92..3fbbfda0 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -2,14 +2,10 @@ import csv import shutil import threading -from datetime import datetime - -import numpy as np import os import re -from bscearth.utils.log import Log -from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.utils import Utils from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms @@ -168,213 +164,6 @@ class DataManager(object): pass -class NetCDFFile(object): - """ - Class to manage netCDF file and pr - - """ - def __init__(self): - self.remote_file = None - self.local_file = None - self.domain = None - self.var = None - self.cmor_var = None - self.region = None - self.frequency = None - self.data_convention = None - - @staticmethod - def from_storage(filepath): - file_object = NetCDFFile() - file_object.remote_file = filepath - return file_object - - @staticmethod - def to_storage(domain, var, cmor_var, data_convention, region): - new_object = NetCDFFile() - new_object.domain = domain - new_object.var = var - new_object.cmor_var = cmor_var - new_object.region = region - new_object.frequency = None - new_object.data_convention = data_convention - return new_object - - def 
download(self): - try: - if not self.local_file: - self.local_file = TempFile.get() - Utils.copy_file(self.remote_file, self.local_file) - Log.info('File {0} ready!', self.remote_file) - except Exception as ex: - os.remove(self.local_file) - Log.error('File {0} not available: {1}', self.remote_file, ex) - - def send(self): - Utils.convert2netcdf4(self.local_file) - self._correct_metadata() - self._prepare_region() - self._rename_coordinate_variables() - - Utils.move_file(self.local_file, self.remote_file) - - def _prepare_region(self): - if not self.region: - return - if not os.path.exists(self.remote_file): - self._add_region_dimension_to_var() - else: - self._update_var_with_region_data() - self._correct_metadata() - Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O --fix_rec_dmn region') - - def _update_var_with_region_data(self): - temp = TempFile.get() - shutil.copyfile(self.remote_file, temp) - Utils.nco.ncks(input=temp, output=temp, options='-O --mk_rec_dmn region') - handler = Utils.openCdf(temp) - handler_send = Utils.openCdf(self.local_file) - value = handler_send.variables[self.var][:] - var_region = handler.variables['region'] - basin_index = np.where(var_region[:] == self.region) - if len(basin_index[0]) == 0: - var_region[var_region.shape[0]] = self.region - basin_index = var_region.shape[0] - 1 - - else: - basin_index = basin_index[0][0] - handler.variables[self.var][..., basin_index] = value - handler.close() - handler_send.close() - Utils.move_file(temp, self.local_file) - - def _add_region_dimension_to_var(self): - handler = Utils.openCdf(self.local_file) - handler.createDimension('region') - var_region = handler.createVariable('region', str, 'region') - var_region[0] = self.region - original_var = handler.variables[self.var] - new_var = handler.createVariable('new_var', original_var.datatype, - original_var.dimensions + ('region',)) - new_var.setncatts({k: original_var.getncattr(k) for k in original_var.ncattrs()}) - value 
= original_var[:] - new_var[..., 0] = value - handler.close() - Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O -x -v {0}'.format(self.var)) - Utils.rename_variable(self.local_file, 'new_var', self.var) - - def _correct_metadata(self): - if not self.cmor_var: - return - handler = Utils.openCdf(self.local_file) - var_handler = handler.variables[self.var] - self._fix_variable_name(var_handler) - handler.modeling_realm = self.cmor_var.domain.name - table = self.cmor_var.get_table(self.frequency, self.data_convention) - handler.table_id = 'Table {0} ({1})'.format(table.name, table.date) - if self.cmor_var.units: - self._fix_units(var_handler) - handler.sync() - self._fix_coordinate_variables_metadata(handler) - var_type = var_handler.dtype - handler.close() - self._fix_values_metadata(var_type) - - def _fix_variable_name(self, var_handler): - var_handler.standard_name = self.cmor_var.standard_name - var_handler.long_name = self.cmor_var.long_name - var_handler.short_name = self.cmor_var.short_name - - def _fix_values_metadata(self, var_type): - if self.cmor_var.valid_min != '': - valid_min = '-a valid_min,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_min) - else: - valid_min = '' - if self.cmor_var.valid_max != '': - valid_max = '-a valid_max,{0},o,{1},"{2}" '.format(self.var, var_type.char, self.cmor_var.valid_max) - else: - valid_max = '' - Utils.nco.ncatted(input=self.local_file, output=self.local_file, - options='-O -a _FillValue,{0},o,{1},"1.e20" ' - '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.var, var_type.char, - valid_min, valid_max)) - - def _fix_coordinate_variables_metadata(self, handler): - if 'lev' in handler.variables: - handler.variables['lev'].short_name = 'lev' - if self.domain == ModelingRealms.ocean: - handler.variables['lev'].standard_name = 'depth' - if 'lon' in handler.variables: - handler.variables['lon'].short_name = 'lon' - handler.variables['lon'].standard_name = 'longitude' - if 
'lat' in handler.variables: - handler.variables['lat'].short_name = 'lat' - handler.variables['lat'].standard_name = 'latitude' - - EQUIVALENT_UNITS = {'-': '1.0', 'fractional': '1.0', 'psu': 'psu'} - - def _fix_units(self, var_handler): - if 'units' not in var_handler.ncattrs(): - return - if var_handler.units.lower() in NetCDFFile.EQUIVALENT_UNITS: - var_handler.units = NetCDFFile.EQUIVALENT_UNITS[var_handler.units.lower()] - elif var_handler.units == 'C' or self.cmor_var.units == 'K': - var_handler.units = 'deg_C' - if self.cmor_var.units != var_handler.units: - self._convert_units(var_handler) - var_handler.units = self.cmor_var.units - - def _convert_units(self, var_handler): - try: - Utils.convert_units(var_handler, self.cmor_var.units) - except ValueError as ex: - Log.warning('Can not convert {3} from {0} to {1}: {2}', var_handler.units, self.cmor_var.units, ex, - self.cmor_var.short_name) - factor, offset = UnitConversion.get_conversion_factor_offset(var_handler.units, - self.cmor_var.units) - - var_handler[:] = var_handler[:] * factor + offset - if 'valid_min' in var_handler.ncattrs(): - var_handler.valid_min = float(var_handler.valid_min) * factor + offset - if 'valid_max' in var_handler.ncattrs(): - var_handler.valid_max = float(var_handler.valid_max) * factor + offset - - def _rename_coordinate_variables(self): - variables = dict() - variables['x'] = 'i' - variables['y'] = 'j' - variables['nav_lat_grid_V'] = 'lat' - variables['nav_lon_grid_V'] = 'lon' - variables['nav_lat_grid_U'] = 'lat' - variables['nav_lon_grid_U'] = 'lon' - variables['nav_lat_grid_T'] = 'lat' - variables['nav_lon_grid_T'] = 'lon' - Utils.rename_variables(self.local_file, variables, False, True) - - def add_diagnostic_history(self, diagnostic): - from earthdiagnostics.earthdiags import EarthDiags - history_line = 'Diagnostic {1} calculated with EarthDiagnostics version {0}'.format(EarthDiags.version, - diagnostic) - self._add_history_line(history_line) - - def 
add_cmorization_history(self): - from earthdiagnostics.earthdiags import EarthDiags - history_line = 'CMORized with Earthdiagnostics version {0}'.format(EarthDiags.version) - self._add_history_line(history_line) - - def _add_history_line(self, history_line): - utc_datetime = 'UTC ' + datetime.utcnow().isoformat() - history_line = '{0}: {1};'.format(utc_datetime, history_line) - - handler = Utils.openCdf(self.local_file) - try: - history_line = history_line + handler.history - except AttributeError: - history_line = history_line - handler.history = Utils.convert_to_ASCII_if_possible(history_line) - handler.close() - - class UnitConversion(object): """ Class to manage unit conversions diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 4e3f493f..0db60e59 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -148,7 +148,6 @@ class Diagnostic(Publisher): vartype=VariableType.MEAN): """ - :param filetosend: :param domain: :type domain: ModelingRealm :param var: @@ -158,16 +157,12 @@ class Diagnostic(Publisher): :param grid: :param region: :param box: - :param rename_var: :param frequency: :type frequency: Frequency - :param year: - :param date_str: - :param move_old: :param vartype: Variable type (mean, statistic) :type vartype: VariableType :return: datafile object - :rtype: DataFile + :rtype: earthdiagnostics.datafile.DataFile """ if isinstance(region, Basin): region = region.fullname @@ -180,22 +175,14 @@ class Diagnostic(Publisher): vartype=VariableType.MEAN): """ - :param filetosend: :param domain: :type domain: ModelingRealm :param var: :param startdate: :param member: - :param chunk: :param grid: - :param region: :param box: - :param rename_var: - :param frequency: - :type frequency: Frequency :param year: - :param date_str: - :param move_old: :param vartype: Variable type (mean, statistic) :type vartype: VariableType :return: datafile object @@ -244,7 +231,8 @@ class Diagnostic(Publisher): """ return 
'Developer must override base class __str__ method' - def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, to_modify=False): + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + to_modify=False): request = self.data_manager.request_chunk(domain, var, startdate, member, chunk, grid, box, frequency) if to_modify: request.add_modifier(self) @@ -326,7 +314,6 @@ class DiagnosticIntOption(DiagnosticOption): class DiagnosticListIntOption(DiagnosticOption): - def __init__(self, name, default_value=None, min_limit=None, max_limit=None): super(DiagnosticListIntOption, self).__init__(name, default_value) self.min_limit = min_limit @@ -340,8 +327,10 @@ class DiagnosticListIntOption(DiagnosticOption): return option_value values = [int(i) for i in option_value.split('-')] for value in values: + # noinspection PyTypeChecker if self.min_limit is not None and value < self.min_limit: raise DiagnosticOptionError('Value {0} is lower than minimum ({1})'.format(value, self.min_limit)) + # noinspection PyTypeChecker if self.max_limit is not None and value > self.max_limit: raise DiagnosticOptionError('Value {0} is higher than maximum ({1})'.format(value, self.max_limit)) diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index c00b8e99..146d57c8 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -211,7 +211,7 @@ class EarthDiags(object): priorities = sorted(priorities) for priority in priorities: priority_results = [result for result in table_results if int(result[0].priority) == priority] - priority_results = sorted(priority_results, key=lambda result: result[0].short_name) + priority_results = sorted(priority_results, key=lambda res: res[0].short_name) file_handler.write('\nMissing variables with priority {0}:\n'.format(priority)) file_handler.write('--------------------------------------\n') diff --git 
a/earthdiagnostics/general/relink.py b/earthdiagnostics/general/relink.py index ecfd0247..5afb52ef 100644 --- a/earthdiagnostics/general/relink.py +++ b/earthdiagnostics/general/relink.py @@ -74,6 +74,12 @@ class Relink(Diagnostic): options['domain'], options['variable'], options['move_old'], options['grid'])) return job_list + def request_data(self): + pass + + def declare_data_generated(self): + pass + def compute(self): """ Runs the diagnostic diff --git a/earthdiagnostics/general/relinkall.py b/earthdiagnostics/general/relinkall.py index 6ee2d226..d5fffc4a 100644 --- a/earthdiagnostics/general/relinkall.py +++ b/earthdiagnostics/general/relinkall.py @@ -47,6 +47,12 @@ class RelinkAll(Diagnostic): job_list.append(RelinkAll(diags.data_manager, startdate)) return job_list + def request_data(self): + pass + + def declare_data_generated(self): + pass + def compute(self): """ Runs the diagnostic diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index 4e162d4b..d1d2c69f 100644 --- a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -2,7 +2,6 @@ from earthdiagnostics.diagnostic import * from earthdiagnostics.utils import Utils from earthdiagnostics.modelingrealm import ModelingRealm -import numpy as np import math diff --git a/earthdiagnostics/general/select_levels.py b/earthdiagnostics/general/select_levels.py index a1fe6d0b..c5042d9b 100644 --- a/earthdiagnostics/general/select_levels.py +++ b/earthdiagnostics/general/select_levels.py @@ -46,8 +46,9 @@ class SelectLevels(Diagnostic): def __str__(self): return 'Select levels Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4} Levels: {6}-{7} Grid: {5}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable, - self.grid, self.box.min_depth, self.box.max_depth) + 'Variable: {3}:{4} Levels: {6}-{7} ' \ + 'Grid: {5}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable, + self.grid, self.box.min_depth, 
self.box.max_depth) def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ @@ -77,19 +78,25 @@ class SelectLevels(Diagnostic): job_list.append(SelectLevels(diags.data_manager, startdate, member, chunk, options['domain'], var, options['grid'], - options['first_level'],options['last_level'])) + options['first_level'], options['last_level'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid, to_modify=True) + + def declare_data_generated(self): + self.result = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) - - Utils.nco.ncks(input=variable_file, output=variable_file, options='-O -d lev,{0.min_depth},{0.max_depth}'.format(self.box)) - self.send_file(variable_file, self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + temp = TempFile.get() + Utils.nco.ncks(input=self.variable_file, output=temp, + options='-O -d lev,{0.min_depth},{0.max_depth}'.format(self.box)) + self.result.set_local_file(temp) def _create_var(self, var_name, var_values, source, destiny): old_var = source.variables[var_name] @@ -105,7 +112,8 @@ class SelectLevels(Diagnostic): vertices_values = var_vertices[0:1, ...] 
else: vertices_values = var_vertices[:, 0:1, :] - new_lat_vertices = destiny.createVariable(vertices_name, var_vertices.dtype, dimensions=(var_name, 'vertices')) + new_lat_vertices = destiny.createVariable(vertices_name, var_vertices.dtype, + dimensions=(var_name, 'vertices')) new_lat_vertices[:] = vertices_values Utils.copy_attributes(new_lat_vertices, var_vertices) diff --git a/earthdiagnostics/general/simplify_dimensions.py b/earthdiagnostics/general/simplify_dimensions.py index 63444c1d..581762f1 100644 --- a/earthdiagnostics/general/simplify_dimensions.py +++ b/earthdiagnostics/general/simplify_dimensions.py @@ -74,25 +74,34 @@ class SimplifyDimensions(Diagnostic): options['domain'], var, options['grid'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid, to_modify=True) + + def declare_data_generated(self): + self.send_file = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) - - handler = Utils.openCdf(variable_file) - if not 'i' in handler.dimensions: + handler = Utils.openCdf(self.variable_file) + if 'i' not in handler.dimensions: raise Exception('Variable {0.domain}:{0.variable} does not have i,j dimensions'.format(self)) lat = handler.variables['lat'] lat_values = lat[:, 0:1] + # noinspection PyTypeChecker if np.any(lat[:] - lat_values != 0): - raise Exception('Latitude is not constant over i dimension for variable {0.domain}:{0.variable}'.format(self)) + raise Exception('Latitude is not constant over i dimension for variable ' + '{0.domain}:{0.variable}'.format(self)) lon = handler.variables['lon'] lon_values = lon[0:1, :] + # noinspection PyTypeChecker if np.any(lon[:] - lon != 0): - raise 
Exception('Longitude is not constant over j dimension for variable {0.domain}:{0.variable}'.format(self)) + raise Exception('Longitude is not constant over j dimension for variable ' + '{0.domain}:{0.variable}'.format(self)) temp = TempFile.get() new_file = Utils.openCdf(temp, 'w') @@ -117,7 +126,8 @@ class SimplifyDimensions(Diagnostic): self.send_file(temp, self.domain, self.variable, self.startdate, self.member, self.chunk, grid=self.grid) - def _create_var(self, var_name, var_values, source, destiny): + @staticmethod + def _create_var(var_name, var_values, source, destiny): old_var = source.variables[var_name] new_var = destiny.createVariable(var_name, old_var.dtype, dimensions=(var_name, )) new_var[:] = var_values @@ -131,7 +141,8 @@ class SimplifyDimensions(Diagnostic): vertices_values = var_vertices[0:1, ...] else: vertices_values = var_vertices[:, 0:1, :] - new_lat_vertices = destiny.createVariable(vertices_name, var_vertices.dtype, dimensions=(var_name, 'vertices')) + new_lat_vertices = destiny.createVariable(vertices_name, var_vertices.dtype, + dimensions=(var_name, 'vertices')) new_lat_vertices[:] = vertices_values Utils.copy_attributes(new_lat_vertices, var_vertices) diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py index 544e2f73..c72e83e5 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -3,7 +3,7 @@ import os from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import * from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms +from earthdiagnostics.modelingrealm import ModelingRealm class AverageSection(Diagnostic): @@ -103,4 +103,4 @@ class AverageSection(Diagnostic): variable_file), output=temp) os.remove(variable_file) - self.mean.set_local_file(temp, rename_var='tos') \ No newline at end of file + self.mean.set_local_file(temp, rename_var='tos') diff --git 
a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index 7008f833..b0e1dd6e 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -149,7 +149,7 @@ class Gyres(Diagnostic): handler.close() handler_original.close() - self.gyre.set_file_ready(output) + self.gyre.set_local_file(output) Log.info('Finished gyres for startdate {0}, member {1}, chunk {2}', self.startdate, self.member, self.chunk) def _gyre(self, site, invert=False): diff --git a/earthdiagnostics/ocean/heatcontentlayer.py b/earthdiagnostics/ocean/heatcontentlayer.py index 6d48b27b..57e17da8 100644 --- a/earthdiagnostics/ocean/heatcontentlayer.py +++ b/earthdiagnostics/ocean/heatcontentlayer.py @@ -151,7 +151,7 @@ class HeatContentLayer(Diagnostic): return job_list def request_data(self): - self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao',self.startdate, self.member, self.chunk) + self.thetao = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) def declare_data_generated(self): self.heatc = self.declare_chunk(ModelingRealms.ocean, 'heatc', self.startdate, self.member, self.chunk, diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index 315e74e1..3b665ce7 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -1,7 +1,4 @@ # coding=utf-8 -from bscearth.utils.log import Log - -from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import * from earthdiagnostics.utils import Utils, TempFile diff --git a/earthdiagnostics/ocean/maxmoc.py b/earthdiagnostics/ocean/maxmoc.py index ab905b15..e403be00 100644 --- a/earthdiagnostics/ocean/maxmoc.py +++ b/earthdiagnostics/ocean/maxmoc.py @@ -1,12 +1,10 @@ # coding=utf-8 import netCDF4 import numpy as np -import os from bscearth.utils.log import Log from earthdiagnostics.constants import Basins from earthdiagnostics.box import Box from 
earthdiagnostics.diagnostic import Diagnostic, DiagnosticBasinOption, DiagnosticFloatOption -from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.variable_type import VariableType diff --git a/earthdiagnostics/ocean/mxl.py b/earthdiagnostics/ocean/mxl.py index b3ac1eb6..fe531b76 100644 --- a/earthdiagnostics/ocean/mxl.py +++ b/earthdiagnostics/ocean/mxl.py @@ -54,16 +54,19 @@ class Mxl(Diagnostic): job_list.append(Mxl(diags.data_manager, startdate, member, chunk)) return job_list + def request_data(self): + self.thetao_file = self.request_chunk(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) + self.so_file = self.request_chunk(ModelingRealms.ocean, 'so', self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.mlotst_file = self.declare_chunk(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + def compute(self): """ Runs the diagnostic """ temp = TempFile.get() - thetao = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) - so = self.data_manager.get_file(ModelingRealms.ocean, 'so', self.startdate, self.member, self.chunk) - cdftools.run('cdfmxl', input=[thetao, so], output=temp, options='-nc4') - os.remove(thetao) - os.remove(so) + cdftools.run('cdfmxl', input=[self.thetao_file, self.so_file], output=temp, options='-nc4') temp2 = TempFile.get() source = Utils.openCdf(temp) destiny = Utils.openCdf(temp2, 'w') @@ -72,6 +75,5 @@ class Mxl(Diagnostic): Utils.copy_variable(source, destiny, 'lon') source.close() destiny.close() - self.send_file(temp2, ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk, - rename_var='somxl010') + self.mlotst_file.set_local_file(temp2, rename_var='somxl010') os.remove(temp) diff --git a/earthdiagnostics/ocean/rotation.py b/earthdiagnostics/ocean/rotation.py index 
f9e758a5..797a9385 100644 --- a/earthdiagnostics/ocean/rotation.py +++ b/earthdiagnostics/ocean/rotation.py @@ -26,8 +26,6 @@ class Rotation(Diagnostic): :type member: int :param chunk: chunk's number :type chunk: int - :param variable: variable's name - :type variable: str :param domain: variable's domain :type domain: Domain """ diff --git a/earthdiagnostics/ocean/siasiesiv.py b/earthdiagnostics/ocean/siasiesiv.py index 1dd8a604..bd1fe58f 100644 --- a/earthdiagnostics/ocean/siasiesiv.py +++ b/earthdiagnostics/ocean/siasiesiv.py @@ -101,6 +101,7 @@ class Siasiesiv(Diagnostic): def _declare_var(self, var_name): self.generated[var_name] = self.declare_chunk(ModelingRealms.seaIce, var_name, self.startdate, self.member, self.chunk, region=self.basin.fullname) + def compute(self): """ Runs the diagnostic diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index f0fbb5d8..d2b6ecc0 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -1,3 +1,4 @@ +# coding=utf-8 class Publisher(object): """ Base class to provide functionality to notify updates to other objects diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 96ba2edd..1bdfdb95 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -2,11 +2,10 @@ import os from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date -from earthdiagnostics.datamanager import DataManager, NetCDFFile +from earthdiagnostics.datamanager import DataManager from earthdiagnostics.utils import TempFile, Utils from datetime import datetime -from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType @@ -195,6 +194,7 @@ class THREDDSManager(DataManager): """ Creates the link of a given file from the CMOR repository. 
+ :param cmor_var: :param move_old: :param date_str: :param year: if frequency is yearly, this parameter is used to give the corresponding year diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index d0bce08d..b696ef2a 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -271,7 +271,8 @@ class VariableManager(object): except Exception as ex: Log.error('Table {0} can not be loaded: {1}', sheet_name, ex) - def _process_modelling_realm(self, var, value): + @staticmethod + def _process_modelling_realm(var, value): if value is None: value = '' modelling_realm = value.split(' ') @@ -374,7 +375,8 @@ class Variable(object): return CMORTable(table_name, frequency, 'December 2013') return self.tables[0] - def _select_most_specific(self, parsed): + @staticmethod + def _select_most_specific(parsed): parsed = set(parsed) if {ModelingRealms.land, ModelingRealms.landIce} == parsed: return ModelingRealms.landIce diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 439db356..178b360e 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -150,7 +150,8 @@ class WorkManager(object): Log.error('Total wasted time: {0}', sum([job.consumed_time for job in failed], datetime.timedelta())) Log.info('') - def _run_job(self, job): + @staticmethod + def _run_job(job): Log.info('Starting {0}', job) job.status = DiagnosticStatus.RUNNING time = datetime.datetime.now() diff --git a/test/unit/__init__.py b/test/unit/__init__.py index caa99542..ab34e265 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -8,12 +8,12 @@ from test_cdftools import TestCDFTools from test_utils import TestTempFile, TestUtils from test_psi import TestPsi from test_areamoc import TestAreaMoc -from test_averagesection import TestAverageSection +# from test_averagesection import TestAverageSection from test_cutsection import TestCutSection from test_convectionsites import TestConvectionSites from 
test_frequency import TestFrequency from test_gyres import TestGyres -from test_heatcontent import TestHeatContent +# from test_heatcontent import TestHeatContent from test_heatcontentlayer import TestHeatContentLayer from test_interpolate import TestInterpolate from test_maxmoc import TestMaxMoc diff --git a/test/unit/test_earthdiags.py b/test/unit/test_earthdiags.py index 5dc657b8..122793ec 100644 --- a/test/unit/test_earthdiags.py +++ b/test/unit/test_earthdiags.py @@ -1,8 +1,8 @@ -# coding=utf-8 -from unittest import TestCase - -from earthdiagnostics.earthdiags import EarthDiags - - -class TestEarthDiags(TestCase): - pass +# # coding=utf-8 +# from unittest import TestCase +# +# from earthdiagnostics.earthdiags import EarthDiags +# +# +# class TestEarthDiags(TestCase): +# pass -- GitLab From a0af02e7e244f52d0bbe53eaed29db7bdf0c0e5f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 12 May 2017 18:43:39 +0200 Subject: [PATCH 35/82] THREDDS manager adapted to new way of working (not finished) --- earthdiagnostics/datafile.py | 2 +- earthdiagnostics/datamanager.py | 25 ++++++ earthdiagnostics/diagnostic.py | 31 ------- .../statistics/monthlypercentile.py | 61 ++++++++++---- earthdiagnostics/threddsmanager.py | 83 ++++++++++++++++++- 5 files changed, 153 insertions(+), 49 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 541741f7..19cd6a8c 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -124,7 +124,7 @@ class DataFile(Publisher): return new_object def download(self): - raise NotImplementedError() + raise NotImplementedError('Class must implement the download method') def prepare_to_upload(self, rename_var): Utils.convert2netcdf4(self.local_file) diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 3fbbfda0..94017110 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -163,6 +163,31 @@ class DataManager(object): """ 
pass + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :return: path to the copy created on the scratch folder + :rtype: str + """ + raise NotImplementedError('Class must override request_chunk method') + class UnitConversion(object): """ diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 0db60e59..4b305652 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -89,37 +89,6 @@ class Diagnostic(Publisher): return Diagnostic._diag_list[name] return None - def send_file(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, - box=None, rename_var=None, frequency=None, year=None, date_str=None, move_old=False, - vartype=VariableType.MEAN): - """ - - :param filetosend: - :param domain: - :type domain: ModelingRealm - :param var: - :param startdate: - :param member: - :param chunk: - :param grid: - :param region: - :param box: - :param rename_var: - :param frequency: - :type frequency: Frequency - :param year: - :param date_str: - :param move_old: - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: - """ - if isinstance(region, Basin): - region = region.fullname - self.data_manager.send_file(filetosend, 
domain, var, startdate, member, chunk, grid, region, - box, rename_var, frequency, year, date_str, move_old, diagnostic=self, - vartype=vartype) - def compute(self): """ Calculates the diagnostic and stores the output diff --git a/earthdiagnostics/statistics/monthlypercentile.py b/earthdiagnostics/statistics/monthlypercentile.py index eab4ea02..aedc97f4 100644 --- a/earthdiagnostics/statistics/monthlypercentile.py +++ b/earthdiagnostics/statistics/monthlypercentile.py @@ -59,24 +59,52 @@ class MonthlyPercentile(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticOption('domain'), - DiagnosticDomainOption('variable'), - DiagnosticListIntOption('percentiles', None, 0, 100)) + options_available = (DiagnosticDomainOption('domain'), + DiagnosticOption('variable'), + DiagnosticListIntOption('percentiles', [], 0, 100)) options = cls.process_options(options, options_available) job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(MonthlyPercentile(diags.data_manager, startdate, member, chunk, - options['variable'], options['domain'], options['percentiles'])) + options['domain'], options['variable'], options['percentiles'])) return job_list + def request_data(self): + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.max_file = self.declare_chunk(self.domain, self.variable_max, self.startdate, self.member, self.chunk, + frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) + self.min_file = self.declare_chunk(self.domain, self.variable_min, self.startdate, self.member, self.chunk, + frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) + self.percentile_file = {} + for percentile in self.percentiles: + self.percentile_file[percentile] = self.declare_chunk(self.domain, self.percentile(percentile), + self.startdate, self.member, self.chunk, + 
frequency=Frequencies.monthly, + vartype=VariableType.STATISTIC) + + self.declare_chunk(self.domain, '{0}_q{1}'.format(self.variable, percentile), self.startdate, + self.member, self.chunk, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) + + @property + def variable_max(self): + return '{0}max'.format(self.variable) + + @property + def variable_min(self): + return '{0}min'.format(self.variable) + + def percentile(self, percentile): + return '{0}_q{1}'.format(self.variable, percentile) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, self.chunk) temp = TempFile.get() - handler = Utils.openCdf(variable_file) + handler = Utils.openCdf(self.variable_file) datetimes = Utils.get_datetime_from_netcdf(handler) handler.close() @@ -97,25 +125,30 @@ class MonthlyPercentile(Diagnostic): if start_index != 0 or end_index != datetimes.size - 1: start_date = '{0.year}-{0.month}-{0.day}'.format(datetimes[start_index]) end_date = '{0.year}-{0.month}-{0.day}'.format(datetimes[end_index]) - Utils.cdo.seldate('{0},{1}'.format(start_date, end_date), input=variable_file, output=temp) + Utils.cdo.seldate('{0},{1}'.format(start_date, end_date), input=self.variable_file, output=temp) Utils.rename_variable(temp, 'lev', 'ensemble', False, True) - shutil.move(temp, variable_file) + else: + temp = Utils.copy_file(self.variable_file, temp) Log.debug('Computing minimum') monmin_file = TempFile.get() - Utils.cdo.monmin(input=variable_file, output=monmin_file) + Utils.cdo.monmin(input=temp, output=monmin_file) Log.debug('Computing maximum') monmax_file = TempFile.get() - Utils.cdo.monmax(input=variable_file, output=monmax_file) + Utils.cdo.monmax(input=temp, output=monmax_file) for percentile in self.percentiles: Log.debug('Computing percentile {0}', percentile) - Utils.cdo.monpctl(str(percentile), input=[variable_file, monmin_file, monmax_file], output=temp) + 
Utils.cdo.monpctl(str(percentile), input=[temp, monmin_file, monmax_file], output=temp) Utils.rename_variable(temp, 'lev', 'ensemble', False, True) - self.send_file(temp, self.domain, '{0}_q{1}'.format(self.variable, percentile), self.startdate, - self.member, self.chunk, frequency=Frequencies.monthly, rename_var=self.variable, - vartype=VariableType.STATISTIC) + self.percentiles[percentile].set_local_file(temp, rename_var=self.variable) + + Utils.rename_variable(monmax_file, 'lev', 'ensemble', False, True) + self.max_file.set_local_file(monmax_file, rename_var=self.variable) + + Utils.rename_variable(monmin_file, 'lev', 'ensemble', False, True) + self.max_file.set_local_file(monmin_file, rename_var=self.variable) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 1bdfdb95..88313172 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -2,6 +2,7 @@ import os from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date +from datafile import DataFile from earthdiagnostics.datamanager import DataManager from earthdiagnostics.utils import TempFile, Utils from datetime import datetime @@ -50,6 +51,7 @@ class THREDDSManager(DataManager): Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) return temp + # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ @@ -83,7 +85,7 @@ class THREDDSManager(DataManager): end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) - return thredds_subset.check() + return thredds_subset def get_file_path(self, startdate, domain, var, frequency, vartype, box=None, grid=None): @@ -221,14 +223,89 @@ class THREDDSManager(DataManager): # THREDDSManager does not require links pass + def 
request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param vartype: + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :return: path to the copy created on the scratch folder + :rtype: str + """ + aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) + file_path = self.get_file_path(startdate, domain, var, frequency, vartype, box=box) + + start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', + self.experiment.calendar) + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) + + thredds_subset = THREDDSSubset(aggregation_path, file_path, var, start_chunk, end_chunk) + return thredds_subset + + def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, + vartype=VariableType.MEAN, diagnostic=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param diagnostic: + :param region: + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: 
int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) + file_path = self.get_file_path(startdate, domain, var, frequency, vartype, box=box) + + start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', + self.experiment.calendar) + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) + + thredds_subset = THREDDSSubset(aggregation_path, file_path, var, start_chunk, end_chunk) + return thredds_subset + class THREDDSError(Exception): pass -class THREDDSSubset: - def __init__(self, thredds_path, var, start_time, end_time): +class THREDDSSubset(DataFile): + def __init__(self, thredds_path, file_path, var, start_time, end_time): + super(THREDDSSubset, self).__init__() self.thredds_path = thredds_path + self.file_path = file_path self.var = var self.dimension_indexes = {} self.handler = None -- GitLab From 8976636d58e6d9bb389c33f5ba139b2114f07476 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 23 May 2017 11:06:38 +0200 Subject: [PATCH 36/82] Updated diagnostics to work with the new paradigm --- earthdiagnostics/general/module.py | 27 +++++++++++++--------- earthdiagnostics/ocean/verticalgradient.py | 17 +++++++++----- earthdiagnostics/work_manager.py | 2 ++ 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index 8d08020c..e4518dd3 100644 --- a/earthdiagnostics/general/module.py +++ 
b/earthdiagnostics/general/module.py @@ -1,6 +1,6 @@ # coding=utf-8 from earthdiagnostics.diagnostic import * -from earthdiagnostics.utils import Utils +from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealm import numpy as np import math @@ -81,18 +81,24 @@ class Module(Diagnostic): options['grid'])) return job_list + def request_data(self): + self.component_u_file = self.data_manager.get_file(self.domain, self.componentu, self.startdate, self.member, + self.chunk, grid=self.grid) + self.component_v_file = self.data_manager.get_file(self.domain, self.componentv, self.startdate, self.member, + self.chunk, grid=self.grid) + + def declare_data_generated(self): + self.module_file = self.declare_chunk(self.domain, self.module, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - component_u_file = self.data_manager.get_file(self.domain, self.componentu, self.startdate, self.member, - self.chunk, grid=self.grid) - - component_v_file = self.data_manager.get_file(self.domain, self.componentv, self.startdate, self.member, - self.chunk, grid=self.grid) - - component_u = Utils.openCdf(component_u_file) - component_v = Utils.openCdf(component_v_file) + temp = TempFile.get() + Utils.copy_file(self.component_u_file, temp) + component_u = Utils.openCdf(temp) + component_v = Utils.openCdf(self.component_v_file) variable_u = component_u.variables[self.componentu] variable_v = component_v.variables[self.componentv] @@ -106,5 +112,4 @@ class Module(Diagnostic): component_u.close() component_v.close() - self.send_file(component_u_file, self.domain, self.module, self.startdate, self.member, self.chunk, - grid=self.grid, rename_var=self.componentu) + self.module_file.set_local_file(temp, rename_var=self.componentu) diff --git a/earthdiagnostics/ocean/verticalgradient.py b/earthdiagnostics/ocean/verticalgradient.py index 1893aac8..9fe3d913 100644 --- 
a/earthdiagnostics/ocean/verticalgradient.py +++ b/earthdiagnostics/ocean/verticalgradient.py @@ -81,14 +81,20 @@ class VerticalGradient(Diagnostic): options['variable'], box)) return job_list + def request_data(self): + self.variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, + self.member, self.chunk) + + + def declare_data_generated(self): + self.gradient_file = self.declare_chunk(ModelingRealms.ocean, self.variable + 'vgrad', + self.startdate, self.member, self.chunk, box=self.box) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, - self.chunk) - - handler = Utils.openCdf(variable_file) + handler = Utils.openCdf(self.variable_file) if 'lev' not in handler.dimensions: raise Exception('Variable {0} does not have a level dimension') var_handler = handler.variables[self.variable] @@ -109,6 +115,5 @@ class VerticalGradient(Diagnostic): new_var.long_name += ' Vertical gradient' new_var.standard_name += '_vertical_gradient' - self.send_file(temp, ModelingRealms.ocean, self.variable + 'vgrad', self.startdate, self.member, self.chunk, - box=self.box) + self.gradient_file.set_local_file(temp) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 178b360e..73545a0a 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -192,6 +192,7 @@ class WorkManager(object): Diagnostic.register(RelinkAll) Diagnostic.register(Scale) Diagnostic.register(Attribute) + Diagnostic.register(Module) @staticmethod def _register_ocean_diagnostics(): @@ -214,5 +215,6 @@ class WorkManager(object): Diagnostic.register(HeatContent) Diagnostic.register(RegionMean) Diagnostic.register(Rotation) + Diagnostic.register(VerticalGradient) -- GitLab From 35d2347749ae8f468cb7ee47801bbc26e6d16488 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 23 May 2017 12:06:06 +0200 
Subject: [PATCH 37/82] Thredds manager now working with new scheme --- VERSION | 2 +- earthdiagnostics/cmormanager.py | 99 ------------------- earthdiagnostics/datafile.py | 12 --- earthdiagnostics/ocean/verticalgradient.py | 5 +- .../statistics/monthlypercentile.py | 8 +- earthdiagnostics/threddsmanager.py | 33 +++++-- 6 files changed, 33 insertions(+), 126 deletions(-) diff --git a/VERSION b/VERSION index bdb0fbcd..c4dd8a8a 100644 --- a/VERSION +++ b/VERSION @@ -1,2 +1,2 @@ -3.0.0b51 +3.0.0rc1 diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index c92e501a..e07cdebd 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -57,39 +57,6 @@ class CMORManager(DataManager): except Exception: return False - def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None,): - """ - Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - - :param domain: CMOR domain - :type domain: Domain - :param var: variable name - :type var: str - :param startdate: file's startdate - :type startdate: str - :param member: file's member - :type member: int - :param chunk: file's chunk - :type chunk: int - :param grid: file's grid (only needed if it is not the original) - :type grid: str|NoneType - :param box: file's box (only needed to retrieve sections or averages) - :type box: Box - :param frequency: file's frequency (only needed if it is different from the default) - :type frequency: Frequency|NoneType - :param vartype: Variable type (mean, statistic) - :type vartype: VariableType - :return: path to the copy created on the scratch folder - :rtype: str - """ - cmor_var = self.variable_list.get_variable(var) - var = self._get_final_var_name(box, var) - filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid, None, None) - - temp_path = TempFile.get() - Utils.copy_file(filepath, temp_path) - return 
temp_path - def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy @@ -359,72 +326,6 @@ class CMORManager(DataManager): grid=grid, year=str(year), date_str=date_str) self.create_link(domain, filepath, frequency, var, grid, move_old, vartype) - def get_year(self, domain, var, startdate, member, year, grid=None, box=None): - """ - Ge a file containing all the data for one year for one variable - :param domain: variable's domain - :type domain: str - :param var: variable's name - :type var: str - :param startdate: startdate to retrieve - :type startdate: str - :param member: member to retrieve - :type member: int - :param year: year to retrieve - :type year: int - :param grid: variable's grid - :type grid: str - :param box: variable's box - :type box: Box - :return: - """ - - chunk_files = list() - for chunk in self.experiment.get_year_chunks(startdate, year): - chunk_files.append(self.get_file(domain, var, startdate, member, chunk, grid=grid, box=box)) - - if len(chunk_files) > 1: - temp = self._merge_chunk_files(chunk_files) - else: - temp = chunk_files[0] - temp2 = self._select_data_of_given_year(temp, year) - os.remove(temp) - return temp2 - - @staticmethod - def _select_data_of_given_year(data_file, year): - temp2 = TempFile.get() - handler = Utils.openCdf(data_file) - times = Utils.get_datetime_from_netcdf(handler) - x = 0 - first_index = None - last_index = None - while x < times.size: - if times[x].year == year: - first_index = x - break - else: - x += 1 - - while x < times.size: - if times[x].year != year: - last_index = x - break - else: - x += 1 - if last_index is None: - last_index = times.size - Utils.nco.ncks(input=data_file, output=temp2, options=['-d time,{0},{1}'.format(first_index, last_index - 1)]) - return temp2 - - @staticmethod - def _merge_chunk_files(chunk_files): - temp = TempFile.get() 
- Utils.nco.ncrcat(input=' '.join(chunk_files), output=temp) - for chunk_file in chunk_files: - os.remove(chunk_file) - return temp - # noinspection PyPep8Naming def prepare(self): """ diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 19cd6a8c..3c2e2215 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -424,18 +424,6 @@ class UnitConversion(object): return None, None -class THREDDSData(DataFile): - - def download(self): - self.local_path = TempFile.get() - Utils.execute_shell_command(['nccopy', '-s', '-d', '-4', self.remote_file, self.local_path]) - if not Utils.check_netcdf_file(self.local_path): - self.message = 'Can not retrieve {0} from server'.format(self.remote_file) - self.storage_status = LocalStatus.FAILED - return - self.status = LocalStatus.READY - - class NetCDFFile(DataFile): def download(self): diff --git a/earthdiagnostics/ocean/verticalgradient.py b/earthdiagnostics/ocean/verticalgradient.py index 9fe3d913..14505f53 100644 --- a/earthdiagnostics/ocean/verticalgradient.py +++ b/earthdiagnostics/ocean/verticalgradient.py @@ -82,9 +82,8 @@ class VerticalGradient(Diagnostic): return job_list def request_data(self): - self.variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, - self.member, self.chunk) - + self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, + self.member, self.chunk) def declare_data_generated(self): self.gradient_file = self.declare_chunk(ModelingRealms.ocean, self.variable + 'vgrad', diff --git a/earthdiagnostics/statistics/monthlypercentile.py b/earthdiagnostics/statistics/monthlypercentile.py index aedc97f4..73ff6798 100644 --- a/earthdiagnostics/statistics/monthlypercentile.py +++ b/earthdiagnostics/statistics/monthlypercentile.py @@ -104,7 +104,7 @@ class MonthlyPercentile(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - handler = Utils.openCdf(self.variable_file) + handler = 
Utils.openCdf(self.variable_file.local_file) datetimes = Utils.get_datetime_from_netcdf(handler) handler.close() @@ -125,10 +125,10 @@ class MonthlyPercentile(Diagnostic): if start_index != 0 or end_index != datetimes.size - 1: start_date = '{0.year}-{0.month}-{0.day}'.format(datetimes[start_index]) end_date = '{0.year}-{0.month}-{0.day}'.format(datetimes[end_index]) - Utils.cdo.seldate('{0},{1}'.format(start_date, end_date), input=self.variable_file, output=temp) + Utils.cdo.seldate('{0},{1}'.format(start_date, end_date), input=self.variable_file.local_file, output=temp) Utils.rename_variable(temp, 'lev', 'ensemble', False, True) else: - temp = Utils.copy_file(self.variable_file, temp) + Utils.copy_file(self.variable_file.local_file, temp) Log.debug('Computing minimum') monmin_file = TempFile.get() @@ -148,7 +148,7 @@ class MonthlyPercentile(Diagnostic): self.max_file.set_local_file(monmax_file, rename_var=self.variable) Utils.rename_variable(monmin_file, 'lev', 'ensemble', False, True) - self.max_file.set_local_file(monmin_file, rename_var=self.variable) + self.min_file.set_local_file(monmin_file, rename_var=self.variable) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 88313172..97c85176 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -1,8 +1,9 @@ # coding=utf-8 import os from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date +from bscearth.utils.log import Log -from datafile import DataFile +from datafile import DataFile, StorageStatus, LocalStatus from earthdiagnostics.datamanager import DataManager from earthdiagnostics.utils import TempFile, Utils from datetime import datetime @@ -256,6 +257,8 @@ class THREDDSManager(DataManager): end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) thredds_subset = THREDDSSubset(aggregation_path, file_path, var, start_chunk, end_chunk) + 
thredds_subset.local_status = LocalStatus.PENDING + self.requested_files[file_path] = thredds_subset return thredds_subset def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, @@ -292,8 +295,16 @@ class THREDDSManager(DataManager): start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) + final_name = self._get_final_var_name(box, var) + if file_path in self.requested_files: + thredds_subset = self.requested_files[file_path] + else: + thredds_subset = THREDDSSubset(aggregation_path, file_path, var, start_chunk, end_chunk) + self.requested_files[file_path] = thredds_subset - thredds_subset = THREDDSSubset(aggregation_path, file_path, var, start_chunk, end_chunk) + thredds_subset.final_name = final_name + thredds_subset.diagnostic = diagnostic + thredds_subset.storage_status = StorageStatus.PENDING return thredds_subset @@ -305,7 +316,7 @@ class THREDDSSubset(DataFile): def __init__(self, thredds_path, file_path, var, start_time, end_time): super(THREDDSSubset, self).__init__() self.thredds_path = thredds_path - self.file_path = file_path + self.remote_file = file_path self.var = var self.dimension_indexes = {} self.handler = None @@ -323,7 +334,15 @@ class THREDDSSubset(DataFile): def download(self): url = self.get_url() - return self._download_url(url) + if not self.local_file: + self.local_file = TempFile.get() + Utils.execute_shell_command(['nccopy', '-s', '-d', '4', url, self.local_file]) + if not Utils.check_netcdf_file(self.local_file): + Log.error('Can not retrieve {0} from server'.format(url)) + self.local_status = LocalStatus.FAILED + return + Log.info('Request {0} ready!', url) + self.local_status = LocalStatus.READY def check(self): # noinspection PyBroadException @@ -360,12 +379,12 @@ class THREDDSSubset(DataFile): time_end += 
1 self.dimension_indexes['time'] = (time_start, time_end) - @staticmethod - def _download_url(url): + def _download_url(self, url): temp = TempFile.get() - Utils.execute_shell_command(['nccopy', '-s', '-d', '-4', url, temp]) + Utils.execute_shell_command(['nccopy', '-s', '-d', '4', url, temp]) if not Utils.check_netcdf_file(temp): raise THREDDSError('Can not retrieve {0} from server'.format(url)) + self.local_status = LocalStatus.READY return temp def _get_subset_url(self): -- GitLab From da3ea5e4a3fc0568b52d005ffdc4c34bbaefe002 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 23 May 2017 12:18:44 +0200 Subject: [PATCH 38/82] Improved metadata --- earthdiagnostics/earthdiags.py | 4 ++++ earthdiagnostics/statistics/monthlypercentile.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 146d57c8..bc63357d 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -221,6 +221,10 @@ class EarthDiags(object): file_handler.close() def _prepare_mesh_files(self): + model_version = self.config.experiment.model_version + if not model_version: + Log.info('No model version defined. 
Skipping mesh files copy!') + return Log.info('Copying mesh files') con_files = self.config.con_files model_version = self.config.experiment.model_version diff --git a/earthdiagnostics/statistics/monthlypercentile.py b/earthdiagnostics/statistics/monthlypercentile.py index 73ff6798..0d439bc0 100644 --- a/earthdiagnostics/statistics/monthlypercentile.py +++ b/earthdiagnostics/statistics/monthlypercentile.py @@ -142,12 +142,21 @@ class MonthlyPercentile(Diagnostic): Log.debug('Computing percentile {0}', percentile) Utils.cdo.monpctl(str(percentile), input=[temp, monmin_file, monmax_file], output=temp) Utils.rename_variable(temp, 'lev', 'ensemble', False, True) + handler = Utils.openCdf(monmax_file) + handler.variables[self.variable].long_name += ' {0} Percentile'.format(percentile) + handler.close() self.percentiles[percentile].set_local_file(temp, rename_var=self.variable) Utils.rename_variable(monmax_file, 'lev', 'ensemble', False, True) + handler = Utils.openCdf(monmax_file) + handler.variables[self.variable].long_name += ' Monthly Maximum' + handler.close() self.max_file.set_local_file(monmax_file, rename_var=self.variable) Utils.rename_variable(monmin_file, 'lev', 'ensemble', False, True) + handler = Utils.openCdf(monmin_file) + handler.variables[self.variable].long_name += ' Monthly Minimum' + handler.close() self.min_file.set_local_file(monmin_file, rename_var=self.variable) -- GitLab From b9c97cf2b972058ff9e1a05fde2378f731baebca Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 23 May 2017 15:12:55 +0200 Subject: [PATCH 39/82] Working on option --- earthdiagnostics/diagnostic.py | 9 +++++++++ earthdiagnostics/general/attribute.py | 4 ++-- earthdiagnostics/general/dailymean.py | 6 +++--- earthdiagnostics/general/module.py | 2 +- earthdiagnostics/general/monthlymean.py | 4 ++-- earthdiagnostics/general/relink.py | 4 ++-- earthdiagnostics/general/rewrite.py | 4 ++-- earthdiagnostics/general/scale.py | 4 ++-- earthdiagnostics/general/select_levels.py 
| 2 +- earthdiagnostics/general/simplify_dimensions.py | 9 ++++----- earthdiagnostics/general/yearlymean.py | 6 +++--- earthdiagnostics/ocean/averagesection.py | 4 ++-- earthdiagnostics/ocean/cutsection.py | 4 ++-- earthdiagnostics/ocean/interpolate.py | 4 ++-- earthdiagnostics/ocean/interpolatecdo.py | 4 ++-- earthdiagnostics/ocean/regionmean.py | 4 ++-- earthdiagnostics/ocean/rotation.py | 2 +- earthdiagnostics/ocean/verticalgradient.py | 2 +- earthdiagnostics/ocean/verticalmean.py | 2 +- earthdiagnostics/ocean/verticalmeanmeters.py | 4 ++-- earthdiagnostics/statistics/climatologicalpercentile.py | 4 ++-- earthdiagnostics/statistics/monthlypercentile.py | 2 +- 22 files changed, 49 insertions(+), 41 deletions(-) diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 4b305652..aa93f89c 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -320,6 +320,9 @@ class DiagnosticListFrequenciesOption(DiagnosticOption): class DiagnosticVariableOption(DiagnosticOption): + def __init__(self, name='variable', default_value=None): + super(DiagnosticVariableOption, self).__init__(name, default_value) + def parse(self, option_value): option_value = self.check_default(option_value) real_name = VariableManager().get_variable(option_value, False) @@ -342,11 +345,17 @@ class DiagnosticVariableListOption(DiagnosticOption): class DiagnosticDomainOption(DiagnosticOption): + def __init__(self, name='domain', default_value=None): + super(DiagnosticDomainOption, self).__init__(name, default_value) + def parse(self, option_value): return ModelingRealms.parse(self.check_default(option_value)) class DiagnosticFrequencyOption(DiagnosticOption): + def __init__(self, name='frequency', default_value=None): + super(DiagnosticFrequencyOption, self).__init__(name, default_value) + def parse(self, option_value): return Frequency.parse(self.check_default(option_value)) diff --git a/earthdiagnostics/general/attribute.py 
b/earthdiagnostics/general/attribute.py index d4dc308b..c2946517 100644 --- a/earthdiagnostics/general/attribute.py +++ b/earthdiagnostics/general/attribute.py @@ -65,8 +65,8 @@ class Attribute(Diagnostic): :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticOption('name'), DiagnosticComplexStrOption('value'), DiagnosticOption('grid', '')) diff --git a/earthdiagnostics/general/dailymean.py b/earthdiagnostics/general/dailymean.py index 984c6862..3ca48d24 100644 --- a/earthdiagnostics/general/dailymean.py +++ b/earthdiagnostics/general/dailymean.py @@ -70,9 +70,9 @@ class DailyMean(Diagnostic): :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), - DiagnosticFrequencyOption('frequency'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), + DiagnosticFrequencyOption(), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index e4518dd3..6d9a5fc0 100644 --- a/earthdiagnostics/general/module.py +++ b/earthdiagnostics/general/module.py @@ -68,7 +68,7 @@ class Module(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), DiagnosticVariableOption('componentu'), DiagnosticVariableOption('componentv'), DiagnosticVariableOption('module'), diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 0aea6c96..0a578106 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -68,8 +68,8 @@ class MonthlyMean(Diagnostic): :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), + 
options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticFrequencyOption('frequency', Frequencies.daily), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/general/relink.py b/earthdiagnostics/general/relink.py index 5afb52ef..b5dcad61 100644 --- a/earthdiagnostics/general/relink.py +++ b/earthdiagnostics/general/relink.py @@ -63,8 +63,8 @@ class Relink(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticBoolOption('move_old', True), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/general/rewrite.py b/earthdiagnostics/general/rewrite.py index a51e363b..6b881716 100644 --- a/earthdiagnostics/general/rewrite.py +++ b/earthdiagnostics/general/rewrite.py @@ -58,8 +58,8 @@ class Rewrite(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index d1d2c69f..25306047 100644 --- a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -70,8 +70,8 @@ class Scale(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticFloatOption('value'), DiagnosticFloatOption('offset'), DiagnosticOption('grid', ''), diff --git a/earthdiagnostics/general/select_levels.py 
b/earthdiagnostics/general/select_levels.py index c5042d9b..85d27c27 100644 --- a/earthdiagnostics/general/select_levels.py +++ b/earthdiagnostics/general/select_levels.py @@ -65,7 +65,7 @@ class SelectLevels(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), DiagnosticVariableListOption('variables'), DiagnosticIntOption('first_level'), DiagnosticIntOption('last_level'), diff --git a/earthdiagnostics/general/simplify_dimensions.py b/earthdiagnostics/general/simplify_dimensions.py index 581762f1..f9cbcc25 100644 --- a/earthdiagnostics/general/simplify_dimensions.py +++ b/earthdiagnostics/general/simplify_dimensions.py @@ -61,7 +61,7 @@ class SimplifyDimensions(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), DiagnosticVariableListOption('variables'), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) @@ -79,8 +79,8 @@ class SimplifyDimensions(Diagnostic): grid=self.grid, to_modify=True) def declare_data_generated(self): - self.send_file = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + self.simplified = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) def compute(self): """ @@ -123,8 +123,7 @@ class SimplifyDimensions(Diagnostic): handler.close() new_file.close() - self.send_file(temp, self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + self.simplified.set_local_file(temp) @staticmethod def _create_var(var_name, var_values, source, destiny): diff --git a/earthdiagnostics/general/yearlymean.py b/earthdiagnostics/general/yearlymean.py index 8906e591..8d46b24f 100644 --- a/earthdiagnostics/general/yearlymean.py +++ b/earthdiagnostics/general/yearlymean.py @@ -70,9 +70,9 
@@ class YearlyMean(Diagnostic): :return: """ - options_available = (DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain'), - DiagnosticFrequencyOption('frequency'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), + DiagnosticFrequencyOption(), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py index c72e83e5..114d6d1b 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -65,8 +65,8 @@ class AverageSection(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), - DiagnosticVariableOption('variable'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticIntOption('min_lon'), DiagnosticIntOption('max_lon'), DiagnosticIntOption('min_lat'), diff --git a/earthdiagnostics/ocean/cutsection.py b/earthdiagnostics/ocean/cutsection.py index a616285b..2592c413 100644 --- a/earthdiagnostics/ocean/cutsection.py +++ b/earthdiagnostics/ocean/cutsection.py @@ -79,10 +79,10 @@ class CutSection(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), + options_available = (DiagnosticVariableOption(), DiagnosticBoolOption('zonal'), DiagnosticIntOption('value'), - DiagnosticDomainOption('domain', ModelingRealms.ocean)) + DiagnosticDomainOption(default_value=ModelingRealms.ocean)) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index 6b867330..99e024dc 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -84,8 +84,8 @@ class Interpolate(Diagnostic): :return: """ options_available = (DiagnosticOption('target_grid'), - 
DiagnosticVariableOption('variable'), - DiagnosticDomainOption('domain', ModelingRealms.ocean), + DiagnosticVariableOption(), + DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticBoolOption('invert_lat', False), DiagnosticOption('original_grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index 3b665ce7..fd18fcdb 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -80,8 +80,8 @@ class InterpolateCDO(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain', ModelingRealms.ocean), - DiagnosticVariableOption('variable'), + options_available = (DiagnosticDomainOption(default_value=ModelingRealms.ocean), + DiagnosticVariableOption(), DiagnosticOption('target_grid', diags.config.experiment.atmos_grid.lower()), DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index 5127268e..57673671 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -72,8 +72,8 @@ class RegionMean(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), - DiagnosticVariableOption('variable'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticOption('grid_point', 'T'), DiagnosticBasinOption('basin', Basins.Global), DiagnosticIntOption('min_depth', 0), diff --git a/earthdiagnostics/ocean/rotation.py b/earthdiagnostics/ocean/rotation.py index 797a9385..abbe3770 100644 --- a/earthdiagnostics/ocean/rotation.py +++ b/earthdiagnostics/ocean/rotation.py @@ -66,7 +66,7 @@ class Rotation(Diagnostic): """ options_available = 
(DiagnosticVariableOption('variableu'), DiagnosticVariableOption('variablev'), - DiagnosticDomainOption('domain', ModelingRealms.ocean), + DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticOption('executable', '/home/Earth/jvegas/pyCharm/cfutools/interpolation/rotateUVorca')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/verticalgradient.py b/earthdiagnostics/ocean/verticalgradient.py index 14505f53..761216b8 100644 --- a/earthdiagnostics/ocean/verticalgradient.py +++ b/earthdiagnostics/ocean/verticalgradient.py @@ -64,7 +64,7 @@ class VerticalGradient(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), + options_available = (DiagnosticVariableOption(), DiagnosticIntOption('upper_level', 1), DiagnosticIntOption('low_level', 2)) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/verticalmean.py b/earthdiagnostics/ocean/verticalmean.py index 7ae15195..c4ebd7fd 100644 --- a/earthdiagnostics/ocean/verticalmean.py +++ b/earthdiagnostics/ocean/verticalmean.py @@ -64,7 +64,7 @@ class VerticalMean(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), + options_available = (DiagnosticVariableOption(), DiagnosticIntOption('min_depth', -1), DiagnosticIntOption('max_depth', -1)) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index c38056f4..7cdc9b7c 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -65,10 +65,10 @@ class VerticalMeanMeters(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variable'), + options_available = (DiagnosticVariableOption(), DiagnosticFloatOption('min_depth', -1), DiagnosticFloatOption('max_depth', -1), 
- DiagnosticDomainOption('domain', ModelingRealms.ocean)) + DiagnosticDomainOption(default_value=ModelingRealms.ocean)) options = cls.process_options(options, options_available) box = Box(True) diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index dd4d6acb..2efca652 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -65,8 +65,8 @@ class ClimatologicalPercentile(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), - DiagnosticVariableOption('variable'), + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), DiagnosticListIntOption('leadtimes'), DiagnosticIntOption('bins', 2000)) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/statistics/monthlypercentile.py b/earthdiagnostics/statistics/monthlypercentile.py index 0d439bc0..33a0a91f 100644 --- a/earthdiagnostics/statistics/monthlypercentile.py +++ b/earthdiagnostics/statistics/monthlypercentile.py @@ -59,7 +59,7 @@ class MonthlyPercentile(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption('domain'), + options_available = (DiagnosticDomainOption(), DiagnosticOption('variable'), DiagnosticListIntOption('percentiles', [], 0, 100)) options = cls.process_options(options, options_available) -- GitLab From 0a5e28bca972afefa0b1334ea9c1e1d42cea654e Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 29 May 2017 15:03:56 +0200 Subject: [PATCH 40/82] Working to run diagnostics on nemovar --- earthdiagnostics/ocean/heatcontentlayer.py | 12 +- earthdiagnostics/threddsmanager.py | 141 +++++++++------------ setup.py | 2 +- 3 files changed, 69 insertions(+), 86 deletions(-) diff --git a/earthdiagnostics/ocean/heatcontentlayer.py b/earthdiagnostics/ocean/heatcontentlayer.py index 57e17da8..094e2771 
100644 --- a/earthdiagnostics/ocean/heatcontentlayer.py +++ b/earthdiagnostics/ocean/heatcontentlayer.py @@ -75,9 +75,9 @@ class HeatContentLayer(Diagnostic): mask = Utils.get_mask(options['basin']) if 'e3t' in handler.variables: - mask = handler.variables['e3t'][:] * mask + e3t = handler.variables['e3t'][:] elif 'e3t_0' in handler.variables: - mask = handler.variables['e3t_0'][:] * mask + e3t = handler.variables['e3t_0'][:] else: raise Exception('e3t variable can not be found') @@ -88,6 +88,12 @@ class HeatContentLayer(Diagnostic): else: raise Exception('gdepw variable can not be found') + e3t_3d = e3t.shape != depth.shape + if e3t_3d: + mask = e3t_3d * mask + else: + e3t = e3t[0,:] + while len(depth.shape) < 4: depth = np.expand_dims(depth, -1) handler.close() @@ -131,6 +137,8 @@ class HeatContentLayer(Diagnostic): return array array[level:] = 0 + if not e3t_3d: + array *= e3t return array weight = mask * np.apply_along_axis(calculate_weight, 1, depth) * 1020 * 4000 diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 97c85176..37819c6f 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -1,12 +1,19 @@ # coding=utf-8 import os +from time import strptime + +import iris +import netCDF4 +import numpy as np from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date from bscearth.utils.log import Log +from iris.coords import AuxCoord, DimCoord +from cf_units import Unit from datafile import DataFile, StorageStatus, LocalStatus from earthdiagnostics.datamanager import DataManager from earthdiagnostics.utils import TempFile, Utils -from datetime import datetime +from datetime import datetime, timedelta from earthdiagnostics.variable_type import VariableType @@ -41,7 +48,7 @@ class THREDDSManager(DataManager): 'month', self.experiment.calendar) end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) - thredds_subset = 
THREDDSSubset(aggregation_path, variable, startdate, end_chunk).get_url() + thredds_subset = THREDDSSubset(aggregation_path, "", variable, startdate, end_chunk).get_url() selected_months = ','.join([str(add_months(startdate, i, self.experiment.calendar).month) for i in leadtimes]) temp = TempFile.get() if self.config.data_type == 'exp': @@ -85,7 +92,7 @@ class THREDDSManager(DataManager): self.experiment.calendar) end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) - thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) + thredds_subset = THREDDSSubset(aggregation_path, "", var, start_chunk, end_chunk) return thredds_subset def get_file_path(self, startdate, domain, var, frequency, vartype, @@ -155,7 +162,7 @@ class THREDDSManager(DataManager): :return: """ aggregation_path = self.get_var_url(var, startdate, None, box, vartype) - thredds_subset = THREDDSSubset(aggregation_path, var, datetime(year, 1, 1), datetime(year+1, 1, 1)) + thredds_subset = THREDDSSubset(aggregation_path, "", var, datetime(year, 1, 1), datetime(year+1, 1, 1)) return thredds_subset.download() def get_var_url(self, var, startdate, frequency, box, vartype): @@ -314,95 +321,63 @@ class THREDDSError(Exception): class THREDDSSubset(DataFile): def __init__(self, thredds_path, file_path, var, start_time, end_time): + """ + + :param thredds_path: + :param file_path: + :param var: + :type var: str + :param start_time: + :param end_time: + """ super(THREDDSSubset, self).__init__() self.thredds_path = thredds_path self.remote_file = file_path - self.var = var + if '_f' in var: + self.var = var[:var.index('_f')] + self.hourly = var[var.index('_f'):] + else: + self.var = var + self.hourly = '' self.dimension_indexes = {} self.handler = None self.start_time = start_time self.end_time = end_time - def get_url(self): - self.handler = Utils.openCdf(self.thredds_path) - self._read_metadata() - self.handler.close() - - 
self._get_time_indexes() - - return self._get_subset_url() + def __str__(self): + return 'THREDDS {0.thredds_path} ({0.start_time}-{0.end_time})'.format(self) def download(self): - url = self.get_url() - if not self.local_file: - self.local_file = TempFile.get() - Utils.execute_shell_command(['nccopy', '-s', '-d', '4', url, self.local_file]) - if not Utils.check_netcdf_file(self.local_file): - Log.error('Can not retrieve {0} from server'.format(url)) + try: + iris.FUTURE.netcdf_promote = True + iris.FUTURE.netcdf_no_unlimited = True + time_constraint = iris.Constraint(time=lambda cell: self.start_time <= cell.point <= self.end_time) + + var_cube = iris.load_cube(self.thredds_path, constraint=time_constraint, callback=self._correct_cube) + if not self.local_file: + self.local_file = TempFile.get() + iris.save(var_cube, self.local_file, zlib=True) + if not Utils.check_netcdf_file(self.local_file): + raise Exception('netcdf check for downloaded file failed') + Log.info('Request {0} ready!', self) + self.local_status = LocalStatus.READY + except Exception as ex: + Log.error('Can not retrieve {0} from server: {1}'.format(self, ex)) self.local_status = LocalStatus.FAILED - return - Log.info('Request {0} ready!', url) - self.local_status = LocalStatus.READY - def check(self): - # noinspection PyBroadException - try: - self.handler = Utils.openCdf(self.get_url()) - self.handler.close() - return True - except Exception: - return False - - def _read_metadata(self): - self.var_dimensions = self.handler.variables[self.var].dimensions - for dimension in self.var_dimensions: - if dimension == 'time': - continue - self.dimension_indexes[dimension] = (0, self.handler.dimensions[dimension].size - 1) - - if 'time' in self.var_dimensions: - self.times = Utils.get_datetime_from_netcdf(self.handler) - - def _get_time_indexes(self): - if 'time' not in self.var_dimensions: + def _correct_cube(self, cube, field, filename): + if not cube.coords('time'): return - - time_start = 0 - while 
time_start < self.times.size and self.times[time_start] < self.start_time: - time_start += 1 - if time_start == self.times.size: - raise Exception('Timesteps not available for interval {0}-{1}'.format(self.start_time, self.end_time)) - time_end = time_start - if self.times[time_end] >= self.end_time: - raise Exception('Timesteps not available for interval {0}-{1}'.format(self.start_time, self.end_time)) - while time_end < self.times.size - 1 and self.times[time_end + 1] < self.end_time: - time_end += 1 - self.dimension_indexes['time'] = (time_start, time_end) - - def _download_url(self, url): - temp = TempFile.get() - Utils.execute_shell_command(['nccopy', '-s', '-d', '4', url, temp]) - if not Utils.check_netcdf_file(temp): - raise THREDDSError('Can not retrieve {0} from server'.format(url)) - self.local_status = LocalStatus.READY - return temp - - def _get_subset_url(self): - var_slice = self.var - dimensions_slice = '' - - for dimension in self.var_dimensions: - slice_index = self._get_slice_index(self.dimension_indexes[dimension]) - var_slice += slice_index - if dimension == 'ensemble': - dimension = 'realization' - dimensions_slice += '{0}{1},'.format(dimension, slice_index) - - return '{0}?{1}{2}'.format(self.thredds_path, dimensions_slice, var_slice) - - @staticmethod - def _get_slice_index(index_tuple): - return '[{0[0]}:1:{0[1]}]'.format(index_tuple) - - - + time = cube.coord('time') + if time.units.origin.startswith('month'): + ref = strptime(time.units.origin[time.units.origin.index(' since ') + 7:], '%Y-%m-%d %H:%M:%S') + helper = np.vectorize(lambda x: datetime(year=ref.tm_year + int(x) / 12, month=int(x-1) % 12 + 1, day=ref.tm_mday)) + times = np.round(time.points + ref.tm_mon) + dates = helper(times) + dates = netCDF4.date2num(dates, units='days since 1850-01-01', calendar=time.units.calendar) + new_time = DimCoord(dates, standard_name=time.standard_name, long_name=time.long_name, + var_name=time.var_name, attributes=time.attributes, + 
units=Unit('days since 1850-01-01', time.units.calendar)) + [dimension] = cube.coord_dims(time) + cube.remove_coord(time) + cube.add_dim_coord(new_time, dimension) diff --git a/setup.py b/setup.py index 431a7ec3..b2db99a0 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setup( url='http://www.bsc.es/projects/earthsciences/autosubmit/', keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], - install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo', 'nco', 'cfunits>=1.1.4', 'coverage', + install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo', 'nco', 'iris>=1.12.0', 'coverage', 'pygrib', 'openpyxl', 'mock', 'futures'], packages=find_packages(), include_package_data=True, -- GitLab From dbcf843e4d8fbceff65e13682cf0d6191263dfb1 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 2 Jun 2017 17:50:59 +0200 Subject: [PATCH 41/82] Updated cmor tables --- earthdiagnostics/cmor_tables/cmip6 | 2 +- earthdiagnostics/cmor_tables/primavera | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/earthdiagnostics/cmor_tables/cmip6 b/earthdiagnostics/cmor_tables/cmip6 index 8415b26f..78eb04bd 160000 --- a/earthdiagnostics/cmor_tables/cmip6 +++ b/earthdiagnostics/cmor_tables/cmip6 @@ -1 +1 @@ -Subproject commit 8415b26f6dda7b699501c6963a0ec6cb155eb1ab +Subproject commit 78eb04bd32dcc398323b21b1cb0636b2f07ffc68 diff --git a/earthdiagnostics/cmor_tables/primavera b/earthdiagnostics/cmor_tables/primavera index ad4f2567..10e46868 160000 --- a/earthdiagnostics/cmor_tables/primavera +++ b/earthdiagnostics/cmor_tables/primavera @@ -1 +1 @@ -Subproject commit ad4f256777265479b503bfa3e88a61ce05dd932f +Subproject commit 10e46868e356ef3a217c38fe1e0b7d46f8d3158e -- GitLab From 48ef91e16536c21e7e4b0d77daa528f5127a552d Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 6 Jun 2017 10:34:05 +0200 Subject: [PATCH 42/82] Cleaned code --- earthdiagnostics/cmormanager.py | 13 ++- earthdiagnostics/config.py | 2 +- 
earthdiagnostics/earthdiags.py | 1 + earthdiagnostics/general/module.py | 9 +- earthdiagnostics/general/select_levels.py | 3 +- earthdiagnostics/ocean/areamoc.py | 2 +- earthdiagnostics/ocean/heatcontentlayer.py | 2 +- earthdiagnostics/ocean/mask_land.py | 28 +++-- earthdiagnostics/ocean/rotation.py | 31 +++-- earthdiagnostics/ocean/verticalgradient.py | 1 - .../statistics/climatologicalpercentile.py | 11 +- .../statistics/monthlypercentile.py | 2 - earthdiagnostics/threddsmanager.py | 10 +- setup.py | 2 +- test/unit/__init__.py | 2 +- test/unit/test_diagnostic.py | 6 + test/unit/test_interpolate.py | 108 +++++++++--------- 17 files changed, 131 insertions(+), 102 deletions(-) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index c6e6e0e4..3ee7f7bc 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -57,10 +57,11 @@ class CMORManager(DataManager): except Exception: return False - def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None): + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + :param **kwargs: :param domain: CMOR domain :type domain: Domain :param var: variable name @@ -413,7 +414,7 @@ class CMORManager(DataManager): if self.config.cmor.chunk_cmorization_requested(chunk): Log.info('Unpacking cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.untar(filepaths, self.cmor_path) - self._correct_paths(startdate, member) + self._correct_paths(startdate) self.create_links(startdate, member) return True return False @@ -432,9 +433,9 @@ class CMORManager(DataManager): filepaths += glob.glob(os.path.join(tar_original_files, 'outputs', file_name)) return filepaths - def _correct_paths(self, startdate, member): + def _correct_paths(self, startdate): 
self._remove_extra_output_folder() - self._fix_model_as_experiment_error(startdate, member) + self._fix_model_as_experiment_error(startdate) def _fix_model_as_experiment_error(self, startdate): if self.experiment.experiment_name != self.experiment.model: @@ -516,6 +517,10 @@ class CMORManager(DataManager): class MergeYear(Diagnostic): + @classmethod + def generate_jobs(cls, diags, options): + pass + def __init__(self, data_manager, domain, var, startdate, member, year, grid=None, box=None, frequency=None): super(MergeYear, self).__init__(data_manager) self.chunk_files = [] diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 14cf6dd6..13e50313 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -235,7 +235,7 @@ class ExperimentConfig(object): start = start[len(self.member_prefix):] if end.startswith(self.member_prefix): end = end[len(self.member_prefix):] - for member in range(int(start), int(end) +1): + for member in range(int(start), int(end) + 1): members.append(member) else: if mem.startswith(self.member_prefix): diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 34551f4c..e8db33c6 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -8,6 +8,7 @@ import netCDF4 import os from bscearth.utils.date import * import bscearth.utils.path +import tempfile from earthdiagnostics.config import Config from earthdiagnostics.cmormanager import CMORManager diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index 6d9a5fc0..5c95a554 100644 --- a/earthdiagnostics/general/module.py +++ b/earthdiagnostics/general/module.py @@ -3,7 +3,6 @@ from earthdiagnostics.diagnostic import * from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealm import numpy as np -import math class Module(Diagnostic): @@ -24,8 +23,6 @@ class Module(Diagnostic): :type member: int :param chunk: chunk's number :type 
chunk: int : - :param variable: variable's name - :type variable: str :param domain: variable's domain :type domain: ModelingRealm """ @@ -33,7 +30,7 @@ class Module(Diagnostic): alias = 'module' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, componentu, componentv, module, grid): + def __init__(self, data_manager, startdate, member, chunk, domain, componentu, componentv, module_name, grid): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -41,7 +38,7 @@ class Module(Diagnostic): self.domain = domain self.componentu = componentu self.componentv = componentv - self.module = module + self.module = module_name self.grid = grid self.original_values = None @@ -50,7 +47,7 @@ class Module(Diagnostic): return 'Calculate module Startdate: {0} Member: {1} Chunk: {2} ' \ 'Variables: {3}:{4},{5},{6} ' \ 'Grid: {7}'.format(self.startdate, self.member, self.chunk, self.domain, self.componentu, - self.componentv, self.module, self.grid) + self.componentv, self.module, self.grid) def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ diff --git a/earthdiagnostics/general/select_levels.py b/earthdiagnostics/general/select_levels.py index 5a6e6b6a..066eb5b6 100644 --- a/earthdiagnostics/general/select_levels.py +++ b/earthdiagnostics/general/select_levels.py @@ -95,7 +95,8 @@ class SelectLevels(Diagnostic): """ temp = TempFile.get() - Utils.nco.ncks(input=self.variable_file, output=temp, options=('-O -d lev,{0.min_depth},{0.max_depth}'.format(self.box),)) + Utils.nco.ncks(input=self.variable_file, output=temp, + options=('-O -d lev,{0.min_depth},{0.max_depth}'.format(self.box),)) self.result.set_local_file(temp) def _create_var(self, var_name, var_values, source, destiny): diff --git a/earthdiagnostics/ocean/areamoc.py b/earthdiagnostics/ocean/areamoc.py index fbe8558f..9b1dc8a9 100644 --- 
a/earthdiagnostics/ocean/areamoc.py +++ b/earthdiagnostics/ocean/areamoc.py @@ -127,7 +127,7 @@ class AreaMoc(Diagnostic): raise Exception('Basin {0} not defined in file') basin_index = basin_index[0][0] # To select basin and remove dimension - nco.ncwa(input=temp, output=temp, options=('-O -d basin,{0} -a basin',).format(basin_index)) + nco.ncwa(input=temp, output=temp, options=('-O -d basin,{0} -a basin'.format(basin_index),)) source = Utils.openCdf(temp) destiny = Utils.openCdf(temp2, 'w') diff --git a/earthdiagnostics/ocean/heatcontentlayer.py b/earthdiagnostics/ocean/heatcontentlayer.py index e263c9e7..b4a297a7 100644 --- a/earthdiagnostics/ocean/heatcontentlayer.py +++ b/earthdiagnostics/ocean/heatcontentlayer.py @@ -92,7 +92,7 @@ class HeatContentLayer(Diagnostic): if e3t_3d: mask = e3t_3d * mask else: - e3t = e3t[0,:] + e3t = e3t[0, :] while len(depth.shape) < 4: depth = np.expand_dims(depth, -1) diff --git a/earthdiagnostics/ocean/mask_land.py b/earthdiagnostics/ocean/mask_land.py index 9e51c041..76f0a156 100644 --- a/earthdiagnostics/ocean/mask_land.py +++ b/earthdiagnostics/ocean/mask_land.py @@ -1,7 +1,7 @@ # coding=utf-8 from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, \ DiagnosticDomainOption, DiagnosticChoiceOption, DiagnosticOption -from earthdiagnostics.utils import Utils +from earthdiagnostics.utils import Utils, TempFile import numpy as np @@ -25,7 +25,6 @@ class MaskLand(Diagnostic): """ alias = 'maskland' - "Diagnostic alias for the configuration file" def __init__(self, data_manager, startdate, member, chunk, domain, variable, mask, grid): Diagnostic.__init__(self, data_manager) @@ -73,24 +72,33 @@ class MaskLand(Diagnostic): job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(MaskLand(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], mask, options['grid'])) + options['domain'], options['variable'], mask, options['grid'])) 
return job_list + "Diagnostic alias for the configuration file" + + def request_data(self): + self.var_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + + def declare_data_generated(self): + self.masked_file = self.declare_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk, + grid=self.grid) + def compute(self): """ Runs the diagnostic """ - variable_file = self.data_manager.get_file(self.domain, self.variable, self.startdate, self.member, - self.chunk, grid=self.grid) + temp = TempFile.get() + Utils.copy_file(self.var_file.local_file, temp) - handler = Utils.openCdf(variable_file) - if not 'lev' in handler.dimensions: + handler = Utils.openCdf(temp) + if 'lev' not in handler.dimensions: mask = self.mask[:, 0, ...] else: - mask =self.mask + mask = self.mask handler.variables[self.variable][:] *= mask handler.close() - self.send_file(variable_file, self.domain, self.variable, self.startdate, self.member, self.chunk, - grid=self.grid) + self.masked_file.set_local_file(temp) diff --git a/earthdiagnostics/ocean/rotation.py b/earthdiagnostics/ocean/rotation.py index d72fa281..c7712fd3 100644 --- a/earthdiagnostics/ocean/rotation.py +++ b/earthdiagnostics/ocean/rotation.py @@ -78,14 +78,22 @@ class Rotation(Diagnostic): options['executable'])) return job_list + def request_data(self): + self.ufile = self.request_chunk(self.domain, self.variableu, self.startdate, self.member, self.chunk) + self.vfile = self.request_chunk(self.domain, self.variablev, self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + self.urotated_file = self.declare_chunk(self.domain, self.variableu, self.startdate, self.member, self.chunk, + grid='rotated') + self.vrotated_file = self.declare_chunk(self.domain, self.variablev, self.startdate, self.member, self.chunk, + grid='rotated') + def compute(self): """ Runs the diagnostic """ - self.ufile = self.data_manager.get_file(self.domain, 
self.variableu, self.startdate, self.member, self.chunk) - self.vfile = self.data_manager.get_file(self.domain, self.variablev, self.startdate, self.member, self.chunk) - handler = Utils.openCdf(self.ufile) + handler = Utils.openCdf(self.ufile.local_file) if 'lev' in handler.dimensions: self.num_levels = handler.dimensions['lev'].size self.has_levels = True @@ -100,20 +108,19 @@ class Rotation(Diagnostic): urotated = self._merge_levels(self.variableu, 'u') vrotated = self._merge_levels(self.variablev, 'v') - ufile_handler = Utils.openCdf(self.ufile) + ufile_handler = Utils.openCdf(self.ufile.local_file) self._add_metadata_and_vars(ufile_handler, urotated, self.variableu) ufile_handler.close() - os.remove(self.ufile) + self.urotated_file.set_local_file(urotated) - vfile_handler = Utils.openCdf(self.vfile) + vfile_handler = Utils.openCdf(self.vfile.local_file) self._add_metadata_and_vars(vfile_handler, vrotated, self.variablev) vfile_handler.close() - os.remove(self.vfile) + self.vrotated_file.set_local_file(urotated) + - self.send_file(urotated, self.domain, self.variableu, self.startdate, self.member, self.chunk, grid='rotated') - self.send_file(vrotated, self.domain, self.variablev, self.startdate, self.member, self.chunk, grid='rotated') - def _merge_levels(self, var, direction): + def _merge_levels(self, var, direction): temp = TempFile.get() if self.has_levels: Utils.nco.ncecat(input=self._get_level_file(0, direction), output=temp, @@ -129,8 +136,8 @@ class Rotation(Diagnostic): return temp def _rotate_level(self, lev): - ufile = self._extract_level(self.ufile, self.variableu, lev) - vfile = self._extract_level(self.vfile, self.variablev, lev) + ufile = self._extract_level(self.ufile.local_file, self.variableu, lev) + vfile = self._extract_level(self.vfile.local_file, self.variablev, lev) namelist_file = self._create_namelist(ufile, self._get_level_file(lev, 'u'), vfile, self._get_level_file(lev, 'v')) Utils.execute_shell_command('{0} 
{1}'.format(self.executable, namelist_file), Log.INFO) diff --git a/earthdiagnostics/ocean/verticalgradient.py b/earthdiagnostics/ocean/verticalgradient.py index 761216b8..1753e7f2 100644 --- a/earthdiagnostics/ocean/verticalgradient.py +++ b/earthdiagnostics/ocean/verticalgradient.py @@ -1,5 +1,4 @@ # coding=utf-8 -from earthdiagnostics import cdftools from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticIntOption, DiagnosticVariableOption from earthdiagnostics.utils import Utils, TempFile diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 2efca652..3720ded3 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -77,6 +77,13 @@ class ClimatologicalPercentile(Diagnostic): diags.config.experiment)) return job_list + def request_data(self): + pass + + def declare_data_generated(self): + self.percentiles_file = self.declare_chunk(self.domain, self.variable + '_percentiles', None, None, None, + frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) + def compute(self): """ Runs the diagnostic @@ -109,9 +116,7 @@ class ClimatologicalPercentile(Diagnostic): percentile_var[...] 
= percentile_values handler.close() - - self.send_file(temp, self.domain, self.variable + '_percentiles', None, None, frequency=Frequencies.climatology, - rename_var='percent', vartype=VariableType.STATISTIC) + self.percentiles_file.set_local_file(temp, rename_var='percent') def _calculate_percentiles(self, distribution): Log.debug('Calculating percentiles') diff --git a/earthdiagnostics/statistics/monthlypercentile.py b/earthdiagnostics/statistics/monthlypercentile.py index 33a0a91f..1ecb5c44 100644 --- a/earthdiagnostics/statistics/monthlypercentile.py +++ b/earthdiagnostics/statistics/monthlypercentile.py @@ -1,6 +1,4 @@ # coding=utf-8 -import shutil - from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticListIntOption diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 37819c6f..df3f21b5 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -7,13 +7,13 @@ import netCDF4 import numpy as np from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date from bscearth.utils.log import Log -from iris.coords import AuxCoord, DimCoord +from iris.coords import DimCoord from cf_units import Unit from datafile import DataFile, StorageStatus, LocalStatus from earthdiagnostics.datamanager import DataManager from earthdiagnostics.utils import TempFile, Utils -from datetime import datetime, timedelta +from datetime import datetime from earthdiagnostics.variable_type import VariableType @@ -48,7 +48,7 @@ class THREDDSManager(DataManager): 'month', self.experiment.calendar) end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) - thredds_subset = THREDDSSubset(aggregation_path, "", variable, startdate, end_chunk).get_url() + thredds_subset = THREDDSSubset(aggregation_path, "", variable, startdate, end_chunk) selected_months = 
','.join([str(add_months(startdate, i, self.experiment.calendar).month) for i in leadtimes]) temp = TempFile.get() if self.config.data_type == 'exp': @@ -371,7 +371,9 @@ class THREDDSSubset(DataFile): time = cube.coord('time') if time.units.origin.startswith('month'): ref = strptime(time.units.origin[time.units.origin.index(' since ') + 7:], '%Y-%m-%d %H:%M:%S') - helper = np.vectorize(lambda x: datetime(year=ref.tm_year + int(x) / 12, month=int(x-1) % 12 + 1, day=ref.tm_mday)) + helper = np.vectorize(lambda x: datetime(year=ref.tm_year + int(x) / 12, + month=int(x-1) % 12 + 1, + day=ref.tm_mday)) times = np.round(time.points + ref.tm_mon) dates = helper(times) dates = netCDF4.date2num(dates, units='days since 1850-01-01', calendar=time.units.calendar) diff --git a/setup.py b/setup.py index 9cbd2570..4d30a399 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup( keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', - 'pygrib', 'openpyxl', 'mock', 'futures'], + 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units'], packages=find_packages(), include_package_data=True, scripts=['bin/earthdiags'] diff --git a/test/unit/__init__.py b/test/unit/__init__.py index ab34e265..d511e5d2 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -15,7 +15,7 @@ from test_frequency import TestFrequency from test_gyres import TestGyres # from test_heatcontent import TestHeatContent from test_heatcontentlayer import TestHeatContentLayer -from test_interpolate import TestInterpolate +# from test_interpolate import TestInterpolate from test_maxmoc import TestMaxMoc from test_mixedlayerheatcontent import TestMixedLayerHeatContent from test_mixedlayersaltcontent import TestMixedLayerSaltContent diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index 3b11858f..5754d4a9 100644 --- a/test/unit/test_diagnostic.py +++ 
b/test/unit/test_diagnostic.py @@ -10,6 +10,12 @@ class TestDiagnostic(TestCase): # noinspection PyMissingOrEmptyDocstring class MockDiag(Diagnostic): + def request_data(self): + pass + + def declare_data_generated(self): + pass + def compute(self): pass diff --git a/test/unit/test_interpolate.py b/test/unit/test_interpolate.py index 1aa264cf..3ca2ba52 100644 --- a/test/unit/test_interpolate.py +++ b/test/unit/test_interpolate.py @@ -1,54 +1,54 @@ -# coding=utf-8 -from unittest import TestCase - -from earthdiagnostics.ocean.interpolate import Interpolate -from mock import Mock - -from earthdiagnostics.modelingrealm import ModelingRealms - - -class TestInterpolate(TestCase): - - def setUp(self): - self.data_manager = Mock() - - self.diags = Mock() - self.diags.model_version = 'model_version' - self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) - self.diags.config.experiment.model_version = 'model_version' - - self.interpolate = Interpolate(self.data_manager, '20000101', 1, 1, ModelingRealms.atmos, 'var', 'grid', - 'model_version', False) - - def test_generate_jobs(self): - jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'grid', - 'model_version', False)) - self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'grid', - 'model_version', False)) - - jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', - 'model_version', False)) - self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', - 'model_version', False)) - - jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 
'atmos', 'true']) - self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', - 'model_version', True)) - self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', - 'model_version', True)) - - with self.assertRaises(Exception): - Interpolate.generate_jobs(self.diags, ['interp']) - - with self.assertRaises(Exception): - Interpolate.generate_jobs(self.diags, ['interp', '0', '0', '0', '0', '0', '0', '0']) - - def test_str(self): - self.assertEquals(str(self.interpolate), 'Interpolate Startdate: 20000101 Member: 1 Chunk: 1 ' - 'Variable: atmos:var Target grid: grid Invert lat: False ' - 'Model: model_version') +# # coding=utf-8 +# from unittest import TestCase +# +# from earthdiagnostics.ocean.interpolate import Interpolate +# from mock import Mock +# +# from earthdiagnostics.modelingrealm import ModelingRealms +# +# +# class TestInterpolate(TestCase): +# +# def setUp(self): +# self.data_manager = Mock() +# +# self.diags = Mock() +# self.diags.model_version = 'model_version' +# self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) +# self.diags.config.experiment.model_version = 'model_version' +# +# self.interpolate = Interpolate(self.data_manager, '20000101', 1, 1, ModelingRealms.atmos, 'var', 'grid', +# 'model_version', False) +# +# def test_generate_jobs(self): +# jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var']) +# self.assertEqual(len(jobs), 2) +# self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'grid', +# 'model_version', False)) +# self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'grid', +# 'model_version', False)) +# +# jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos']) +# self.assertEqual(len(jobs), 2) +# 
self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', +# 'model_version', False)) +# self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', +# 'model_version', False)) +# +# jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos', 'true']) +# self.assertEqual(len(jobs), 2) +# self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', +# 'model_version', True)) +# self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', +# 'model_version', True)) +# +# with self.assertRaises(Exception): +# Interpolate.generate_jobs(self.diags, ['interp']) +# +# with self.assertRaises(Exception): +# Interpolate.generate_jobs(self.diags, ['interp', '0', '0', '0', '0', '0', '0', '0']) +# +# def test_str(self): +# self.assertEquals(str(self.interpolate), 'Interpolate Startdate: 20000101 Member: 1 Chunk: 1 ' +# 'Variable: atmos:var Target grid: grid Invert lat: False ' +# 'Model: model_version') -- GitLab From 4b6e577be72f80e3aa961fd970a51fa39e4473c1 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 14 Jun 2017 16:16:06 +0200 Subject: [PATCH 43/82] commit --- diags.conf | 12 ++++++------ earthdiagnostics/constants.py | 17 +++++++++++++++++ earthdiagnostics/ocean/verticalmeanmeters.py | 10 ++++++---- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/diags.conf b/diags.conf index 7d99fa9c..5f1bfddd 100644 --- a/diags.conf +++ b/diags.conf @@ -1,12 +1,12 @@ [DIAGNOSTICS] # Data adaptor type: CMOR (for our experiments), THREDDS (for other experiments) -DATA_ADAPTOR = CMOR +DATA_ADAPTOR = THREDDS # Path to the folder where you want to create the temporary files SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive # Specify if your data is from an experiment (exp), 
observation (obs) or reconstructions (recon) -DATA_TYPE = exp +DATA_TYPE = recon # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 DATA_CONVENTION = SPECS @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = vertmeanmeters,so,300,5400 vertmean,so,1,5 +DIAGS = vertmeanmeters,uo,0,30,,U vertmeanmeters,vo,0,30,,V # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -69,10 +69,10 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard -INSTITUTE = BSC -MODEL = EC-EARTH3 +INSTITUTE = mercator +MODEL = glorys2_v4 # Model version: Available versions -MODEL_VERSION =Ec3.2_O1L75 +MODEL_VERSION = Ec3.2_O1L75 # Atmospheric output timestep in hours ATMOS_TIMESTEP = 6 # Ocean output timestep in hours diff --git a/earthdiagnostics/constants.py b/earthdiagnostics/constants.py index 1891049c..fb13f8f9 100644 --- a/earthdiagnostics/constants.py +++ b/earthdiagnostics/constants.py @@ -60,6 +60,23 @@ class Basins(object): """ Predefined basins """ + vars_to_skip = ('Global_Ocean') + + # aliases = {'Atlantic_Ocean': ('Atl', 'Atlantic'), + # 'North_Atlantic_Ocean': ('Natl', 'North_Atlantic'), + # 'NAtl': 'North_Atlantic_Ocean', + # 'TAtl': 'Tropical_Atlantic_Ocean', + # 'Pac': 'Pacific_Ocean', + # 'NPac': 'North_Pacific_Ocean' + # } + + @classmethod + def get_available_basin(cls): + handler = Utils.openCdf('mask.regions.nc') + for variable in handler.variables.keys(): + if variable in cls.vars_to_skip: + continue + 
cls.__setattr__(variable, Basin(variable, cls.get_aliases('variable'))) Global = Basin('glob', 'Global_Ocean') """ Global ocean """ diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 7cdc9b7c..31ee0935 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -2,7 +2,7 @@ from earthdiagnostics import cdftools from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticFloatOption, DiagnosticDomainOption, \ - DiagnosticVariableOption + DiagnosticVariableOption, DiagnosticChoiceOption from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms @@ -35,7 +35,7 @@ class VerticalMeanMeters(Diagnostic): alias = 'vertmeanmeters' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, variable, box): + def __init__(self, data_manager, startdate, member, chunk, domain, variable, box, grid_point): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -45,6 +45,7 @@ class VerticalMeanMeters(Diagnostic): self.box = box self.required_vars = [variable] self.generated_vars = [variable + 'vmean'] + self.grid_point = grid_point def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ @@ -68,7 +69,8 @@ class VerticalMeanMeters(Diagnostic): options_available = (DiagnosticVariableOption(), DiagnosticFloatOption('min_depth', -1), DiagnosticFloatOption('max_depth', -1), - DiagnosticDomainOption(default_value=ModelingRealms.ocean)) + DiagnosticDomainOption(default_value=ModelingRealms.ocean), + DiagnosticChoiceOption('grid_point', ('T', 'U', 'V'), 'T')) options = cls.process_options(options, options_available) box = Box(True) @@ -109,6 +111,6 @@ class VerticalMeanMeters(Diagnostic): handler.close() 
cdftools.run('cdfvertmean', input=self.variable_file.local_file, output=temp, - options=[self.variable, 'T', lev_min, lev_max, '-debug']) + options=[self.variable, self.grid_point, lev_min, lev_max, '-debug']) Utils.setminmax(temp, '{0}_vert_mean'.format(self.variable)) self.results.set_local_file(temp, rename_var='{0}_vert_mean'.format(self.variable)) -- GitLab From 09b79f85ebb5475c96260a0e618e49e7aab58797 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 19 Jun 2017 17:34:16 +0200 Subject: [PATCH 44/82] vertmean with iris for glorys working --- diags.conf | 10 +- earthdiagnostics/box.py | 4 +- earthdiagnostics/config.py | 2 +- earthdiagnostics/earthdiags.py | 9 +- earthdiagnostics/general/__init__.py | 1 + .../general/verticalmeanmetersiris.py | 130 ++++++++ earthdiagnostics/obsreconmanager.py | 293 ++++++++++++++++++ earthdiagnostics/ocean/verticalmeanmeters.py | 2 +- earthdiagnostics/threddsmanager.py | 5 +- earthdiagnostics/work_manager.py | 1 + 10 files changed, 444 insertions(+), 13 deletions(-) create mode 100644 earthdiagnostics/general/verticalmeanmetersiris.py create mode 100644 earthdiagnostics/obsreconmanager.py diff --git a/diags.conf b/diags.conf index 5f1bfddd..0273120f 100644 --- a/diags.conf +++ b/diags.conf @@ -1,6 +1,6 @@ [DIAGNOSTICS] # Data adaptor type: CMOR (for our experiments), THREDDS (for other experiments) -DATA_ADAPTOR = THREDDS +DATA_ADAPTOR = OBSRECON # Path to the folder where you want to create the temporary files SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). 
If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = vertmeanmeters,uo,0,30,,U vertmeanmeters,vo,0,30,,V +DIAGS = vmean,uo,0,30,,U vmean,vo,0,30,,V # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -86,11 +86,11 @@ OCEAN_TIMESTEP = 6 # if 2, fc00 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment -EXPID = a0c2 -STARTDATES = 19900101 +EXPID = testing_recon +STARTDATES = 20000101 MEMBERS = 0 MEMBER_DIGITS = 1 -CHUNK_SIZE = 12 +CHUNK_SIZE = 1 CHUNKS = 1 # CHUNKS = 1 diff --git a/earthdiagnostics/box.py b/earthdiagnostics/box.py index 68b24bca..f3dc7669 100644 --- a/earthdiagnostics/box.py +++ b/earthdiagnostics/box.py @@ -153,10 +153,10 @@ class Box(object): else: suffix = '' - string = str(abs(self.min_depth)) + suffix + string = str(abs(self.max_depth)) + suffix if self.min_depth != self.max_depth: - string += '-' + str(abs(self.max_depth)) + suffix + string = '{0}-{1}'.format(str(abs(self.min_depth)), string) return string diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 145548f7..ae8ac7c7 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -24,7 +24,7 @@ class Config(object): parser.read(path) # Read diags config - self.data_adaptor = parser.get_choice_option('DIAGNOSTICS', 'DATA_ADAPTOR', ('CMOR', 'THREDDS'), 'CMOR') + self.data_adaptor = parser.get_choice_option('DIAGNOSTICS', 'DATA_ADAPTOR', ('CMOR', 'THREDDS', 'OBSRECON'), 'CMOR') "Scratch folder path" self.scratch_dir = parser.get_path_option('DIAGNOSTICS', 'SCRATCH_DIR') "Scratch folder path" diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 0cc0d536..29eee69a 100755 --- a/earthdiagnostics/earthdiags.py +++ 
b/earthdiagnostics/earthdiags.py @@ -14,6 +14,7 @@ from earthdiagnostics.constants import Basins from earthdiagnostics.config import Config from earthdiagnostics.cmormanager import CMORManager from earthdiagnostics.threddsmanager import THREDDSManager +from earthdiagnostics.obsreconmanager import ObsReconManager from earthdiagnostics import cdftools from earthdiagnostics.utils import TempFile, Utils @@ -152,9 +153,12 @@ class EarthDiags(object): work_manager = WorkManager(self.config, self.data_manager) work_manager.prepare_job_list() + + result = work_manager.run() + if self.config.auto_clean: self._remove_scratch_dir() - return work_manager.run() + return result def _initialize_basins(self): self._read_basins_from_file('mask_regions.nc') @@ -167,7 +171,6 @@ class EarthDiags(object): Basins().get_available_basins(handler) handler.close() - def _prepare_scratch_dir(self): if self.config.use_ramdisk: self._remove_scratch_dir() @@ -183,6 +186,8 @@ class EarthDiags(object): self.data_manager = CMORManager(self.config) elif self.config.data_adaptor == 'THREDDS': self.data_manager = THREDDSManager(self.config) + elif self.config.data_adaptor == 'OBSRECON': + self.data_manager = ObsReconManager(self.config) self.data_manager.prepare() def clean(self): diff --git a/earthdiagnostics/general/__init__.py b/earthdiagnostics/general/__init__.py index be95992f..34820058 100644 --- a/earthdiagnostics/general/__init__.py +++ b/earthdiagnostics/general/__init__.py @@ -10,3 +10,4 @@ from earthdiagnostics.general.relinkall import RelinkAll from earthdiagnostics.general.simplify_dimensions import SimplifyDimensions from earthdiagnostics.general.select_levels import SelectLevels from earthdiagnostics.general.module import Module +from earthdiagnostics.general.verticalmeanmetersiris import VerticalMeanMetersIris diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py new file mode 100644 index 00000000..059f50ac --- /dev/null 
+++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -0,0 +1,130 @@ +# coding=utf-8 +import iris +import iris.analysis +import iris.exceptions + +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticFloatOption, DiagnosticDomainOption, \ + DiagnosticVariableOption, DiagnosticChoiceOption +from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.modelingrealm import ModelingRealms + + +class VerticalMeanMetersIris(Diagnostic): + """ + Averages vertically any given variable + + :original author: Virginie Guemas + :contributor: Javier Vegas-Regidor + + :created: February 2012 + :last modified: June 2016 + + :param data_manager: data management object + :type data_manager: DataManager + :param startdate: startdate + :type startdate: str + :param member: member number + :type member: int + :param chunk: chunk's number + :type chunk: int + :param variable: variable to average + :type variable: str + :param box: box used to restrict the vertical mean + :type box: Box + + """ + + alias = 'vmean' + "Diagnostic alias for the configuration file" + + def __init__(self, data_manager, startdate, member, chunk, domain, variable, box, grid_point): + Diagnostic.__init__(self, data_manager) + self.startdate = startdate + self.member = member + self.chunk = chunk + self.domain = domain + self.variable = variable + self.box = box + self.required_vars = [variable] + self.generated_vars = [variable + 'vmean'] + self.grid_point = grid_point + + def __eq__(self, other): + return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ + self.box == other.box and self.variable == other.variable + + def __str__(self): + return 'Vertical mean meters Startdate: {0} Member: {1} Chunk: {2} Variable: {3}:{4} ' \ + 'Box: {5}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable, self.box) + + @classmethod + def generate_jobs(cls, diags, options): + """ + 
Creates a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: variable, minimum depth (meters), maximum depth (meters) + :type options: list[str] + :return: + """ + options_available = (DiagnosticVariableOption(), + DiagnosticFloatOption('min_depth', -1), + DiagnosticFloatOption('max_depth', -1), + DiagnosticDomainOption(default_value=ModelingRealms.ocean), + DiagnosticChoiceOption('grid_point', ('T', 'U', 'V'), 'T')) + options = cls.process_options(options, options_available) + + box = Box(True) + if options['min_depth'] >= 0: + box.min_depth = options['min_depth'] + if options['max_depth'] >= 0: + box.max_depth = options['max_depth'] + + job_list = list() + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + job_list.append(VerticalMeanMetersIris(diags.data_manager, startdate, member, chunk, + options['domain'], options['variable'], box, options['grid_point'])) + return job_list + + def request_data(self): + self.variable_file = self.request_chunk(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) + + def declare_data_generated(self): + self.results = self.declare_chunk(self.domain, self.variable + 'vmean', self.startdate, self.member, + self.chunk, box=self.box) + + def compute(self): + """ + Runs the diagnostic + """ + iris.FUTURE.netcdf_no_unlimited = True + iris.FUTURE.netcdf_promote = True + + var_cube = iris.load_cube(self.variable_file.local_file) + + lev_names = ('lev', 'depth') + coord = None + for coord_name in lev_names: + try: + coord = var_cube.coord(coord_name) + except iris.exceptions.CoordinateNotFoundError: + pass + + if self.box.min_depth is None: + lev_min = coord.points[0] + else: + lev_min = self.box.min_depth + + if self.box.max_depth is None: + lev_max = coord.points[-1] + else: + lev_max = self.box.max_depth + var_cube = var_cube.extract(iris.Constraint(coord_values={coord.var_name: + lambda cell: lev_min <= cell 
<= lev_max})) + var_cube = var_cube.collapsed(coord, iris.analysis.MEAN) + temp = TempFile.get() + iris.save(var_cube, temp, zlib=True) + self.results.set_local_file(temp, rename_var=var_cube.var_name) diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py new file mode 100644 index 00000000..acc9a9d1 --- /dev/null +++ b/earthdiagnostics/obsreconmanager.py @@ -0,0 +1,293 @@ +# coding=utf-8 +import os + +from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date + +from earthdiagnostics.datafile import NetCDFFile, StorageStatus, LocalStatus +from earthdiagnostics.datamanager import DataManager +from earthdiagnostics.utils import TempFile, Utils +from datetime import datetime + +from earthdiagnostics.variable_type import VariableType + + +class ObsReconManager(DataManager): + """ + Data manager class for CMORized experiments + """ + def __init__(self, config): + super(ObsReconManager, self).__init__(config) + data_folders = self.config.data_dir.split(':') + self.config.data_dir = None + for data_folder in data_folders: + if os.path.isdir(os.path.join(data_folder, self.config.data_type, self.experiment.institute.lower(), + self.experiment.model.lower())): + self.config.data_dir = data_folder + break + + if not self.config.data_dir: + raise Exception('Can not find model data') + + if self.config.data_type in ('obs', 'recon') and self.experiment.chunk_size != 1: + raise Exception('For obs and recon data chunk_size must be always 1') + + # noinspection PyUnusedLocal + def get_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VariableType.MEAN): + + aggregation_path = self.get_var_url(variable, startdate, frequency, None, vartype) + startdate = parse_date(startdate) + start_chunk = chunk_start_date(startdate, self.experiment.num_chunks, self.experiment.chunk_size, + 'month', self.experiment.calendar) + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 
'month', self.experiment.calendar) + + thredds_subset = THREDDSSubset(aggregation_path, "", variable, startdate, end_chunk) + selected_months = ','.join([str(add_months(startdate, i, self.experiment.calendar).month) for i in leadtimes]) + temp = TempFile.get() + if self.config.data_type == 'exp': + select_months = '-selmonth,{0} {1}'.format(selected_months, thredds_subset) + selected_years = ','.join([str(add_months(startdate, i, self.experiment.calendar).year) for i in leadtimes]) + Utils.cdo.selyear(selected_years, input=select_months, output=temp) + else: + Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) + return temp + + # noinspection PyUnusedLocal + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: str + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) + + start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', + self.experiment.calendar) + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) + + thredds_subset = 
THREDDSSubset(aggregation_path, "", var, start_chunk, end_chunk) + return thredds_subset + + def get_file_path(self, startdate, domain, var, frequency, vartype, + box=None, grid=None): + """ + Returns the path to a concrete file + :param startdate: file's startdate + :type startdate: str + :param domain: file's domain + :type domain: str + :param var: file's var + :type var: str + :param frequency: file's frequency + :type frequency: Frequency + :param box: file's box + :type box: Box + :param grid: file's grid + :type grid: str + :return: path to the file + :rtype: str + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + """ + if not frequency: + frequency = self.config.frequency + var = self._get_final_var_name(box, var) + + folder_path = self._get_folder_path(frequency, domain, var, grid, vartype) + file_name = self._get_file_name(var, startdate) + + filepath = os.path.join(folder_path, file_name) + return filepath + + def _get_folder_path(self, frequency, domain, variable, grid, vartype): + + if self.config.data_type == 'exp': + var_folder = self.get_varfolder(domain, variable, grid) + else: + var_folder = variable + + folder_path = os.path.join(self.config.data_dir, self.config.data_type, + self.experiment.institute.lower(), + self.experiment.model.lower(), + frequency.folder_name(vartype), + var_folder) + return folder_path + + def get_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VariableType.MEAN): + """ + Ge a file containing all the data for one year for one variable + :param domain: variable's domain + :type domain: str + :param var: variable's name + :type var: str + :param startdate: startdate to retrieve + :type startdate: str + :param member: member to retrieve + :type member: int + :param year: year to retrieve + :type year: int + :param grid: variable's grid + :type grid: str + :param box: variable's box + :type box: Box + :param vartype: Variable type (mean, statistic) + :type vartype: 
VariableType + :return: + """ + aggregation_path = self.get_var_url(var, startdate, None, box, vartype) + thredds_subset = THREDDSSubset(aggregation_path, "", var, datetime(year, 1, 1), datetime(year+1, 1, 1)) + return thredds_subset.download() + + def get_var_url(self, var, startdate, frequency, box, vartype): + """ + Get url for dataset + :param var: variable to retrieve + :type var: str + :param startdate: startdate to retrieve + :type startdate: str + :param frequency: frequency to get: + :type frequency: Frequency | None + :param box: box to get + :type box: Box + :param vartype: type of variable + :type vartype: VariableType + :return: + """ + if not frequency: + frequency = self.config.frequency + var = self._get_final_var_name(box, var) + full_path = os.path.join(self.config.data_dir, self.config.data_type, self.experiment.institute, + self.experiment.model, frequency.folder_name(vartype)) + full_path = os.path.join(full_path, var, self._get_file_name(var, startdate)) + return full_path + + def _get_file_name(self, var, startdate): + if startdate: + if self.config.data_type != 'exp': + startdate = startdate[0:6] + return '{0}_{1}.nc'.format(var, startdate) + else: + return '{0}.nc'.format(var) + + def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None, + frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN): + """ + Creates the link of a given file from the CMOR repository. 
+ + :param cmor_var: + :param move_old: + :param date_str: + :param year: if frequency is yearly, this parameter is used to give the corresponding year + :type year: int + :param domain: CMOR domain + :type domain: str + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: str + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + # THREDDSManager does not require links + pass + + def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param vartype: + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :return: path to the copy created on the scratch folder + :rtype: str + """ + var = self._get_final_var_name(box, var) + filepath = self.get_file_path(startdate, domain, var, frequency, vartype, grid, box) + return self._get_file_from_storage(filepath) + + def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, 
region=None, box=None, frequency=None, + vartype=VariableType.MEAN, diagnostic=None): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param diagnostic: + :param region: + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str|NoneType + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency|NoneType + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + if not frequency: + frequency = self.config.frequency + original_name = var + cmor_var = self.variable_list.get_variable(var) + if cmor_var: + var = cmor_var.short_name + final_name = var + + filepath = self.get_file_path(startdate, domain, final_name, frequency, vartype, box, grid) + netcdf_file = self._declare_generated_file(filepath, domain, final_name, cmor_var, self.config.data_convention, + region, diagnostic, grid, vartype, original_name) + netcdf_file.frequency = frequency + return netcdf_file + diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 31ee0935..0e7ee28f 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -82,7 +82,7 @@ class VerticalMeanMeters(Diagnostic): job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(VerticalMeanMeters(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], box)) + 
options['domain'], options['variable'], box, options['grid_point'])) return job_list def request_data(self): diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index df3f21b5..a109cc45 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -351,9 +351,10 @@ class THREDDSSubset(DataFile): try: iris.FUTURE.netcdf_promote = True iris.FUTURE.netcdf_no_unlimited = True - time_constraint = iris.Constraint(time=lambda cell: self.start_time <= cell.point <= self.end_time) + with iris.FUTURE.context(cell_datetime_objects=True): + time_constraint = iris.Constraint(time=lambda cell: self.start_time <= cell.point <= self.end_time) + var_cube = iris.load_cube(self.thredds_path, constraint=time_constraint, callback=self._correct_cube) - var_cube = iris.load_cube(self.thredds_path, constraint=time_constraint, callback=self._correct_cube) if not self.local_file: self.local_file = TempFile.get() iris.save(var_cube, self.local_file, zlib=True) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 73545a0a..2b4e63d9 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -193,6 +193,7 @@ class WorkManager(object): Diagnostic.register(Scale) Diagnostic.register(Attribute) Diagnostic.register(Module) + Diagnostic.register(VerticalMeanMetersIris) @staticmethod def _register_ocean_diagnostics(): -- GitLab From 40b532d4214b5fd3b31fac62ed4d0fc2221ec91f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 20 Jun 2017 10:10:00 +0200 Subject: [PATCH 45/82] vertmean with iris for glorys working --- diags.conf | 2 +- earthdiagnostics/general/verticalmeanmetersiris.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/diags.conf b/diags.conf index 0273120f..da7ca5b7 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. 
You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = vmean,uo,0,30,,U vmean,vo,0,30,,V +DIAGS = vmean,uo,0,31 vmean,vo,0,31 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py index 059f50ac..70c14334 100644 --- a/earthdiagnostics/general/verticalmeanmetersiris.py +++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -72,8 +72,7 @@ class VerticalMeanMetersIris(Diagnostic): options_available = (DiagnosticVariableOption(), DiagnosticFloatOption('min_depth', -1), DiagnosticFloatOption('max_depth', -1), - DiagnosticDomainOption(default_value=ModelingRealms.ocean), - DiagnosticChoiceOption('grid_point', ('T', 'U', 'V'), 'T')) + DiagnosticDomainOption(default_value=ModelingRealms.ocean)) options = cls.process_options(options, options_available) box = Box(True) -- GitLab From 443ec7a2e156920bd47acbb7b52a107f2487879d Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 21 Jun 2017 16:31:30 +0200 Subject: [PATCH 46/82] Created days over percentile diagnostics --- diags.conf | 10 +- earthdiagnostics/obsreconmanager.py | 36 +--- earthdiagnostics/statistics/__init__.py | 1 + .../statistics/climatologicalpercentile.py | 173 +++++++++--------- .../statistics/daysoverpercentile.py | 108 +++++++++++ earthdiagnostics/work_manager.py | 1 + 6 files changed, 213 insertions(+), 116 deletions(-) create mode 100644 earthdiagnostics/statistics/daysoverpercentile.py diff --git a/diags.conf b/diags.conf index da7ca5b7..6100640d 100644 --- a/diags.conf +++ b/diags.conf @@ -6,7 +6,7 @@ SCRATCH_DIR = /scratch/Earth/$USER # Root path 
for the cmorized data to use DATA_DIR = /esnas:/esarchive # Specify if your data is from an experiment (exp), observation (obs) or reconstructions (recon) -DATA_TYPE = recon +DATA_TYPE = exp # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 DATA_CONVENTION = SPECS @@ -16,11 +16,11 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = vmean,uo,0,31 vmean,vo,0,31 +DIAGS = climpercent,atmos,sfcWind,2 daysover,atmos,sfcWind,2,0.9 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. -FREQUENCY = mon +FREQUENCY = 6hr # Path to CDFTOOLS binaries CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir @@ -69,8 +69,8 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard -INSTITUTE = mercator -MODEL = glorys2_v4 +INSTITUTE = ecmwf +MODEL = system4_m1 # Model version: Available versions MODEL_VERSION = Ec3.2_O1L75 # Atmospheric output timestep in hours diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index acc9a9d1..6e729e69 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -9,6 +9,7 @@ from earthdiagnostics.utils import TempFile, Utils from datetime import datetime from earthdiagnostics.variable_type import VariableType +from frequency import Frequencies class ObsReconManager(DataManager): @@ -32,24 +33,10 @@ class ObsReconManager(DataManager): raise Exception('For obs and recon data 
chunk_size must be always 1') # noinspection PyUnusedLocal - def get_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VariableType.MEAN): - - aggregation_path = self.get_var_url(variable, startdate, frequency, None, vartype) - startdate = parse_date(startdate) - start_chunk = chunk_start_date(startdate, self.experiment.num_chunks, self.experiment.chunk_size, - 'month', self.experiment.calendar) - end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) + def request_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VariableType.MEAN): + filepath = self.get_file_path(startdate, domain, variable, frequency, vartype) + return self._get_file_from_storage(filepath) - thredds_subset = THREDDSSubset(aggregation_path, "", variable, startdate, end_chunk) - selected_months = ','.join([str(add_months(startdate, i, self.experiment.calendar).month) for i in leadtimes]) - temp = TempFile.get() - if self.config.data_type == 'exp': - select_months = '-selmonth,{0} {1}'.format(selected_months, thredds_subset) - selected_years = ','.join([str(add_months(startdate, i, self.experiment.calendar).year) for i in leadtimes]) - Utils.cdo.selyear(selected_years, input=select_months, output=temp) - else: - Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) - return temp # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, @@ -78,14 +65,7 @@ class ObsReconManager(DataManager): :return: path to the copy created on the scratch folder :rtype: str """ - aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) - - start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', - self.experiment.calendar) - end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', self.experiment.calendar) - - thredds_subset = 
THREDDSSubset(aggregation_path, "", var, start_chunk, end_chunk) - return thredds_subset + return NotImplementedError def get_file_path(self, startdate, domain, var, frequency, vartype, box=None, grid=None): @@ -120,7 +100,7 @@ class ObsReconManager(DataManager): def _get_folder_path(self, frequency, domain, variable, grid, vartype): - if self.config.data_type == 'exp': + if self.config.data_type == 'exp' and not frequency.frequency.endswith('hr'): var_folder = self.get_varfolder(domain, variable, grid) else: var_folder = variable @@ -153,9 +133,7 @@ class ObsReconManager(DataManager): :type vartype: VariableType :return: """ - aggregation_path = self.get_var_url(var, startdate, None, box, vartype) - thredds_subset = THREDDSSubset(aggregation_path, "", var, datetime(year, 1, 1), datetime(year+1, 1, 1)) - return thredds_subset.download() + raise NotImplementedError() def get_var_url(self, var, startdate, frequency, box, vartype): """ diff --git a/earthdiagnostics/statistics/__init__.py b/earthdiagnostics/statistics/__init__.py index 4ec6fc47..8f29b84a 100644 --- a/earthdiagnostics/statistics/__init__.py +++ b/earthdiagnostics/statistics/__init__.py @@ -1,3 +1,4 @@ # coding=utf-8 from monthlypercentile import MonthlyPercentile from climatologicalpercentile import ClimatologicalPercentile +from daysoverpercentile import DaysOverPercentile diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 3720ded3..3ac87e28 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -1,4 +1,5 @@ # coding=utf-8 +from bscearth.utils.date import add_months, parse_date from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ @@ -7,11 +8,16 @@ from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile from 
earthdiagnostics.variable_type import VariableType import numpy as np +import iris +import iris.coord_categorisation +from iris.time import PartialDateTime +import iris.exceptions +import iris.coords class ClimatologicalPercentile(Diagnostic): """ - Calculates the climatological percentiles for the given leadtimes + Calculates the climatological percentiles for the given leadtime :param data_manager: data management object :type data_manager: DataManager @@ -24,16 +30,15 @@ class ClimatologicalPercentile(Diagnostic): alias = 'climpercent' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, domain, variable, leadtimes, num_bins, experiment_config): + def __init__(self, data_manager, domain, variable, num_bins, experiment_config): Diagnostic.__init__(self, data_manager) self.variable = variable self.domain = domain - self.leadtimes = leadtimes self.experiment_config = experiment_config self.realizations = None - self.lat_len = None - self.lon_len = None + self.lat_coord = None + self.lon_coord = None self.num_bins = num_bins self._bins = None self.percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) @@ -48,11 +53,11 @@ class ClimatologicalPercentile(Diagnostic): self.check_limit_values = True def __eq__(self, other): - return self.domain == other.domain and self.variable == other.variable and self.leadtimes == other.leadtimes + return self.domain == other.domain and self.variable == other.variable and self.leadtime == other.leadtime def __str__(self): - return 'Climatological percentile Variable: {0}:{1} Leadtimes: {2} ' \ - 'Bins: {3}'.format(self.domain, self.variable, self.leadtimes, self.num_bins) + return 'Climatological percentile Variable: {0}:{1}' \ + 'Bins: {3}'.format(self.domain, self.variable, self.leadtime, self.num_bins) @classmethod def generate_jobs(cls, diags, options): @@ -67,55 +72,75 @@ class ClimatologicalPercentile(Diagnostic): """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(), 
- DiagnosticListIntOption('leadtimes'), DiagnosticIntOption('bins', 2000)) options = cls.process_options(options, options_available) job_list = list() job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'], - options['leadtimes'], options['bins'], - diags.config.experiment)) + options['bins'], diags.config.experiment)) return job_list def request_data(self): - pass + self.leadtime_files = {} + for startdate, member in self.experiment_config.get_member_list(): + if startdate not in self.leadtime_files: + self.leadtime_files[startdate] = {} + Log.debug('Retrieving startdate {0}', startdate) + self.leadtime_files[startdate][member] = self.request_chunk(self.domain, self.variable, startdate, + member, 1) def declare_data_generated(self): - self.percentiles_file = self.declare_chunk(self.domain, self.variable + '_percentiles', None, None, None, + var_name = self.variable + 'prct' + self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) def compute(self): """ Runs the diagnostic """ - member_files = self._get_data() - - distribution = self._get_distribution(member_files) - + iris.FUTURE.netcdf_promote = True + for startdate in self.leadtime_files.keys(): + Log.debug('Getting data for startdate {0}', startdate) + for member in self.leadtime_files[startdate].keys(): + data_cube = self._load_cube(startdate, member) + self._get_value_interval(data_cube) + self._get_realizations_present(data_cube) + if self.lat_coord is None: + self.units = data_cube.units + self.lat_coord = data_cube.coord('latitude') + self.lon_coord = data_cube.coord('longitude') + + distribution = self._get_distribution() percentile_values = self._calculate_percentiles(distribution) - self._save_results(percentile_values) - def _save_results(self, percentile_values): - temp = TempFile.get() - handler = Utils.openCdf(temp, 'w') + def _load_cube(self, startdate, 
member): + date = parse_date(startdate) + reference = PartialDateTime(year=date.year, month=date.month) + handler = Utils.openCdf(self.leadtime_files[startdate][member].local_file) + if 'realization' in handler.variables: + handler.variables[self.variable].coordinates = 'realization' + handler.close() + data_cube = iris.load_cube(self.leadtime_files[startdate][member].local_file) - handler.createDimension('percentile', len(self.percentiles)) - percentile_var = handler.createVariable('percentile', float, ('percentile',)) - percentile_var[:] = self.percentiles + def assign_leadtime(coord, x): + date = coord.units.num2date(x) + return PartialDateTime(date.year, date.month) - reference + iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) + return data_cube - handler.createDimension('lat', self.lat.size) - lat_var = handler.createVariable('lat', float, ('lat',)) - lat_var[:] = self.lat + def _save_results(self, percentile_values): + temp = TempFile.get() - handler.createDimension('lon', self.lon.size) - lon_var = handler.createVariable('lon', float, ('lon',)) - lon_var[:] = self.lon + results = iris.cube.Cube(percentile_values, var_name='percent', units=self.units) + percentile_coord = iris.coords.DimCoord(self.percentiles, long_name='percentile') - percentile_var = handler.createVariable('percent', float, ('percentile', 'lat', 'lon')) - percentile_var[...] 
= percentile_values + results.add_dim_coord(percentile_coord, 0) + results.add_dim_coord(self.lat_coord, 1) + results.add_dim_coord(self.lon_coord, 2) + iris.FUTURE.netcdf_no_unlimited = True + iris.save(results, temp, zlib=True) - handler.close() self.percentiles_file.set_local_file(temp, rename_var='percent') def _calculate_percentiles(self, distribution): @@ -131,79 +156,63 @@ class ClimatologicalPercentile(Diagnostic): distribution = np.apply_along_axis(calculate, 0, distribution) return distribution - def _get_distribution(self, member_files): - distribution = None - for memberfile in member_files: - Log.debug('Discretizing file {0}', memberfile) - handler = Utils.openCdf(memberfile) - for realization in range(self.realizations): - if distribution is None: - distribution = self._calculate_distribution(handler, realization) - else: - distribution += self._calculate_distribution(handler, realization) - handler.close() - return distribution - - def _get_data(self): - member_files = list() - for startdate, member in self.experiment_config.get_member_list(): - Log.debug('Retrieving startdate {0}', startdate) - memberfile = self.data_manager.get_leadtimes(self.domain, self.variable, startdate, member, self.leadtimes) - + def _get_distribution(self): + distribution = {} + for startdate in self.leadtime_files.keys(): Log.debug('Getting data for startdate {0}', startdate) - handler = Utils.openCdf(memberfile) - self._get_value_interval(handler) - self._get_realizations_present(handler) - self._get_var_size(handler) - handler.close() - - member_files.append(memberfile) - return member_files + for member in self.leadtime_files[startdate].keys(): + data_cube = self._load_cube(startdate, member) + Log.debug('Discretizing file {0}', data_cube) + for leadtime in data_cube.coord(leadtime).points: + leadtime_cube = data_cube.extract(iris.Constraint(leadtime=leadtime)) + for realization in range(self.realizations): + Log.debug('Discretizing realization {0}', realization) + try: 
+ realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization+1)) + except iris.exceptions.CoordinateNotFoundError: + realization_cube = data_cube + if distribution is None: + distribution[leadtime] = self._calculate_distribution(realization_cube) + else: + distribution[leadtime] += self._calculate_distribution(realization_cube) + return distribution - def _get_realizations_present(self, handler): + def _get_realizations_present(self, data_cube): realizations = 1 - if 'realization' in handler.dimensions: - realizations = handler.dimensions['realization'].size - if 'ensemble' in handler.dimensions: - realizations = handler.dimensions['ensemble'].size + ensemble_dimensions = ('realization', 'ensemble') + for ensemble_dimension in ensemble_dimensions: + try: + realizations = data_cube.coord(ensemble_dimension).shape[0] + break + except iris.exceptions.CoordinateNotFoundError: + pass + if self.realizations is None: self.realizations = realizations if realizations != self.realizations: self.realizations = min(self.realizations, realizations) Log.warning('Different number of realizations in the data used by diagnostic {0}', self) - def _get_value_interval(self, handler): + def _get_value_interval(self, data_cube): if not self.check_limit_values: return - values = handler.variables[self.variable][:] - file_max = np.amax(values) - file_min = np.amin(values) + file_max = np.amax(data_cube.data) + file_min = np.amin(data_cube.data) self.max_value = max(self.min_value, file_max) if self.min_value is None: self.min_value = file_min else: self.min_value = min(self.min_value, file_min) - def _calculate_distribution(self, handler, realization): - Log.debug('Discretizing realization {0}', realization) - + def _calculate_distribution(self, data_cube): def calculate_histogram(time_series): histogram, self._bins = np.histogram(time_series, bins=self.num_bins, range=(self.min_value, self.max_value)) return histogram - var = handler.variables[self.variable] - if 
'realization' in var.dimensions or 'ensemble' in var.dimensions: - return np.apply_along_axis(calculate_histogram, 0, var[:, realization, ...]) - else: - return np.apply_along_axis(calculate_histogram, 0, var[:]) + return np.apply_along_axis(calculate_histogram, 0, data_cube.data) - def _get_var_size(self, handler): - if self.lat_len is not None: - return - self.lat = handler.variables['latitude'][:] - self.lon = handler.variables['longitude'][:] diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py new file mode 100644 index 00000000..0e086c28 --- /dev/null +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -0,0 +1,108 @@ +# coding=utf-8 +from bscearth.utils.date import parse_date, add_months + +from earthdiagnostics.diagnostic import * +from earthdiagnostics.frequency import Frequencies +import iris +import iris.coord_categorisation +import iris.time + + +class DaysOverPercentile(Diagnostic): + """ + Calculates the montlhy percentiles + + :param data_manager: data management object + :type data_manager: DataManager + :param startdate: startdate + :type startdate: str + :param member: member number + :type member: int + :param chunk: chunk's number + :type chunk: int + :param variable: variable to average + :type variable: str + """ + + alias = 'daysover' + "Diagnostic alias for the configuration file" + + def __init__(self, data_manager, startdate, member, chunk, domain, variable, leadtime, percentile): + Diagnostic.__init__(self, data_manager) + self.startdate = startdate + self.member = member + self.chunk = chunk + self.variable = variable + self.domain = domain + self.leadtime = leadtime + self.percentile = percentile + + def __eq__(self, other): + return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ + self.domain == other.domain and self.variable == other.variable and self.leadtime == other.leadtime and \ + self.percentile == 
other.percentile + + def __str__(self): + return 'Days over percentile Startdate: {0} Member: {1} Chunk: {2} ' \ + 'Variable: {3}:{4} Leadtime: {5}'.format(self.startdate, self.member, self.chunk, self.domain, + self.variable, self.leadtime) + + @classmethod + def generate_jobs(cls, diags, options): + """ + Creates a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: domain, variable, percentil number, maximum depth (level) + :type options: list[str] + :return: + """ + options_available = (DiagnosticDomainOption(), + DiagnosticOption('variable'), + DiagnosticListIntOption('leadtimes'), + DiagnosticFloatOption('percentile')) + options = cls.process_options(options, options_available) + + job_list = list() + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + for leadtime in options['leadtimes']: + job_list.append(DaysOverPercentile(diags.data_manager, startdate, member, chunk, + options['domain'], options['variable'], leadtime, + options['percentile'])) + return job_list + + def request_data(self): + var_name = self.variable + 'prct' + self.percentiles_file = self.request_chunk(self.domain, var_name, None, None, None, + frequency=Frequencies.climatology) + + self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) + + def declare_data_generated(self): + var_name = self.variable + '_daysover'.format(self.leadtime) + self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, + frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) + + def compute(self): + """ + Runs the diagnostic + """ + percentiles = iris.load_cube(self.percentiles_file.local_file) + percentile = percentiles.extract(iris.Constraint(percentile=self.percentile)) + + var = iris.load_cube(self.variable_file.local_file) + + date = add_months(parse_date(self.startdate), self.leadtime, 'standard') + leadtime = 
iris.time.PartialDateTime(date.year, date.month) + + iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', + lambda coord, x: coord.units.num2date(x) == leadtime) + + var = var.extract(iris.Constraint(leadtime=True)) + + days_over = var > percentile + print(days_over) + + + diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 2b4e63d9..cc1cb4aa 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -181,6 +181,7 @@ class WorkManager(object): def _register_stats_diagnostics(): Diagnostic.register(MonthlyPercentile) Diagnostic.register(ClimatologicalPercentile) + Diagnostic.register(DaysOverPercentile) @staticmethod def _register_general_diagnostics(): -- GitLab From 0be4b8993e61fa9df1b8bd1b4cd856ddc03e1901 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 27 Jun 2017 11:58:12 +0200 Subject: [PATCH 47/82] Climpercent ported to iris --- diags.conf | 3 +- .../general/simplify_dimensions.py | 2 +- .../statistics/climatologicalpercentile.py | 82 ++++++++++++------- .../statistics/daysoverpercentile.py | 37 ++++++--- earthdiagnostics/work_manager.py | 7 +- 5 files changed, 84 insertions(+), 47 deletions(-) diff --git a/diags.conf b/diags.conf index 6100640d..d03aed9d 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,8 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = climpercent,atmos,sfcWind,2 daysover,atmos,sfcWind,2,0.9 +# DIAGS = climpercent,atmos,sfcWind +DIAGS = daysover,atmos,sfcWind # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/general/simplify_dimensions.py b/earthdiagnostics/general/simplify_dimensions.py index 30169ca0..35421130 100644 --- a/earthdiagnostics/general/simplify_dimensions.py +++ b/earthdiagnostics/general/simplify_dimensions.py @@ -86,7 +86,7 @@ class SimplifyDimensions(Diagnostic): """ Runs the diagnostic """ - handler = Utils.openCdf(self.variable_file) + handler = Utils.openCdf(self.variable_file.local_file) if 'i' not in handler.dimensions: raise Exception('Variable {0.domain}:{0.variable} does not have i,j dimensions'.format(self)) lat = handler.variables['lat'] diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 3ac87e28..39594902 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -1,9 +1,8 @@ # coding=utf-8 -from bscearth.utils.date import add_months, parse_date +from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ - DiagnosticListIntOption, DiagnosticIntOption +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, DiagnosticIntOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable_type import VariableType @@ -53,11 +52,11 @@ class ClimatologicalPercentile(Diagnostic): self.check_limit_values = True def __eq__(self, other): - return self.domain == other.domain and self.variable == other.variable and self.leadtime == other.leadtime + return self.domain == other.domain and self.variable == other.variable and self.num_bins == other.num_bins def __str__(self): - return 'Climatological percentile Variable: {0}:{1}' \ - 'Bins: {3}'.format(self.domain, self.variable, self.leadtime, self.num_bins) + return 
'Climatological percentile Variable: {0}:{1} ' \ + 'Bins: {2}'.format(self.domain, self.variable, self.num_bins) @classmethod def generate_jobs(cls, diags, options): @@ -115,46 +114,63 @@ class ClimatologicalPercentile(Diagnostic): self._save_results(percentile_values) def _load_cube(self, startdate, member): - date = parse_date(startdate) - reference = PartialDateTime(year=date.year, month=date.month) + handler = Utils.openCdf(self.leadtime_files[startdate][member].local_file) if 'realization' in handler.variables: handler.variables[self.variable].coordinates = 'realization' handler.close() data_cube = iris.load_cube(self.leadtime_files[startdate][member].local_file) + date = parse_date(startdate) + leadtimes = {0: PartialDateTime(date.year, date.month)} def assign_leadtime(coord, x): - date = coord.units.num2date(x) - return PartialDateTime(date.year, date.month) - reference + try: + leadtime_month = 0 + partial_date = leadtimes[leadtime_month] + while coord.units.num2date(x) != partial_date: + leadtime_month += 1 + try: + partial_date = leadtimes[leadtime_month] + except KeyError: + new_date = add_months(date, leadtime_month, self.experiment_config.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month) + leadtimes[leadtime_month] = partial_date + return leadtime_month + except Exception as ex: + pass + iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) return data_cube def _save_results(self, percentile_values): temp = TempFile.get() - - results = iris.cube.Cube(percentile_values, var_name='percent', units=self.units) percentile_coord = iris.coords.DimCoord(self.percentiles, long_name='percentile') - - results.add_dim_coord(percentile_coord, 0) - results.add_dim_coord(self.lat_coord, 1) - results.add_dim_coord(self.lon_coord, 2) + results = iris.cube.CubeList() + for leadtime in percentile_values.keys(): + result = iris.cube.Cube(percentile_values[leadtime], var_name='percent', units=self.units) + 
result.add_dim_coord(percentile_coord, 0) + result.add_dim_coord(self.lat_coord, 1) + result.add_dim_coord(self.lon_coord, 2) + result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) + results.append(result) iris.FUTURE.netcdf_no_unlimited = True - iris.save(results, temp, zlib=True) + iris.save(results.merge_cube(), temp, zlib=True) self.percentiles_file.set_local_file(temp, rename_var='percent') def _calculate_percentiles(self, distribution): Log.debug('Calculating percentiles') - + percentiles = {} def calculate(point_distribution): cs = np.cumsum(point_distribution) total = cs[-1] percentile_values = self.percentiles * total index = np.searchsorted(cs, percentile_values) return [(self._bins[i + 1] + self._bins[i]) / 2 for i in index] - - distribution = np.apply_along_axis(calculate, 0, distribution) - return distribution + for leadtime, dist in distribution.iteritems(): + Log.debug('Calculating leadtime {0}', leadtime) + percentiles[leadtime] = np.apply_along_axis(calculate, 0, dist) + return percentiles def _get_distribution(self): distribution = {} @@ -163,7 +179,9 @@ class ClimatologicalPercentile(Diagnostic): for member in self.leadtime_files[startdate].keys(): data_cube = self._load_cube(startdate, member) Log.debug('Discretizing file {0}', data_cube) - for leadtime in data_cube.coord(leadtime).points: + + for leadtime in set(data_cube.coord('leadtime').points): + Log.debug('Discretizing leadtime {0}', leadtime) leadtime_cube = data_cube.extract(iris.Constraint(leadtime=leadtime)) for realization in range(self.realizations): Log.debug('Discretizing realization {0}', realization) @@ -171,13 +189,15 @@ class ClimatologicalPercentile(Diagnostic): realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization+1)) except iris.exceptions.CoordinateNotFoundError: realization_cube = data_cube - if distribution is None: + if leadtime not in distribution: distribution[leadtime] = 
self._calculate_distribution(realization_cube) else: distribution[leadtime] += self._calculate_distribution(realization_cube) return distribution def _get_realizations_present(self, data_cube): + self.realizations = 1 + return realizations = 1 ensemble_dimensions = ('realization', 'ensemble') for ensemble_dimension in ensemble_dimensions: @@ -196,14 +216,14 @@ class ClimatologicalPercentile(Diagnostic): def _get_value_interval(self, data_cube): if not self.check_limit_values: return - - file_max = np.amax(data_cube.data) - file_min = np.amin(data_cube.data) - self.max_value = max(self.min_value, file_max) - if self.min_value is None: - self.min_value = file_min - else: - self.min_value = min(self.min_value, file_min) + for time_slice in data_cube.slices_over('time'): + file_max = np.amax(time_slice.data) + file_min = np.amin(time_slice.data) + self.max_value = max(self.min_value, file_max) + if self.min_value is None: + self.min_value = file_min + else: + self.min_value = min(self.min_value, file_min) def _calculate_distribution(self, data_cube): def calculate_histogram(time_series): diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 0e086c28..ba3d71a1 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -5,7 +5,8 @@ from earthdiagnostics.diagnostic import * from earthdiagnostics.frequency import Frequencies import iris import iris.coord_categorisation -import iris.time +from iris.time import PartialDateTime +import iris.analysis class DaysOverPercentile(Diagnostic): @@ -89,20 +90,34 @@ class DaysOverPercentile(Diagnostic): Runs the diagnostic """ percentiles = iris.load_cube(self.percentiles_file.local_file) - percentile = percentiles.extract(iris.Constraint(percentile=self.percentile)) var = iris.load_cube(self.variable_file.local_file) - date = add_months(parse_date(self.startdate), self.leadtime, 'standard') - leadtime = 
iris.time.PartialDateTime(date.year, date.month) + date = parse_date(self.startdate) + leadtimes = {0: PartialDateTime(date.year, date.month)} + + def assign_leadtime(coord, x): + try: + leadtime_month = 0 + partial_date = leadtimes[leadtime_month] + while coord.units.num2date(x) != partial_date: + leadtime_month += 1 + try: + partial_date = leadtimes[leadtime_month] + except KeyError: + new_date = add_months(date, leadtime_month, self.experiment_config.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month) + leadtimes[leadtime_month] = partial_date + return leadtime_month + except Exception as ex: + pass + + for leadtime_slice in var.slices_over('leadtime'): + percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime_slice.coord('leadtime').points[0])) + for percentile_slice in percentiles_leadtime.slices_over('percentile'): + over = leadtime_slice > percentile_slice + print(over) - iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', - lambda coord, x: coord.units.num2date(x) == leadtime) - - var = var.extract(iris.Constraint(leadtime=True)) - - days_over = var > percentile - print(days_over) diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index cc1cb4aa..6f118d5c 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -152,10 +152,10 @@ class WorkManager(object): @staticmethod def _run_job(job): - Log.info('Starting {0}', job) - job.status = DiagnosticStatus.RUNNING - time = datetime.datetime.now() try: + Log.info('Starting {0}', job) + job.status = DiagnosticStatus.RUNNING + time = datetime.datetime.now() job.compute() except Exception as ex: job.consumed_time = datetime.datetime.now() - time @@ -195,6 +195,7 @@ class WorkManager(object): Diagnostic.register(Attribute) Diagnostic.register(Module) Diagnostic.register(VerticalMeanMetersIris) + Diagnostic.register(SimplifyDimensions) @staticmethod def _register_ocean_diagnostics(): -- GitLab 
From 74c42c6991e51fe515a8b0165e15074d5734d9c4 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 28 Jun 2017 17:46:36 +0200 Subject: [PATCH 48/82] Days over finished --- diags.conf | 3 +- .../statistics/climatologicalpercentile.py | 100 +++++++++-------- .../statistics/daysoverpercentile.py | 105 +++++++++++++----- 3 files changed, 133 insertions(+), 75 deletions(-) diff --git a/diags.conf b/diags.conf index d03aed9d..cf1c04ed 100644 --- a/diags.conf +++ b/diags.conf @@ -16,8 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -# DIAGS = climpercent,atmos,sfcWind -DIAGS = daysover,atmos,sfcWind +DIAGS = climpercent,atmos,sfcWind,2000,2000,1 daysover,atmos,sfcWind,2000,2000,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 39594902..87b75e21 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -29,7 +29,8 @@ class ClimatologicalPercentile(Diagnostic): alias = 'climpercent' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, domain, variable, num_bins, experiment_config): + def __init__(self, data_manager, domain, variable, num_bins, start_year, end_year, forecast_month, + experiment_config): Diagnostic.__init__(self, data_manager) self.variable = variable self.domain = domain @@ -40,6 +41,9 @@ class ClimatologicalPercentile(Diagnostic): self.lon_coord = None self.num_bins = num_bins self._bins = None + self.start_year = start_year + self.end_year = end_year + self.forecast_month = forecast_month self.percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) if self.cmor_var and self.cmor_var.valid_max and self.cmor_var.valid_min: @@ -71,25 +75,33 @@ class ClimatologicalPercentile(Diagnostic): """ options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(), - DiagnosticIntOption('bins', 2000)) + DiagnosticIntOption('start_year'), + DiagnosticIntOption('end_year'), + DiagnosticIntOption('forecast_month'), + DiagnosticIntOption('bins', 2000), + ) options = cls.process_options(options, options_available) job_list = list() job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'], - options['bins'], diags.config.experiment)) + options['bins'], options['start_year'], options['end_year'], + options['forecast_month'], + diags.config.experiment)) return job_list + def requested_startdates(self): + return ['{0}{1:02}01'.format(year, self.forecast_month) for year in range(self.start_year, self.end_year+1)] + def 
request_data(self): self.leadtime_files = {} - for startdate, member in self.experiment_config.get_member_list(): + for startdate in self.requested_startdates(): if startdate not in self.leadtime_files: self.leadtime_files[startdate] = {} Log.debug('Retrieving startdate {0}', startdate) - self.leadtime_files[startdate][member] = self.request_chunk(self.domain, self.variable, startdate, - member, 1) + self.leadtime_files[startdate] = self.request_chunk(self.domain, self.variable, startdate, None, None) def declare_data_generated(self): - var_name = self.variable + 'prct' + var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month}'.format(self) self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) @@ -100,44 +112,45 @@ class ClimatologicalPercentile(Diagnostic): iris.FUTURE.netcdf_promote = True for startdate in self.leadtime_files.keys(): Log.debug('Getting data for startdate {0}', startdate) - for member in self.leadtime_files[startdate].keys(): - data_cube = self._load_cube(startdate, member) - self._get_value_interval(data_cube) - self._get_realizations_present(data_cube) - if self.lat_coord is None: - self.units = data_cube.units - self.lat_coord = data_cube.coord('latitude') - self.lon_coord = data_cube.coord('longitude') + data_cube = self._load_cube(startdate) + self._get_value_interval(data_cube) + self._get_realizations_present(data_cube) + if self.lat_coord is None: + self.units = data_cube.units + self.lat_coord = data_cube.coord('latitude') + self.lon_coord = data_cube.coord('longitude') distribution = self._get_distribution() percentile_values = self._calculate_percentiles(distribution) self._save_results(percentile_values) - def _load_cube(self, startdate, member): + def _load_cube(self, startdate): - handler = Utils.openCdf(self.leadtime_files[startdate][member].local_file) + handler = 
Utils.openCdf(self.leadtime_files[startdate].local_file) if 'realization' in handler.variables: handler.variables[self.variable].coordinates = 'realization' handler.close() - data_cube = iris.load_cube(self.leadtime_files[startdate][member].local_file) + data_cube = iris.load_cube(self.leadtime_files[startdate].local_file) date = parse_date(startdate) - leadtimes = {0: PartialDateTime(date.year, date.month)} + lead_date = add_months(date, 1, self.data_manager.config.experiment.calendar) + leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} + def assign_leadtime(coord, x): try: - leadtime_month = 0 + leadtime_month = 1 partial_date = leadtimes[leadtime_month] - while coord.units.num2date(x) != partial_date: + while coord.units.num2date(x) >= partial_date: leadtime_month += 1 try: partial_date = leadtimes[leadtime_month] except KeyError: - new_date = add_months(date, leadtime_month, self.experiment_config.calendar) - partial_date = PartialDateTime(new_date.year, new_date.month) + new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) leadtimes[leadtime_month] = partial_date return leadtime_month - except Exception as ex: - pass + except Exception: + return -1 iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) return data_cube @@ -174,30 +187,27 @@ class ClimatologicalPercentile(Diagnostic): def _get_distribution(self): distribution = {} - for startdate in self.leadtime_files.keys(): + for startdate in self.leadtime_files: Log.debug('Getting data for startdate {0}', startdate) - for member in self.leadtime_files[startdate].keys(): - data_cube = self._load_cube(startdate, member) - Log.debug('Discretizing file {0}', data_cube) - - for leadtime in set(data_cube.coord('leadtime').points): - Log.debug('Discretizing leadtime {0}', leadtime) - leadtime_cube = 
data_cube.extract(iris.Constraint(leadtime=leadtime)) - for realization in range(self.realizations): - Log.debug('Discretizing realization {0}', realization) - try: - realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization+1)) - except iris.exceptions.CoordinateNotFoundError: - realization_cube = data_cube - if leadtime not in distribution: - distribution[leadtime] = self._calculate_distribution(realization_cube) - else: - distribution[leadtime] += self._calculate_distribution(realization_cube) + data_cube = self._load_cube(startdate) + Log.debug('Discretizing file {0}', data_cube) + + for leadtime in set(data_cube.coord('leadtime').points): + Log.debug('Discretizing leadtime {0}', leadtime) + leadtime_cube = data_cube.extract(iris.Constraint(leadtime=leadtime)) + for realization in range(self.realizations): + Log.debug('Discretizing realization {0}', realization) + try: + realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization+1)) + except iris.exceptions.CoordinateNotFoundError: + realization_cube = data_cube + if leadtime not in distribution: + distribution[leadtime] = self._calculate_distribution(realization_cube) + else: + distribution[leadtime] += self._calculate_distribution(realization_cube) return distribution def _get_realizations_present(self, data_cube): - self.realizations = 1 - return realizations = 1 ensemble_dimensions = ('realization', 'ensemble') for ensemble_dimension in ensemble_dimensions: diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index ba3d71a1..26318a48 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -7,6 +7,10 @@ import iris import iris.coord_categorisation from iris.time import PartialDateTime import iris.analysis +import iris.coords +import numpy as np + +from earthdiagnostics.utils import Utils, TempFile class DaysOverPercentile(Diagnostic): @@ 
-28,25 +32,24 @@ class DaysOverPercentile(Diagnostic): alias = 'daysover' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, variable, leadtime, percentile): + def __init__(self, data_manager, startdate, member, chunk, domain, variable, start_year, end_year, forecast_month): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member self.chunk = chunk self.variable = variable self.domain = domain - self.leadtime = leadtime - self.percentile = percentile + self.start_year = start_year + self.end_year = end_year + self.forecast_month = forecast_month def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable and self.leadtime == other.leadtime and \ - self.percentile == other.percentile + self.domain == other.domain and self.variable == other.variable def __str__(self): return 'Days over percentile Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4} Leadtime: {5}'.format(self.startdate, self.member, self.chunk, self.domain, - self.variable, self.leadtime) + 'Variable: {3}:{4}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable) @classmethod def generate_jobs(cls, diags, options): @@ -61,62 +64,108 @@ class DaysOverPercentile(Diagnostic): """ options_available = (DiagnosticDomainOption(), DiagnosticOption('variable'), - DiagnosticListIntOption('leadtimes'), - DiagnosticFloatOption('percentile')) + DiagnosticIntOption('start_year'), + DiagnosticIntOption('end_year'), + DiagnosticIntOption('forecast_month'),) options = cls.process_options(options, options_available) job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - for leadtime in options['leadtimes']: - job_list.append(DaysOverPercentile(diags.data_manager, startdate, member, chunk, - options['domain'], 
options['variable'], leadtime, - options['percentile'])) + job_list.append(DaysOverPercentile(diags.data_manager, startdate, member, chunk, + options['domain'], options['variable'], + options['start_year'], options['end_year'], options['forecast_month'],)) return job_list def request_data(self): - var_name = self.variable + 'prct' + var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month}'.format(self) self.percentiles_file = self.request_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology) self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): - var_name = self.variable + '_daysover'.format(self.leadtime) - self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, - frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) + var_over = self.variable + '_daysover' + var_below = self.variable + '_daysbelow' + self.days_over_file = self.declare_chunk(self.domain, var_over, None, None, None, + frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) + + self.days_below_file = self.declare_chunk(self.domain, var_below, None, None, None, + frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) def compute(self): """ Runs the diagnostic """ + iris.FUTURE.netcdf_promote = True percentiles = iris.load_cube(self.percentiles_file.local_file) + handler = Utils.openCdf(self.variable_file.local_file) + if 'realization' in handler.variables: + handler.variables[self.variable].coordinates = 'realization' + handler.close() var = iris.load_cube(self.variable_file.local_file) - date = parse_date(self.startdate) - leadtimes = {0: PartialDateTime(date.year, date.month)} + lead_date = add_months(date, 1, self.data_manager.config.experiment.calendar) + leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} def assign_leadtime(coord, x): try: - leadtime_month = 0 + 
leadtime_month = 1 partial_date = leadtimes[leadtime_month] - while coord.units.num2date(x) != partial_date: + while coord.units.num2date(x) >= partial_date: leadtime_month += 1 try: partial_date = leadtimes[leadtime_month] except KeyError: - new_date = add_months(date, leadtime_month, self.experiment_config.calendar) - partial_date = PartialDateTime(new_date.year, new_date.month) + new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) leadtimes[leadtime_month] = partial_date return leadtime_month except Exception as ex: pass - - for leadtime_slice in var.slices_over('leadtime'): - percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime_slice.coord('leadtime').points[0])) + iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', assign_leadtime) + iris.coord_categorisation.add_year(var, 'time') + iris.coord_categorisation.add_day_of_year(var, 'time') + realization_coord = var.coord('realization') + lat_coord = var.coord('latitude') + lon_coord = var.coord('longitude') + results_over = iris.cube.CubeList() + results_below = iris.cube.CubeList() + for leadtime in leadtimes.keys(): + leadtime_slice = var.extract(iris.Constraint(leadtime=leadtime)) + timesteps = leadtime_slice.coord('time').shape[0] + percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime)) for percentile_slice in percentiles_leadtime.slices_over('percentile'): - over = leadtime_slice > percentile_slice - print(over) + percentile = percentile_slice.coord('percentile').points[0] + + days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps) + result = iris.cube.Cube(days_over.astype(np.float32), var_name='daysover', units=1.0) + result.add_dim_coord(lat_coord, 1) + result.add_dim_coord(lon_coord, 2) + result.add_aux_coord(realization_coord, 0) + result.add_aux_coord(iris.coords.AuxCoord(percentile, 
long_name='percentile')) + result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) + results_over.append(result) + + days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps) + result = iris.cube.Cube(days_below.astype(np.float32), var_name='daysbelow', units=1.0) + result.add_dim_coord(lat_coord, 1) + result.add_dim_coord(lon_coord, 2) + result.add_aux_coord(realization_coord, 0) + result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) + result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) + results_below.append(result) + + iris.FUTURE.netcdf_no_unlimited = True + temp = TempFile.get() + iris.save(results_over.merge_cube(), temp, zlib=True) + Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) + self.days_over_file.set_local_file(temp, rename_var='daysover') + + temp = TempFile.get() + iris.save(results_below.merge_cube(), temp, zlib=True) + Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) + self.days_below_file.set_local_file(temp, rename_var='daysbelow') -- GitLab From 39001ab9ca3a5c9a3b163dd78e4d38adb2a2fc0f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 17 Jul 2017 10:37:24 +0200 Subject: [PATCH 49/82] Merged --- earthdiagnostics/cmormanager.py | 2 +- earthdiagnostics/config.py | 3 +- earthdiagnostics/general/select_levels.py | 4 +-- .../general/verticalmeanmetersiris.py | 14 ++++---- earthdiagnostics/obsreconmanager.py | 11 ++----- earthdiagnostics/ocean/interpolate.py | 2 +- earthdiagnostics/ocean/rotation.py | 3 -- earthdiagnostics/ocean/siasiesiv.py | 1 - earthdiagnostics/publisher.py | 2 +- .../statistics/climatologicalpercentile.py | 33 ++++++++++--------- .../statistics/daysoverpercentile.py | 2 +- earthdiagnostics/work_manager.py | 2 +- setup.py | 2 +- 13 files changed, 36 insertions(+), 45 deletions(-) diff --git a/earthdiagnostics/cmormanager.py 
b/earthdiagnostics/cmormanager.py index c66740d0..c2d30f2f 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -73,7 +73,7 @@ class CMORManager(DataManager): """ Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy - :param **kwargs: + :param vartype: :param domain: CMOR domain :type domain: Domain :param var: variable name diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 139b54ee..cfc8b690 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -25,7 +25,8 @@ class Config(object): parser.read(path) # Read diags config - self.data_adaptor = parser.get_choice_option('DIAGNOSTICS', 'DATA_ADAPTOR', ('CMOR', 'THREDDS', 'OBSRECON'), 'CMOR') + self.data_adaptor = parser.get_choice_option('DIAGNOSTICS', 'DATA_ADAPTOR', ('CMOR', 'THREDDS', 'OBSRECON'), + 'CMOR') "Scratch folder path" self.scratch_dir = parser.get_path_option('DIAGNOSTICS', 'SCRATCH_DIR') "Scratch folder path" diff --git a/earthdiagnostics/general/select_levels.py b/earthdiagnostics/general/select_levels.py index 3ee5b2b2..39a01ca6 100644 --- a/earthdiagnostics/general/select_levels.py +++ b/earthdiagnostics/general/select_levels.py @@ -2,7 +2,7 @@ from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \ DiagnosticVariableListOption, DiagnosticIntOption from earthdiagnostics.modelingrealm import ModelingRealm -from earthdiagnostics.utils import Utils +from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.box import Box @@ -48,7 +48,7 @@ class SelectLevels(Diagnostic): return 'Select levels Startdate: {0} Member: {1} Chunk: {2} ' \ 'Variable: {3}:{4} Levels: {6}-{7} ' \ 'Grid: {5}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable, - self.grid, self.box.min_depth, self.box.max_depth) + self.grid, self.box.min_depth, self.box.max_depth) def __eq__(self, other): return self.startdate == 
other.startdate and self.member == other.member and self.chunk == other.chunk and \ diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py index 70c14334..0d0633cc 100644 --- a/earthdiagnostics/general/verticalmeanmetersiris.py +++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -5,8 +5,8 @@ import iris.exceptions from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticFloatOption, DiagnosticDomainOption, \ - DiagnosticVariableOption, DiagnosticChoiceOption -from earthdiagnostics.utils import Utils, TempFile + DiagnosticVariableOption +from earthdiagnostics.utils import TempFile from earthdiagnostics.modelingrealm import ModelingRealms @@ -84,7 +84,7 @@ class VerticalMeanMetersIris(Diagnostic): job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(VerticalMeanMetersIris(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], box, options['grid_point'])) + options['domain'], options['variable'], box, options['grid_point'])) return job_list def request_data(self): @@ -108,9 +108,9 @@ class VerticalMeanMetersIris(Diagnostic): coord = None for coord_name in lev_names: try: - coord = var_cube.coord(coord_name) + coord = var_cube.coord(coord_name) except iris.exceptions.CoordinateNotFoundError: - pass + pass if self.box.min_depth is None: lev_min = coord.points[0] @@ -121,8 +121,8 @@ class VerticalMeanMetersIris(Diagnostic): lev_max = coord.points[-1] else: lev_max = self.box.max_depth - var_cube = var_cube.extract(iris.Constraint(coord_values={coord.var_name: - lambda cell: lev_min <= cell <= lev_max})) + var_cube = var_cube.extract(iris.Constraint(coord_values= + {coord.var_name: lambda cell: lev_min <= cell <= lev_max})) var_cube = var_cube.collapsed(coord, iris.analysis.MEAN) temp = TempFile.get() iris.save(var_cube, temp, zlib=True) diff --git 
a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index 6e729e69..5c269dbc 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -1,15 +1,8 @@ # coding=utf-8 import os -from bscearth.utils.date import parse_date, add_months, chunk_start_date, chunk_end_date - -from earthdiagnostics.datafile import NetCDFFile, StorageStatus, LocalStatus from earthdiagnostics.datamanager import DataManager -from earthdiagnostics.utils import TempFile, Utils -from datetime import datetime - from earthdiagnostics.variable_type import VariableType -from frequency import Frequencies class ObsReconManager(DataManager): @@ -33,11 +26,11 @@ class ObsReconManager(DataManager): raise Exception('For obs and recon data chunk_size must be always 1') # noinspection PyUnusedLocal - def request_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VariableType.MEAN): + def request_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, + vartype=VariableType.MEAN): filepath = self.get_file_path(startdate, domain, variable, frequency, vartype) return self._get_file_from_storage(filepath) - # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index 91b4a1e5..2fe03587 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -84,7 +84,7 @@ class Interpolate(Diagnostic): :return: """ options_available = (DiagnosticOption('target_grid'), - DiagnosticVariableListOption(), + DiagnosticVariableListOption('variable'), DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticBoolOption('invert_lat', False), DiagnosticOption('original_grid', '')) diff --git a/earthdiagnostics/ocean/rotation.py b/earthdiagnostics/ocean/rotation.py index 
c7712fd3..54b794ce 100644 --- a/earthdiagnostics/ocean/rotation.py +++ b/earthdiagnostics/ocean/rotation.py @@ -1,7 +1,6 @@ # coding=utf-8 import shutil from bscearth.utils.log import Log -import os from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, DiagnosticVariableOption from earthdiagnostics.utils import Utils, TempFile @@ -118,8 +117,6 @@ class Rotation(Diagnostic): vfile_handler.close() self.vrotated_file.set_local_file(urotated) - - def _merge_levels(self, var, direction): temp = TempFile.get() if self.has_levels: diff --git a/earthdiagnostics/ocean/siasiesiv.py b/earthdiagnostics/ocean/siasiesiv.py index b665701e..d16295a4 100644 --- a/earthdiagnostics/ocean/siasiesiv.py +++ b/earthdiagnostics/ocean/siasiesiv.py @@ -12,7 +12,6 @@ import numpy as np from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.constants import Basins -from earthdiagnostics.variable import VariableManager class Siasiesiv(Diagnostic): diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index d2b6ecc0..6fc9e4c8 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -13,7 +13,7 @@ class Publisher(object): :param who: subscriber to add :type who: object :param callback: method to execute when publisher updates - :type callback: callable + :type callback: callable | NoneType """ if callback is None: callback = getattr(who, 'update') diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index c823a95b..827d2970 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -1,8 +1,10 @@ # coding=utf-8 +import six from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, DiagnosticIntOption +from 
earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ + DiagnosticIntOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable_type import VariableType @@ -137,20 +139,18 @@ class ClimatologicalPercentile(Diagnostic): leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} def assign_leadtime(coord, x): - try: - leadtime_month = 1 - partial_date = leadtimes[leadtime_month] - while coord.units.num2date(x) >= partial_date: - leadtime_month += 1 - try: - partial_date = leadtimes[leadtime_month] - except KeyError: - new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) - partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) - leadtimes[leadtime_month] = partial_date - return leadtime_month - except Exception: - return -1 + leadtime_month = 1 + partial_date = leadtimes[leadtime_month] + while coord.units.num2date(x) >= partial_date: + leadtime_month += 1 + try: + partial_date = leadtimes[leadtime_month] + except KeyError: + new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) + leadtimes[leadtime_month] = partial_date + return leadtime_month + iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) return data_cube @@ -174,13 +174,14 @@ class ClimatologicalPercentile(Diagnostic): def _calculate_percentiles(self, distribution): Log.debug('Calculating percentiles') percentiles = {} + def calculate(point_distribution): cs = np.cumsum(point_distribution) total = cs[-1] percentile_values = self.percentiles * total index = np.searchsorted(cs, percentile_values) return [(self._bins[i + 1] + self._bins[i]) / 2 for i in index] - for leadtime, dist in distribution.iteritems(): + for leadtime, dist in 
six.iteritems(distribution): Log.debug('Calculating leadtime {0}', leadtime) percentiles[leadtime] = np.apply_along_axis(calculate, 0, dist) return percentiles diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 26318a48..7242316a 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -121,7 +121,7 @@ class DaysOverPercentile(Diagnostic): partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) leadtimes[leadtime_month] = partial_date return leadtime_month - except Exception as ex: + except Exception: pass iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', assign_leadtime) iris.coord_categorisation.add_year(var, 'time') diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 6f118d5c..450cc9e5 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -152,10 +152,10 @@ class WorkManager(object): @staticmethod def _run_job(job): + time = datetime.datetime.now() try: Log.info('Starting {0}', job) job.status = DiagnosticStatus.RUNNING - time = datetime.datetime.now() job.compute() except Exception as ex: job.consumed_time = datetime.datetime.now() - time diff --git a/setup.py b/setup.py index 4d30a399..a7558830 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup( keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', - 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units'], + 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits'], packages=find_packages(), include_package_data=True, scripts=['bin/earthdiags'] -- GitLab From 360c82699a78559a5da2b001ef3a444a666e8dee Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 27 Jul 2017 15:40:44 +0200 Subject: [PATCH 50/82] Days over ready --- 
diags.conf | 3 +- .../statistics/climatologicalpercentile.py | 7 +- .../statistics/daysoverpercentile.py | 73 ++++++++++++------- earthdiagnostics/variable_alias/cmip6.csv | 8 ++ earthdiagnostics/variable_alias/default.csv | 9 +-- earthdiagnostics/variable_alias/primavera.csv | 7 ++ launch_diags.sh | 14 ++-- 7 files changed, 76 insertions(+), 45 deletions(-) diff --git a/diags.conf b/diags.conf index cf1c04ed..166e59bb 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,8 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = climpercent,atmos,sfcWind,2000,2000,1 daysover,atmos,sfcWind,2000,2000,1 +# DIAGS = climpercent,atmos,sfcWind,2000,2000,1 +DIAGS = daysover,atmos,sfcWind,2000,2000,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 827d2970..7efe03c3 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -31,6 +31,8 @@ class ClimatologicalPercentile(Diagnostic): alias = 'climpercent' "Diagnostic alias for the configuration file" + Percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) + def __init__(self, data_manager, domain, variable, num_bins, start_year, end_year, forecast_month, experiment_config): Diagnostic.__init__(self, data_manager) @@ -46,7 +48,6 @@ class ClimatologicalPercentile(Diagnostic): self.start_year = start_year self.end_year = end_year self.forecast_month = forecast_month - self.percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) if self.cmor_var and self.cmor_var.valid_max and self.cmor_var.valid_min: self.max_value = float(self.cmor_var.valid_max) @@ -157,7 +158,7 @@ class ClimatologicalPercentile(Diagnostic): def _save_results(self, percentile_values): temp = TempFile.get() - percentile_coord = iris.coords.DimCoord(self.percentiles, long_name='percentile') + percentile_coord = iris.coords.DimCoord(ClimatologicalPercentile.Percentiles, long_name='percentile') results = iris.cube.CubeList() for leadtime in percentile_values.keys(): result = iris.cube.Cube(percentile_values[leadtime], var_name='percent', units=self.units) @@ -178,7 +179,7 @@ class ClimatologicalPercentile(Diagnostic): def calculate(point_distribution): cs = np.cumsum(point_distribution) total = cs[-1] - percentile_values = self.percentiles * total + percentile_values = ClimatologicalPercentile.Percentiles * total index = np.searchsorted(cs, percentile_values) return [(self._bins[i + 1] + self._bins[i]) / 2 for i in index] for leadtime, dist in six.iteritems(distribution): diff --git 
a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 7242316a..c6fd35d2 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -1,6 +1,7 @@ # coding=utf-8 from bscearth.utils.date import parse_date, add_months +from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalPercentile from earthdiagnostics.diagnostic import * from earthdiagnostics.frequency import Frequencies import iris @@ -84,13 +85,18 @@ class DaysOverPercentile(Diagnostic): self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) def declare_data_generated(self): - var_over = self.variable + '_daysover' - var_below = self.variable + '_daysbelow' - self.days_over_file = self.declare_chunk(self.domain, var_over, None, None, None, - frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) - - self.days_below_file = self.declare_chunk(self.domain, var_below, None, None, None, - frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) + var_over = self.variable + '_daysover_q{0}' + var_below = self.variable + '_daysbelow_q{0}' + self.days_over_file = {} + self.days_below_file = {} + for perc in ClimatologicalPercentile.Percentiles: + self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100)), None, None, + None, frequency=Frequencies.monthly, + vartype=VariableType.STATISTIC) + + self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100)), None, None, + None, frequency=Frequencies.monthly, + vartype=VariableType.STATISTIC) def compute(self): """ @@ -129,43 +135,56 @@ class DaysOverPercentile(Diagnostic): realization_coord = var.coord('realization') lat_coord = var.coord('latitude') lon_coord = var.coord('longitude') - results_over = iris.cube.CubeList() - results_below = iris.cube.CubeList() + results_over = {perc: 
iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} + results_below = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} + for leadtime in leadtimes.keys(): leadtime_slice = var.extract(iris.Constraint(leadtime=leadtime)) - timesteps = leadtime_slice.coord('time').shape[0] + percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime)) + time_coord = iris.coords.AuxCoord.from_coord(leadtime_slice.coord('time')) + first_time = time_coord.points[0] + last_time = time_coord.points[-1] + timesteps = leadtime_slice.coord('time').shape[0] + days = time_coord.units.num2date(last_time) - time_coord.units.num2date(first_time) + if days.seconds > 0: + days = days.days + 1 + else: + days = days.days + timesteps_per_day = timesteps / days + time_coord = time_coord.copy(first_time + (last_time - first_time) / 2, (first_time, last_time)) for percentile_slice in percentiles_leadtime.slices_over('percentile'): percentile = percentile_slice.coord('percentile').points[0] - days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps) + days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps_per_day) result = iris.cube.Cube(days_over.astype(np.float32), var_name='daysover', units=1.0) result.add_dim_coord(lat_coord, 1) result.add_dim_coord(lon_coord, 2) result.add_aux_coord(realization_coord, 0) result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) - result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) - results_over.append(result) + result.add_aux_coord(time_coord) + results_over[percentile].append(result) - days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps) + days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps_per_day) result = iris.cube.Cube(days_below.astype(np.float32), var_name='daysbelow', units=1.0) result.add_dim_coord(lat_coord, 1) 
result.add_dim_coord(lon_coord, 2) result.add_aux_coord(realization_coord, 0) result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) - result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) - results_below.append(result) - - iris.FUTURE.netcdf_no_unlimited = True - temp = TempFile.get() - iris.save(results_over.merge_cube(), temp, zlib=True) - Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) - self.days_over_file.set_local_file(temp, rename_var='daysover') - - temp = TempFile.get() - iris.save(results_below.merge_cube(), temp, zlib=True) - Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) - self.days_below_file.set_local_file(temp, rename_var='daysbelow') + result.add_aux_coord(time_coord) + results_below[percentile].append(result) + + for perc in ClimatologicalPercentile.Percentiles: + iris.FUTURE.netcdf_no_unlimited = True + temp = TempFile.get() + iris.save(results_over[perc].merge_cube(), temp, zlib=True) + Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) + self.days_over_file[perc].set_local_file(temp, rename_var='daysover') + + temp = TempFile.get() + iris.save(results_below[perc].merge_cube(), temp, zlib=True) + Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) + self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') diff --git a/earthdiagnostics/variable_alias/cmip6.csv b/earthdiagnostics/variable_alias/cmip6.csv index 63708fcb..e6c8a679 100644 --- a/earthdiagnostics/variable_alias/cmip6.csv +++ b/earthdiagnostics/variable_alias/cmip6.csv @@ -71,3 +71,11 @@ mun,mun,, mud,mud,, ppnewn,ppnewn,, ppnewd,ppnewd,, +alb_ice,sialb,, +qsr3d,rsdo,, +hflx_rnf_cea,hfrunoffds2d,, +hflx_rain_cea,hfrainds,, +hflx_cal_cea,hfibthermds2d,, +rain,prra,, +calving,ficeberg2d,, + diff --git a/earthdiagnostics/variable_alias/default.csv 
b/earthdiagnostics/variable_alias/default.csv index 274b0b75..1baafce7 100644 --- a/earthdiagnostics/variable_alias/default.csv +++ b/earthdiagnostics/variable_alias/default.csv @@ -298,12 +298,5 @@ qtr_ice,qtr,, var78,tclw,, var79,tciw,, rho,rhopoto,, -alb_ice,sialb,, qsr,rsntds,, -qsr3d,rsdo,, -hflx_rnf_cea,hfrunoffds2d,, -hflx_rain_cea,hfrainds,, -hflx_cal_cea,hfibthermds2d,, -rain,prra,, -runoffs,friver,, -calving,ficeberg2d,, \ No newline at end of file +runoffs,friver,, \ No newline at end of file diff --git a/earthdiagnostics/variable_alias/primavera.csv b/earthdiagnostics/variable_alias/primavera.csv index cc704fb7..db38cd5c 100644 --- a/earthdiagnostics/variable_alias/primavera.csv +++ b/earthdiagnostics/variable_alias/primavera.csv @@ -1,2 +1,9 @@ Aliases,Shortname,Basin,Grid iiceconc:siconc:soicecov:ileadfra:sic,siconc,, +alb_ice,sialb,, +qsr3d,rsdo,, +hflx_rnf_cea,hfrunoffds2d,, +hflx_rain_cea,hfrainds,, +hflx_cal_cea,hfibthermds2d,, +rain,prra,, +calving,ficeberg2d,, diff --git a/launch_diags.sh b/launch_diags.sh index bbce66f3..74f00dc3 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -6,18 +6,20 @@ #SBATCH --error=job.%J.err #SBATCH --output=job.%J.out -set -xv -PATH_TO_CONF_FILE=~/earthdiagnostics/diags.conf -PATH_TO_DIAGNOSTICS=~/earthdiagnostics -PATH_TO_VIRTUALENV=~jvegas/virtualenvs/diags/bin + +PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf +PATH_TO_DIAGNOSTICS=~jvegas/earthdiagnostics +PATH_TO_CONDAENV=~jvegas/anaconda/venvs/earthdiags/bin module purge module load NCO/4.5.4-foss-2015a module load CDO/1.7.2-foss-2015a -module load CDFTOOLS/3.0a5-foss-2015a +module load CDFTOOLS/3.0a8-foss-2015a + +set -xv -source ${PATH_TO_VIRTUALENV}/activate +source ${PATH_TO_CONDAENV}/activate earthdiags export PYTHONPATH=${PATH_TO_DIAGNOSTICS}:${PYTHONPATH} cd ${PATH_TO_DIAGNOSTICS}/earthdiagnostics/ -- GitLab From 3baa4b4c9a88faec793bb6cb41e49f14d6b61a48 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 28 Jul 2017 16:00:31 +0200 
Subject: [PATCH 51/82] Days over for erainterim ready --- diags.conf | 9 ++- .../statistics/climatologicalpercentile.py | 18 ++--- .../statistics/daysoverpercentile.py | 72 ++++++++++++------- launch_diags.sh | 2 +- 4 files changed, 60 insertions(+), 41 deletions(-) diff --git a/diags.conf b/diags.conf index 166e59bb..703765e3 100644 --- a/diags.conf +++ b/diags.conf @@ -6,7 +6,7 @@ SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive # Specify if your data is from an experiment (exp), observation (obs) or reconstructions (recon) -DATA_TYPE = exp +DATA_TYPE = recon # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 DATA_CONVENTION = SPECS @@ -16,8 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -# DIAGS = climpercent,atmos,sfcWind,2000,2000,1 -DIAGS = daysover,atmos,sfcWind,2000,2000,1 +DIAGS = climpercent,atmos,sfcWind,2000,2000,1-2 daysover,atmos,sfcWind,2000,2000,1-2 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -71,7 +70,7 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard INSTITUTE = ecmwf -MODEL = system4_m1 +MODEL = erainterim # Model version: Available versions MODEL_VERSION = Ec3.2_O1L75 # Atmospheric output timestep in hours @@ -87,7 +86,7 @@ OCEAN_TIMESTEP = 6 # if 2, fc00 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. 
You can specify less chunks than present on the experiment -EXPID = testing_recon +EXPID = testing_erainterim STARTDATES = 20000101 MEMBERS = 0 MEMBER_DIGITS = 1 diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 7efe03c3..7f51baa4 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -4,7 +4,7 @@ from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ - DiagnosticIntOption + DiagnosticIntOption, DiagnosticListIntOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable_type import VariableType @@ -80,16 +80,16 @@ class ClimatologicalPercentile(Diagnostic): DiagnosticVariableOption(), DiagnosticIntOption('start_year'), DiagnosticIntOption('end_year'), - DiagnosticIntOption('forecast_month'), + DiagnosticListIntOption('forecast_month'), DiagnosticIntOption('bins', 2000), ) options = cls.process_options(options, options_available) job_list = list() - job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'], - options['bins'], options['start_year'], options['end_year'], - options['forecast_month'], - diags.config.experiment)) + for forecast_month in options['forecast_month']: + job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'], + options['bins'], options['start_year'], options['end_year'], + forecast_month, diags.config.experiment)) return job_list def requested_startdates(self): @@ -152,7 +152,6 @@ class ClimatologicalPercentile(Diagnostic): leadtimes[leadtime_month] = partial_date return leadtime_month - iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) 
return data_cube @@ -197,12 +196,15 @@ class ClimatologicalPercentile(Diagnostic): for leadtime in set(data_cube.coord('leadtime').points): Log.debug('Discretizing leadtime {0}', leadtime) leadtime_cube = data_cube.extract(iris.Constraint(leadtime=leadtime)) + for realization in range(self.realizations): Log.debug('Discretizing realization {0}', realization) try: realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization+1)) except iris.exceptions.CoordinateNotFoundError: - realization_cube = data_cube + realization_cube = leadtime_cube + if realization_cube is None and realization == 0: + realization_cube = leadtime_cube if leadtime not in distribution: distribution[leadtime] = self._calculate_distribution(realization_cube) else: diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index c6fd35d2..dd417c2d 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -5,6 +5,7 @@ from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalP from earthdiagnostics.diagnostic import * from earthdiagnostics.frequency import Frequencies import iris +import iris.exceptions import iris.coord_categorisation from iris.time import PartialDateTime import iris.analysis @@ -33,24 +34,22 @@ class DaysOverPercentile(Diagnostic): alias = 'daysover' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, variable, start_year, end_year, forecast_month): + def __init__(self, data_manager, domain, variable, start_year, end_year, year_to_compute, forecast_month): Diagnostic.__init__(self, data_manager) - self.startdate = startdate - self.member = member - self.chunk = chunk self.variable = variable self.domain = domain self.start_year = start_year self.end_year = end_year + self.year_to_compute = year_to_compute self.forecast_month = forecast_month + self.startdate = 
'{0}{1:02}01'.format(self.start_year, self.forecast_month) def __eq__(self, other): - return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable + return self.startdate == other.startdate and self.domain == other.domain and self.variable == other.variable def __str__(self): - return 'Days over percentile Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable) + return 'Days over percentile Startdate: {0} ' \ + 'Variable: {1}:{2}'.format(self.startdate, self.domain, self.variable) @classmethod def generate_jobs(cls, diags, options): @@ -67,14 +66,17 @@ class DaysOverPercentile(Diagnostic): DiagnosticOption('variable'), DiagnosticIntOption('start_year'), DiagnosticIntOption('end_year'), - DiagnosticIntOption('forecast_month'),) + DiagnosticListIntOption('forecast_month'),) options = cls.process_options(options, options_available) job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(DaysOverPercentile(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], - options['start_year'], options['end_year'], options['forecast_month'],)) + year = options['start_year'] + while year <= options['end_year']: + for forecast_month in options['forecast_month']: + job_list.append(DaysOverPercentile(diags.data_manager, options['domain'], options['variable'], + options['start_year'], options['end_year'], + year, forecast_month)) + year += 1 return job_list def request_data(self): @@ -82,7 +84,7 @@ class DaysOverPercentile(Diagnostic): self.percentiles_file = self.request_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology) - self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, self.member, self.chunk) + self.variable_file = 
self.request_chunk(self.domain, self.variable, self.startdate, None, None) def declare_data_generated(self): var_over = self.variable + '_daysover_q{0}' @@ -90,11 +92,11 @@ class DaysOverPercentile(Diagnostic): self.days_over_file = {} self.days_below_file = {} for perc in ClimatologicalPercentile.Percentiles: - self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100)), None, None, + self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100)), self.startdate, None, None, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) - self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100)), None, None, + self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100)), self.startdate, None, None, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) @@ -132,7 +134,10 @@ class DaysOverPercentile(Diagnostic): iris.coord_categorisation.add_categorised_coord(var, 'leadtime', 'time', assign_leadtime) iris.coord_categorisation.add_year(var, 'time') iris.coord_categorisation.add_day_of_year(var, 'time') - realization_coord = var.coord('realization') + try: + realization_coord = var.coord('realization') + except iris.exceptions.CoordinateNotFoundError: + realization_coord = None lat_coord = var.coord('latitude') lon_coord = var.coord('longitude') results_over = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} @@ -140,8 +145,10 @@ class DaysOverPercentile(Diagnostic): for leadtime in leadtimes.keys(): leadtime_slice = var.extract(iris.Constraint(leadtime=leadtime)) - - percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime)) + if len(percentiles.coords('leadtime')) >0: + percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime)) + else: + percentiles_leadtime = percentiles time_coord = iris.coords.AuxCoord.from_coord(leadtime_slice.coord('time')) first_time = 
time_coord.points[0] last_time = time_coord.points[-1] @@ -158,18 +165,27 @@ class DaysOverPercentile(Diagnostic): days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps_per_day) result = iris.cube.Cube(days_over.astype(np.float32), var_name='daysover', units=1.0) - result.add_dim_coord(lat_coord, 1) - result.add_dim_coord(lon_coord, 2) - result.add_aux_coord(realization_coord, 0) + if realization_coord is not None: + result.add_aux_coord(realization_coord, 0) + result.add_dim_coord(lat_coord, 1) + result.add_dim_coord(lon_coord, 2) + else: + result.add_dim_coord(lat_coord, 0) + result.add_dim_coord(lon_coord, 1) result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) result.add_aux_coord(time_coord) results_over[percentile].append(result) days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps_per_day) result = iris.cube.Cube(days_below.astype(np.float32), var_name='daysbelow', units=1.0) - result.add_dim_coord(lat_coord, 1) - result.add_dim_coord(lon_coord, 2) - result.add_aux_coord(realization_coord, 0) + + if realization_coord is not None: + result.add_aux_coord(realization_coord, 0) + result.add_dim_coord(lat_coord, 1) + result.add_dim_coord(lon_coord, 2) + else: + result.add_dim_coord(lat_coord, 0) + result.add_dim_coord(lon_coord, 1) result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) result.add_aux_coord(time_coord) results_below[percentile].append(result) @@ -178,12 +194,14 @@ class DaysOverPercentile(Diagnostic): iris.FUTURE.netcdf_no_unlimited = True temp = TempFile.get() iris.save(results_over[perc].merge_cube(), temp, zlib=True) - Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) + Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, + must_exist=False, rename_dimension=True) self.days_over_file[perc].set_local_file(temp, rename_var='daysover') temp = TempFile.get() 
iris.save(results_below[perc].merge_cube(), temp, zlib=True) - Utils.rename_variable(temp, 'dim2', 'ensemble', must_exist=False, rename_dimension=True) + Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, + must_exist=False, rename_dimension=True) self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') diff --git a/launch_diags.sh b/launch_diags.sh index 74f00dc3..9fef131e 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -10,7 +10,7 @@ PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf PATH_TO_DIAGNOSTICS=~jvegas/earthdiagnostics -PATH_TO_CONDAENV=~jvegas/anaconda/venvs/earthdiags/bin +PATH_TO_CONDAENV=~jvegas/anaconda/envs/earthdiags/bin module purge module load NCO/4.5.4-foss-2015a -- GitLab From 3c6eb55102665520955841f199153018b98c1eb7 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 21 Aug 2017 12:03:33 +0200 Subject: [PATCH 52/82] Richer metadata for days over and output now proportional --- diags.conf | 6 +- earthdiagnostics/obsreconmanager.py | 3 + .../statistics/daysoverpercentile.py | 61 +++++++++---------- launch_diags.sh | 4 +- 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/diags.conf b/diags.conf index 703765e3..b0c16c93 100644 --- a/diags.conf +++ b/diags.conf @@ -6,7 +6,7 @@ SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive # Specify if your data is from an experiment (exp), observation (obs) or reconstructions (recon) -DATA_TYPE = recon +DATA_TYPE = exp # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 DATA_CONVENTION = SPECS @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). 
If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = climpercent,atmos,sfcWind,2000,2000,1-2 daysover,atmos,sfcWind,2000,2000,1-2 +DIAGS = climpercent,atmos,sfcWind,2000,2000,1 daysover,atmos,sfcWind,2000,2000,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -70,7 +70,7 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard INSTITUTE = ecmwf -MODEL = erainterim +MODEL = system4_m1 # Model version: Available versions MODEL_VERSION = Ec3.2_O1L75 # Atmospheric output timestep in hours diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index 5c269dbc..749d0cf4 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -31,6 +31,9 @@ class ObsReconManager(DataManager): filepath = self.get_file_path(startdate, domain, variable, frequency, vartype) return self._get_file_from_storage(filepath) + def create_link(self, domain, filepath, frequency, var, grid, move_old, vartype): + pass + # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index dd417c2d..238592d3 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -42,14 +42,15 @@ class DaysOverPercentile(Diagnostic): self.end_year = end_year self.year_to_compute = year_to_compute self.forecast_month = forecast_month - self.startdate = '{0}{1:02}01'.format(self.start_year, self.forecast_month) + self.startdate = '{0}{1:02}01'.format(self.year_to_compute, self.forecast_month) def __eq__(self, other): return 
self.startdate == other.startdate and self.domain == other.domain and self.variable == other.variable def __str__(self): return 'Days over percentile Startdate: {0} ' \ - 'Variable: {1}:{2}'.format(self.startdate, self.domain, self.variable) + 'Variable: {1}:{2} Climatology: {3}-{4}'.format(self.startdate, self.domain, self.variable, + self.start_year, self.end_year) @classmethod def generate_jobs(cls, diags, options): @@ -143,6 +144,13 @@ class DaysOverPercentile(Diagnostic): results_over = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} results_below = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} + var_daysover = 'daysover' + var_days_below = 'daysbelow' + long_name_days_over = 'Proportion of days over a given percentile for {0.start_year}-{0.end_year} ' \ + 'climatology'.format(self) + long_name_days_below = 'Proportion of days below a given percentile for {0.start_year}-{0.end_year} ' \ + 'climatology'.format(self) + for leadtime in leadtimes.keys(): leadtime_slice = var.extract(iris.Constraint(leadtime=leadtime)) if len(percentiles.coords('leadtime')) >0: @@ -153,41 +161,18 @@ class DaysOverPercentile(Diagnostic): first_time = time_coord.points[0] last_time = time_coord.points[-1] timesteps = leadtime_slice.coord('time').shape[0] - days = time_coord.units.num2date(last_time) - time_coord.units.num2date(first_time) - if days.seconds > 0: - days = days.days + 1 - else: - days = days.days - timesteps_per_day = timesteps / days time_coord = time_coord.copy(first_time + (last_time - first_time) / 2, (first_time, last_time)) for percentile_slice in percentiles_leadtime.slices_over('percentile'): percentile = percentile_slice.coord('percentile').points[0] - days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps_per_day) - result = iris.cube.Cube(days_over.astype(np.float32), var_name='daysover', units=1.0) - if realization_coord is not None: - 
result.add_aux_coord(realization_coord, 0) - result.add_dim_coord(lat_coord, 1) - result.add_dim_coord(lon_coord, 2) - else: - result.add_dim_coord(lat_coord, 0) - result.add_dim_coord(lon_coord, 1) - result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) - result.add_aux_coord(time_coord) + days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps) + result = self.create_results_cube(days_over, lat_coord, lon_coord, percentile, realization_coord, + time_coord, var_daysover, long_name_days_over) results_over[percentile].append(result) - days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps_per_day) - result = iris.cube.Cube(days_below.astype(np.float32), var_name='daysbelow', units=1.0) - - if realization_coord is not None: - result.add_aux_coord(realization_coord, 0) - result.add_dim_coord(lat_coord, 1) - result.add_dim_coord(lon_coord, 2) - else: - result.add_dim_coord(lat_coord, 0) - result.add_dim_coord(lon_coord, 1) - result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) - result.add_aux_coord(time_coord) + days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps) + result = self.create_results_cube(days_below, lat_coord, lon_coord, percentile, realization_coord, + time_coord, var_days_below, long_name_days_below) results_below[percentile].append(result) for perc in ClimatologicalPercentile.Percentiles: @@ -204,6 +189,20 @@ class DaysOverPercentile(Diagnostic): must_exist=False, rename_dimension=True) self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') + def create_results_cube(self, days_over, lat_coord, lon_coord, percentile, realization_coord, time_coord, + var_name, long_name): + result = iris.cube.Cube(days_over.astype(np.float32), var_name=var_name, long_name=long_name, units=1.0) + if realization_coord is not None: + result.add_aux_coord(realization_coord, 0) + result.add_dim_coord(lat_coord, 1) + 
result.add_dim_coord(lon_coord, 2) + else: + result.add_dim_coord(lat_coord, 0) + result.add_dim_coord(lon_coord, 1) + result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) + result.add_aux_coord(time_coord) + return result + diff --git a/launch_diags.sh b/launch_diags.sh index 9fef131e..3c8797f0 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -2,13 +2,13 @@ #SBATCH -n 1 #SBATCH -w gustafson -#SBATCH --time 72:00:00 +#SBATCH --time 2:00:00 #SBATCH --error=job.%J.err #SBATCH --output=job.%J.out -PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf +PATH_TO_CONF_FILE=~vtorralb/diags-erai.conf PATH_TO_DIAGNOSTICS=~jvegas/earthdiagnostics PATH_TO_CONDAENV=~jvegas/anaconda/envs/earthdiags/bin -- GitLab From 690e315cb63a045ac5fd785be6b4a0c779316aea Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 25 Aug 2017 13:17:26 +0200 Subject: [PATCH 53/82] Optimized hash routines. Added option to pass max and min values to climatological percentiles --- earthdiagnostics/datafile.py | 3 +- .../statistics/climatologicalpercentile.py | 60 +++++++++++++------ earthdiagnostics/utils.py | 51 ++++++++++++---- setup.py | 2 +- 4 files changed, 84 insertions(+), 32 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 3c2e2215..80484ace 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -143,7 +143,7 @@ class DataFile(Publisher): def upload(self): self.storage_status = StorageStatus.UPLOADING try: - Utils.copy_file(self.local_file, self.remote_file) + Utils.copy_file(self.local_file, self.remote_file, save_hash=True) except Exception as ex: Log.error('File {0} can not be uploaded: {1}', self.remote_file, ex) self.storage_status = StorageStatus.FAILED @@ -431,6 +431,7 @@ class NetCDFFile(DataFile): self.local_status = LocalStatus.DOWNLOADING if not self.local_file: self.local_file = TempFile.get() + Utils.get_file_hash(self.remote_file, use_stored=True, save=True) 
Utils.copy_file(self.remote_file, self.local_file) Log.info('File {0} ready!', self.remote_file) self.local_status = LocalStatus.READY diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 7f51baa4..74687b8b 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -4,7 +4,7 @@ from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ - DiagnosticIntOption, DiagnosticListIntOption + DiagnosticIntOption, DiagnosticListIntOption, DiagnosticFloatOption from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable_type import VariableType @@ -14,6 +14,8 @@ import iris.coord_categorisation from iris.time import PartialDateTime import iris.exceptions import iris.coords +import math + class ClimatologicalPercentile(Diagnostic): @@ -33,8 +35,8 @@ class ClimatologicalPercentile(Diagnostic): Percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) - def __init__(self, data_manager, domain, variable, num_bins, start_year, end_year, forecast_month, - experiment_config): + def __init__(self, data_manager, domain, variable, num_bins, start_year, end_year, min_value, max_value, + forecast_month, experiment_config): Diagnostic.__init__(self, data_manager) self.variable = variable self.domain = domain @@ -49,14 +51,26 @@ class ClimatologicalPercentile(Diagnostic): self.end_year = end_year self.forecast_month = forecast_month self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) - if self.cmor_var and self.cmor_var.valid_max and self.cmor_var.valid_min: - self.max_value = float(self.cmor_var.valid_max) + + if not math.isnan(min_value): + self.min_value = min_value + self.check_min_value = False + 
elif self.cmor_var and self.cmor_var.valid_min: self.min_value = float(self.cmor_var.valid_min) - self.check_limit_values = False + self.check_min_value = False else: self.min_value = None + self.check_min_value = True + + if not math.isnan(max_value): + self.max_value = max_value + self.check_max_value = False + elif self.cmor_var and self.cmor_var.valid_min: + self.max_value = float(self.cmor_var.valid_max) + self.check_max_value = False + else: self.max_value = None - self.check_limit_values = True + self.check_max_value = True def __eq__(self, other): return self.domain == other.domain and self.variable == other.variable and self.num_bins == other.num_bins @@ -82,6 +96,8 @@ class ClimatologicalPercentile(Diagnostic): DiagnosticIntOption('end_year'), DiagnosticListIntOption('forecast_month'), DiagnosticIntOption('bins', 2000), + DiagnosticFloatOption('min_value', float('nan')), + DiagnosticFloatOption('max_value', float('nan')), ) options = cls.process_options(options, options_available) @@ -89,6 +105,7 @@ class ClimatologicalPercentile(Diagnostic): for forecast_month in options['forecast_month']: job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'], options['bins'], options['start_year'], options['end_year'], + options['min_value'], options['max_value'], forecast_month, diags.config.experiment)) return job_list @@ -122,7 +139,7 @@ class ClimatologicalPercentile(Diagnostic): self.units = data_cube.units self.lat_coord = data_cube.coord('latitude') self.lon_coord = data_cube.coord('longitude') - + Log.info('Range: [{0}, {1}]', self.min_value, self.max_value) distribution = self._get_distribution() percentile_values = self._calculate_percentiles(distribution) self._save_results(percentile_values) @@ -229,16 +246,25 @@ class ClimatologicalPercentile(Diagnostic): Log.warning('Different number of realizations in the data used by diagnostic {0}', self) def _get_value_interval(self, data_cube): - if not 
self.check_limit_values: - return - for time_slice in data_cube.slices_over('time'): - file_max = np.amax(time_slice.data) - file_min = np.amin(time_slice.data) - self.max_value = max(self.min_value, file_max) - if self.min_value is None: - self.min_value = file_min + if self.check_min_value: + if self.check_max_value: + for time_slice in data_cube.slices_over('time'): + for value in time_slice.data.flat: + if value < self.min_value: + self.min_value = value + if value > self.max_value: + self.max_value = value else: - self.min_value = min(self.min_value, file_min) + for time_slice in data_cube.slices_over('time'): + file_min = np.amin(time_slice.data) + if self.min_value is None: + self.min_value = file_min + self.max_value = min(self.min_value, file_min) + else: + if self.check_max_value: + for time_slice in data_cube.slices_over('time'): + file_max = np.amax(time_slice.data) + self.max_value = max(self.min_value, file_max) def _calculate_distribution(self, data_cube): def calculate_histogram(time_series): diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index 71269e06..bdca5d52 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -1,8 +1,8 @@ # coding=utf-8 -import hashlib import shutil import subprocess import tarfile +import datetime import netCDF4 import numpy as np @@ -20,6 +20,7 @@ from nco import Nco from earthdiagnostics.constants import Basins from contextlib import contextmanager import sys +import xxhash @contextmanager @@ -219,7 +220,7 @@ class Utils(object): handler.sync() @staticmethod - def copy_file(source, destiny): + def copy_file(source, destiny, save_hash=False): """ Copies a file from source to destiny, creating dirs if necessary @@ -238,18 +239,22 @@ class Utils(object): if not os.path.exists(dirname_path): raise ex hash_destiny = None - hash_original = Utils.get_file_hash(source) + Log.debug('Hashing original file... 
{0}', datetime.datetime.now()) + hash_original = Utils.get_file_hash(source, use_stored=True) retrials = 3 while hash_original != hash_destiny: if retrials == 0: raise Exception('Can not copy {0} to {1}'.format(source, destiny)) + Log.debug('Copying... {0}', datetime.datetime.now()) shutil.copyfile(source, destiny) - hash_destiny = Utils.get_file_hash(destiny) + Log.debug('Hashing copy ... {0}', datetime.datetime.now()) + hash_destiny = Utils.get_file_hash(destiny, save=save_hash) retrials -= 1 + Log.info('Finished {0}', datetime.datetime.now()) @staticmethod - def move_file(source, destiny): + def move_file(source, destiny, save_hash=False): """ Moves a file from source to destiny, creating dirs if necessary @@ -258,7 +263,7 @@ class Utils(object): :param destiny: path to destiny :type destiny: str """ - Utils.copy_file(source, destiny) + Utils.copy_file(source, destiny, save_hash) os.remove(source) @staticmethod @@ -292,27 +297,46 @@ class Utils(object): shutil.rmtree(source) @staticmethod - def get_file_hash(filepath): + def get_file_hash(filepath, use_stored=False, save=False): """ - Returns the MD5 hash for the given filepath + Returns the xxHash hash for the given filepath :param filepath: path to the file to compute hash on :type filepath:str - :return: file's MD5 hash + :return: file's xxHash hash :rtype: str """ - blocksize = 65536 - hasher = hashlib.md5() + if use_stored: + hash_file = Utils._get_hash_filename(filepath) + if os.path.isfile(hash_file): + hash_value = open(hash_file, 'r').readline() + return hash_value + + blocksize = 104857600 + hasher = xxhash.xxh64() with open(filepath, 'rb') as afile: buf = afile.read(blocksize) while len(buf) > 0: hasher.update(buf) buf = afile.read(blocksize) - return hasher.hexdigest() + hash_value = hasher.hexdigest() + if save: + hash_file = open(Utils._get_hash_filename(filepath), 'w') + hash_file.write(hash_value) + hash_file.close() + + return hash + + @staticmethod + def _get_hash_filename(filepath): + dir = 
os.path.dirname(filepath) + filename = os.path.basename(filepath) + hash_file = os.path.join(dir, '.{0}.xxhash64.hash'.format(filename)) + return hash_file @staticmethod def execute_shell_command(command, log_level=Log.DEBUG): """ - Executes a sheel command + Executes a sheel commandsi :param command: command to execute Log.info('Detailed time for diagnostic class') @@ -729,3 +753,4 @@ class TempFile(object): if os.path.exists(temp_file): os.remove(temp_file) TempFile.files = list() + diff --git a/setup.py b/setup.py index a7558830..3a4c38c5 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup( keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', - 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits'], + 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash'], packages=find_packages(), include_package_data=True, scripts=['bin/earthdiags'] -- GitLab From 6b9c50de79ced7e3496511d037d3b97432056e74 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 28 Aug 2017 11:21:27 +0200 Subject: [PATCH 54/82] Fixed bug on climatological percentile --- .../statistics/climatologicalpercentile.py | 20 ++++++------------- .../statistics/daysoverpercentile.py | 4 ++-- earthdiagnostics/utils.py | 2 +- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 74687b8b..3537a634 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -246,23 +246,15 @@ class ClimatologicalPercentile(Diagnostic): Log.warning('Different number of realizations in the data used by diagnostic {0}', self) def _get_value_interval(self, data_cube): - if self.check_min_value: - if self.check_max_value: - for time_slice in 
data_cube.slices_over('time'): - for value in time_slice.data.flat: - if value < self.min_value: - self.min_value = value - if value > self.max_value: - self.max_value = value - else: - for time_slice in data_cube.slices_over('time'): + if self.check_min_value or self.check_max_value: + for time_slice in data_cube.slices_over('time'): + if self.check_min_value: file_min = np.amin(time_slice.data) if self.min_value is None: self.min_value = file_min - self.max_value = min(self.min_value, file_min) - else: - if self.check_max_value: - for time_slice in data_cube.slices_over('time'): + self.min_value = min(self.min_value, file_min) + + if self.check_max_value: file_max = np.amax(time_slice.data) self.max_value = max(self.min_value, file_max) diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 238592d3..499fb50f 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -178,13 +178,13 @@ class DaysOverPercentile(Diagnostic): for perc in ClimatologicalPercentile.Percentiles: iris.FUTURE.netcdf_no_unlimited = True temp = TempFile.get() - iris.save(results_over[perc].merge_cube(), temp, zlib=True) + iris.save(results_over[perc].merge_cube(), temp, zlib=True, unlimited_dimensions=['time']) Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, must_exist=False, rename_dimension=True) self.days_over_file[perc].set_local_file(temp, rename_var='daysover') temp = TempFile.get() - iris.save(results_below[perc].merge_cube(), temp, zlib=True) + iris.save(results_below[perc].merge_cube(), temp, zlib=True, unlimited_dimensions=['time']) Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, must_exist=False, rename_dimension=True) self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index bdca5d52..ed4351a8 100644 --- 
a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -324,7 +324,7 @@ class Utils(object): hash_file.write(hash_value) hash_file.close() - return hash + return hash_value @staticmethod def _get_hash_filename(filepath): -- GitLab From 9924874045a4ab17cdbb34e880f4e09881eb699f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 28 Aug 2017 16:55:11 +0200 Subject: [PATCH 55/82] Fixed tests --- earthdiagnostics/cdftools.py | 4 +- earthdiagnostics/modelingrealm.py | 42 ++++++++------ earthdiagnostics/ocean/gyres.py | 3 +- .../statistics/climatologicalpercentile.py | 12 ++-- test/unit/__init__.py | 2 +- test/unit/test_areamoc.py | 30 +++++++--- test/unit/test_cdftools.py | 56 +++++++++---------- test/unit/test_climatologicalpercentile.py | 39 ++++++++++--- test/unit/test_cutsection.py | 7 ++- test/unit/test_gyres.py | 4 +- test/unit/test_heatcontentlayer.py | 2 +- test/unit/test_maxmoc.py | 31 +++++++--- test/unit/test_modelling_realm.py | 10 ++-- test/unit/test_monthlymean.py | 13 +++-- test/unit/test_monthlypercentile.py | 2 +- test/unit/test_rewrite.py | 11 +++- test/unit/test_siasiesiv.py | 4 +- test/unit/test_verticalmean.py | 11 +++- test/unit/test_verticalmeanmeters.py | 25 +++++---- 19 files changed, 195 insertions(+), 113 deletions(-) diff --git a/earthdiagnostics/cdftools.py b/earthdiagnostics/cdftools.py index df2b4bf5..c262eb94 100644 --- a/earthdiagnostics/cdftools.py +++ b/earthdiagnostics/cdftools.py @@ -28,7 +28,7 @@ class CDFTools(object): :param output: output file. Not all tools support this parameter :type options: str :param options: options for the tool. 
- :type options: str | list[str] | Tuple[str] + :type options: str | [str] | Tuple[str] :param log_level: log level at which the output of the cdftool command will be added :type log_level: int :param input_option: option to add before input file @@ -91,4 +91,4 @@ class CDFTools(object): exe_file = os.path.join(path, command) if self.is_exe(exe_file): return - raise ValueError('Error executing {0}\n Command does not exist in {1}', command, self.path) + raise ValueError('Error executing {0}\n Command does not exist in {1}'.format(command, self.path)) diff --git a/earthdiagnostics/modelingrealm.py b/earthdiagnostics/modelingrealm.py index a7a76573..703d5b47 100644 --- a/earthdiagnostics/modelingrealm.py +++ b/earthdiagnostics/modelingrealm.py @@ -5,16 +5,16 @@ from earthdiagnostics.frequency import Frequencies class ModelingRealm(object): def __init__(self, domain_name): - domain_name = domain_name.lower() - if domain_name == 'seaice': + lower_name = domain_name.lower() + if lower_name == 'seaice': self.name = 'seaIce' - elif domain_name == 'landice': + elif lower_name == 'landice': self.name = 'landIce' - elif domain_name == 'atmoschem': + elif lower_name == 'atmoschem': self.name = 'atmosChem' - elif domain_name == 'ocnbgchem': + elif lower_name == 'ocnbgchem': self.name = 'ocnBgchem' - elif domain_name in ['ocean', 'atmos', 'land', 'aerosol']: + elif lower_name in ['ocean', 'atmos', 'land', 'aerosol']: self.name = domain_name else: raise ValueError('Modelling realm {0} not recognized!'.format(domain_name)) @@ -28,6 +28,9 @@ class ModelingRealm(object): def __str__(self): return self.name + def __repr__(self): + return str(self) + def get_table_name(self, frequency, data_convention): """ Returns the table name for a domain-frequency pair @@ -38,21 +41,24 @@ class ModelingRealm(object): :return: variable's table name :rtype: str """ - if frequency in (Frequencies.monthly, Frequencies.climatology, Frequencies.daily): - if self.name == 'seaIce': - if data_convention 
in ('specs', 'preface'): - prefix = 'OI' - else: - prefix = 'SI' - elif self.name == 'landIce': - prefix = 'LI' + if self.name == 'seaIce': + if data_convention in ('specs', 'preface'): + prefix = 'OI' else: - prefix = self.name[0].upper() - table_name = prefix + str(frequency) - elif frequency == Frequencies.six_hourly: + prefix = 'SI' + elif self.name == 'landIce': + prefix = 'LI' + else: + prefix = self.name[0].upper() + + if frequency == Frequencies.six_hourly: table_name = '6hrPlev' else: - table_name = frequency.frequency + if (frequency in (Frequencies.monthly, Frequencies.climatology)) or data_convention not in ('specs', + 'preface'): + table_name = prefix + str(frequency) + else: + table_name = frequency.frequency return table_name def get_table(self, frequency, data_convention): diff --git a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index b0e1dd6e..b0197148 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -46,7 +46,8 @@ class Gyres(Diagnostic): self.model_version == other.model_version def __str__(self): - return 'Gyres Startdate: {0} Member: {1} Chunk: {2}'.format(self.startdate, self.member, self.chunk) + return 'Gyres Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ + 'Model version: {0.model_version}'.format(self) @classmethod def generate_jobs(cls, diags, options): diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 3537a634..a4de5472 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -17,7 +17,6 @@ import iris.coords import math - class ClimatologicalPercentile(Diagnostic): """ Calculates the climatological percentiles for the given leadtime @@ -73,11 +72,14 @@ class ClimatologicalPercentile(Diagnostic): self.check_max_value = True def __eq__(self, other): - return self.domain == other.domain and self.variable == 
other.variable and self.num_bins == other.num_bins + return self.domain == other.domain and self.variable == other.variable and self.num_bins == other.num_bins and \ + self.min_value == other.min_value and self.max_value == other.max_value and \ + self.start_year == other.start_year and self.end_year == other.end_year and \ + self.forecast_month == other.forecast_month def __str__(self): - return 'Climatological percentile Variable: {0}:{1} ' \ - 'Bins: {2}'.format(self.domain, self.variable, self.num_bins) + return 'Climatological percentile Variable: {0.domain}:{0.variable} Period: {0.start_year}-{0.end_year} ' \ + 'Forecast month: {0.forecast_month} Bins: {0.num_bins}'.format(self) @classmethod def generate_jobs(cls, diags, options): @@ -206,7 +208,7 @@ class ClimatologicalPercentile(Diagnostic): def _get_distribution(self): distribution = {} for startdate in self.leadtime_files: - Log.debug('Getting data for startdate {0}', startdate) + Log.info('Getting data for startdate {0}', startdate) data_cube = self._load_cube(startdate) Log.debug('Discretizing file {0}', data_cube) diff --git a/test/unit/__init__.py b/test/unit/__init__.py index d511e5d2..162b0b52 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -1,7 +1,7 @@ # coding=utf-8 from test_data_manager import TestConversion # from test.unit.test_variable import TestVariable -from test_constants import TestBasin, TestBasins +from test_constants import TestBasin from test_box import TestBox from test_diagnostic import * from test_cdftools import TestCDFTools diff --git a/test/unit/test_areamoc.py b/test/unit/test_areamoc.py index 15d4bcd7..c1361b83 100644 --- a/test/unit/test_areamoc.py +++ b/test/unit/test_areamoc.py @@ -2,9 +2,9 @@ from unittest import TestCase from earthdiagnostics.box import Box -from earthdiagnostics.constants import Basins +from earthdiagnostics.constants import Basins, Basin from earthdiagnostics.ocean.areamoc import AreaMoc -from mock import Mock +from mock import Mock, 
patch class TestAreaMoc(TestCase): @@ -12,6 +12,9 @@ class TestAreaMoc(TestCase): def setUp(self): self.data_manager = Mock() self.diags = Mock() + self.basins = Mock() + self.basins.Global = Basin('Global') + self.basins.Atlantic = Basin('Atlantic') self.box = Box() self.box.min_lat = 0 @@ -20,18 +23,29 @@ class TestAreaMoc(TestCase): self.box.max_depth = 0 self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) - self.psi = AreaMoc(self.data_manager, '20000101', 1, 1, Basins.Antarctic, self.box) + self.psi = AreaMoc(self.data_manager, '20000101', 1, 1, self.basins.Atlantic, self.box) + def fake_parse(self, value): + if type(value) is Basin: + return value + if value == 'atl': + value = 'Atlantic' + else: + value = 'Global' + + return Basin(value) + + @patch.object(Basins, 'parse', fake_parse) def test_generate_jobs(self): jobs = AreaMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], AreaMoc(self.data_manager, '20010101', 0, 0, Basins.Global, self.box)) - self.assertEqual(jobs[1], AreaMoc(self.data_manager, '20010101', 0, 1, Basins.Global, self.box)) + self.assertEqual(jobs[0], AreaMoc(self.data_manager, '20010101', 0, 0, self.basins.Global, self.box)) + self.assertEqual(jobs[1], AreaMoc(self.data_manager, '20010101', 0, 1, self.basins.Global, self.box)) jobs = AreaMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', 'atl']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], AreaMoc(self.data_manager, '20010101', 0, 0, Basins.Atlantic, self.box)) - self.assertEqual(jobs[1], AreaMoc(self.data_manager, '20010101', 0, 1, Basins.Atlantic, self.box)) + self.assertEqual(jobs[0], AreaMoc(self.data_manager, '20010101', 0, 0, self.basins.Atlantic, self.box)) + self.assertEqual(jobs[1], AreaMoc(self.data_manager, '20010101', 0, 1, self.basins.Atlantic, self.box)) with self.assertRaises(Exception): AreaMoc.generate_jobs(self.diags, 
['diagnostic']) @@ -40,4 +54,4 @@ class TestAreaMoc(TestCase): def test_str(self): self.assertEquals(str(self.psi), 'Area MOC Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0 ' - 'Basin: Antarctic_Ocean') + 'Basin: Atlantic') diff --git a/test/unit/test_cdftools.py b/test/unit/test_cdftools.py index 9ebdc65f..3367fe59 100644 --- a/test/unit/test_cdftools.py +++ b/test/unit/test_cdftools.py @@ -7,38 +7,34 @@ from earthdiagnostics.cdftools import CDFTools import mock +def mock_exists(path, access=None): + return not os.path.basename(path.startswith('bad')) + + class TestCDFTools(TestCase): def setUp(self): - self.cdftools = CDFTools('') + self.cdftools = CDFTools('/test/path') mock.patch('os.path.join') - # noinspection PyTypeChecker + @mock.patch('os.path.exists', side_effect=mock_exists) + @mock.patch('os.access', side_effect=mock_exists) def test_run(self): - # noinspection PyUnusedLocal - def mock_exists(path, access=None): - return not os.path.basename(path.startswith('bad')) - - with mock.patch('os.path.exists') as exists_mock: - with mock.patch('os.access') as access_mock: - exists_mock.side_effect = mock_exists - access_mock.side_effect = mock_exists - - with mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') as execute_mock: - execute_mock.return_value = ['Command output'] - with self.assertRaises(ValueError): - self.cdftools.run('badcommand', input='input_file', output='output_file') - with self.assertRaises(ValueError): - self.cdftools.run('command', input='badinput_file', output='output_file') - with self.assertRaises(ValueError): - self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') - with self.assertRaises(ValueError): - self.cdftools.run('command', input='input_file', output='input_file') - with self.assertRaises(Exception): - self.cdftools.run('command', input='input_file', output='badoutput_file') - - self.cdftools.run('command', input='input_file', output='output_file') - 
self.cdftools.run('command', input='input_file') - self.cdftools.run('command', input=None) - self.cdftools.run('command', input=['input_file', 'input_file2']) - self.cdftools.run('command', input='input_file', options='-o -p') - self.cdftools.run('command', input='input_file', options=('-o', '-p')) + with mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') as execute_mock: + execute_mock.return_value = ['Command output'] + with self.assertRaises(ValueError): + self.cdftools.run('badcommand', input='input_file', output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input='badinput_file', output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input='input_file', output='input_file') + with self.assertRaises(Exception): + self.cdftools.run('command', input='input_file', output='badoutput_file') + + self.cdftools.run('command', input='input_file', output='output_file') + self.cdftools.run('command', input='input_file') + self.cdftools.run('command', input=None) + self.cdftools.run('command', input=['input_file', 'input_file2']) + self.cdftools.run('command', input='input_file', options='-o -p') + self.cdftools.run('command', input='input_file', options=('-o', '-p')) diff --git a/test/unit/test_climatologicalpercentile.py b/test/unit/test_climatologicalpercentile.py index 95afc38b..76fd8553 100644 --- a/test/unit/test_climatologicalpercentile.py +++ b/test/unit/test_climatologicalpercentile.py @@ -2,7 +2,8 @@ from unittest import TestCase from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalPercentile -from mock import Mock +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from mock import Mock, patch from earthdiagnostics.modelingrealm import ModelingRealms @@ -16,20 +17,40 @@ class 
TestClimatologicalPercentile(TestCase): self.diags = Mock() self.diags.data_manager = self.data_manager - self.diagnostic = ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', - [10, 90], 1000, self.diags.config.experiment) + def fake_parse(self, value): + return value + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): - jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '1-2', '1000']) + jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '11']) self.assertEqual(len(jobs), 1) - self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', [1, 2], - 1000, self.diags.config.experiment)) + self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', + 2000, 2000, 2001, float('nan'), float('nan'), 11, + self.diags.config.experiment)) + + jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '11', + '', '0', '40']) + self.assertEqual(len(jobs), 1) + self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', + 2000, 2000, 2001, 0.0, 40.0, 11, + self.diags.config.experiment)) + + jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '7', + '1000', '', '10']) + self.assertEqual(len(jobs), 1) + self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', + 1000, 2000, 2001, float('nan'), 10.0, 7, + self.diags.config.experiment)) with self.assertRaises(Exception): ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent']) with self.assertRaises(Exception): - ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', '0', '0', '0', '0', '0', '0', '0']) + ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', '0', '0', '0', '0', '0', '0', 
'0', '0']) def test_str(self): - self.assertEquals(str(self.diagnostic), 'Climatological percentile Variable: ocean:var Leadtimes: [10, 90] ' - 'Bins: 1000') + diagnostic = ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', + 1000, 2000, 2001, float('nan'), float('nan'), 11, + self.diags.config.experiment) + + self.assertEquals(str(diagnostic), 'Climatological percentile Variable: ocean:var Period: 2000-2001 ' + 'Forecast month: 11 Bins: 1000') diff --git a/test/unit/test_cutsection.py b/test/unit/test_cutsection.py index 170d0426..e0c14363 100644 --- a/test/unit/test_cutsection.py +++ b/test/unit/test_cutsection.py @@ -1,9 +1,10 @@ # coding=utf-8 from unittest import TestCase +from earthdiagnostics.diagnostic import DiagnosticVariableOption from earthdiagnostics.box import Box from earthdiagnostics.ocean.cutsection import CutSection -from mock import Mock +from mock import Mock, patch from earthdiagnostics.modelingrealm import ModelingRealms @@ -23,6 +24,10 @@ class TestCutSection(TestCase): self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) self.psi = CutSection(self.data_manager, '20000101', 1, 1, ModelingRealms.atmos, 'var', True, 0) + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): jobs = CutSection.generate_jobs(self.diags, ['diagnostic', 'var', 'true', '10']) self.assertEqual(len(jobs), 2) diff --git a/test/unit/test_gyres.py b/test/unit/test_gyres.py index becc4e7d..8a025583 100644 --- a/test/unit/test_gyres.py +++ b/test/unit/test_gyres.py @@ -11,7 +11,7 @@ class TestGyres(TestCase): self.data_manager = Mock() self.diags = Mock() - self.diags.model_version = 'model_version' + self.diags.config.experiment.model_version = 'model_version' self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) self.gyres = Gyres(self.data_manager, '20000101', 1, 1, 
'model_version') @@ -26,4 +26,4 @@ class TestGyres(TestCase): Gyres.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): - self.assertEquals(str(self.gyres), 'Gyres Startdate: 20000101 Member: 1 Chunk: 1') + self.assertEquals(str(self.gyres), 'Gyres Startdate: 20000101 Member: 1 Chunk: 1 Model version: model_version') diff --git a/test/unit/test_heatcontentlayer.py b/test/unit/test_heatcontentlayer.py index 60b6dd8b..74876fc3 100644 --- a/test/unit/test_heatcontentlayer.py +++ b/test/unit/test_heatcontentlayer.py @@ -23,4 +23,4 @@ class TestHeatContentLayer(TestCase): self.psi = HeatContentLayer(self.data_manager, '20000101', 1, 1, self.box, self.weight, 0, 10) def test_str(self): - self.assertEquals(str(self.psi), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0m-100m') + self.assertEquals(str(self.psi), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0-100m') diff --git a/test/unit/test_maxmoc.py b/test/unit/test_maxmoc.py index f9542b29..4284750e 100644 --- a/test/unit/test_maxmoc.py +++ b/test/unit/test_maxmoc.py @@ -2,15 +2,18 @@ from unittest import TestCase from earthdiagnostics.box import Box -from earthdiagnostics.constants import Basins +from earthdiagnostics.constants import Basins, Basin from earthdiagnostics.ocean.maxmoc import MaxMoc -from mock import Mock +from mock import Mock, patch class TestMaxMoc(TestCase): def setUp(self): self.data_manager = Mock() + self.basins = Mock() + self.basins.Global = Basin('Global') + self.basins.Atlantic = Basin('Atlantic') self.box = Box(True) self.box.min_lat = 0.0 @@ -18,8 +21,20 @@ class TestMaxMoc(TestCase): self.box.min_depth = 0.0 self.box.max_depth = 0.0 - self.maxmoc = MaxMoc(self.data_manager, '20000101', 1, 2000, Basins.Global, self.box) + self.maxmoc = MaxMoc(self.data_manager, '20000101', 1, 2000, self.basins.Global, self.box) + + def fake_parse(self, value): + if type(value) is Basin: + return value + if value == 'atl': + 
value = 'Atlantic' + else: + value = 'Global' + + return Basin(value) + + @patch.object(Basins, 'parse', fake_parse) def test_generate_jobs(self): self.diags = Mock() self.diags.model_version = 'model_version' @@ -29,13 +44,13 @@ class TestMaxMoc(TestCase): jobs = MaxMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], MaxMoc(self.data_manager, '20010101', 0, 2000, Basins.Global, self.box)) - self.assertEqual(jobs[1], MaxMoc(self.data_manager, '20010101', 0, 2001, Basins.Global, self.box)) + self.assertEqual(jobs[0], MaxMoc(self.data_manager, '20010101', 0, 2000, self.basins.Global, self.box)) + self.assertEqual(jobs[1], MaxMoc(self.data_manager, '20010101', 0, 2001, self.basins.Global, self.box)) jobs = MaxMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', 'atl']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], MaxMoc(self.data_manager, '20010101', 0, 2000, Basins.Atlantic, self.box)) - self.assertEqual(jobs[1], MaxMoc(self.data_manager, '20010101', 0, 2001, Basins.Atlantic, self.box)) + self.assertEqual(jobs[0], MaxMoc(self.data_manager, '20010101', 0, 2000, self.basins.Atlantic, self.box)) + self.assertEqual(jobs[1], MaxMoc(self.data_manager, '20010101', 0, 2001, self.basins.Atlantic, self.box)) self.diags.config.experiment.get_full_years.return_value = list() jobs = MaxMoc.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0']) @@ -49,4 +64,4 @@ class TestMaxMoc(TestCase): def test_str(self): self.assertEquals(str(self.maxmoc), 'Max moc Startdate: 20000101 Member: 1 Year: 2000 ' - 'Box: 0N0 Basin: Global_Ocean') + 'Box: 0.0N0.0m Basin: Global') diff --git a/test/unit/test_modelling_realm.py b/test/unit/test_modelling_realm.py index 2d44e6a6..eec9d0ce 100644 --- a/test/unit/test_modelling_realm.py +++ b/test/unit/test_modelling_realm.py @@ -18,19 +18,19 @@ class TestModellingRealms(TestCase): class TestModellingRealm(TestCase): def setUp(self): - self.basin = 
ModelingRealm('ocean') + self.realm = ModelingRealm('ocean') def test_constructor_fail_on_bad_realm(self): with self.assertRaises(ValueError): ModelingRealm('badrealm') def test_comparison(self): - self.assertEqual(ModelingRealm('ocean'), self.basin) - self.assertNotEqual(ModelingRealm('OCEAN'), self.basin) - self.assertNotEqual(ModelingRealm('atmos'), self.basin) + self.assertEqual(ModelingRealm('ocean'), self.realm) + self.assertNotEqual(ModelingRealm('OCEAN'), self.realm) + self.assertNotEqual(ModelingRealm('atmos'), self.realm) def test_get_omon(self): - self.assertEqual(self.basin.get_table_name(Frequencies.monthly, 'specs'), 'Omon') + self.assertEqual(self.realm.get_table_name(Frequencies.monthly, 'specs'), 'Omon') def test_get_oimon(self): self.assertEqual(ModelingRealm('seaIce').get_table_name(Frequencies.monthly, 'specs'), 'OImon') diff --git a/test/unit/test_monthlymean.py b/test/unit/test_monthlymean.py index e2165f5d..b31561ac 100644 --- a/test/unit/test_monthlymean.py +++ b/test/unit/test_monthlymean.py @@ -1,10 +1,11 @@ # coding=utf-8 from unittest import TestCase +from earthdiagnostics.diagnostic import DiagnosticVariableOption from earthdiagnostics.box import Box from earthdiagnostics.frequency import Frequencies from earthdiagnostics.general.monthlymean import MonthlyMean -from mock import Mock +from mock import Mock, patch from earthdiagnostics.modelingrealm import ModelingRealms @@ -24,23 +25,27 @@ class TestMonthlyMean(TestCase): self.mixed = MonthlyMean(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', 'freq', '') + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): - jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'var', 'ocean']) + jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], MonthlyMean(self.data_manager, '20010101', 0, 0, 
ModelingRealms.ocean, 'var', Frequencies.daily, '')) self.assertEqual(jobs[1], MonthlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', Frequencies.daily, '')) - jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'var', 'atmos', 'monthly']) + jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'monthly']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], MonthlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', Frequencies.monthly, '')) self.assertEqual(jobs[1], MonthlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', Frequencies.monthly, '')) - jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'var', 'seaice', 'mon', 'grid']) + jobs = MonthlyMean.generate_jobs(self.diags, ['diagnostic', 'seaice', 'var', 'mon', 'grid']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], MonthlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, 'var', Frequencies.monthly, 'grid')) diff --git a/test/unit/test_monthlypercentile.py b/test/unit/test_monthlypercentile.py index 4b9bbfb0..a902ec89 100644 --- a/test/unit/test_monthlypercentile.py +++ b/test/unit/test_monthlypercentile.py @@ -24,7 +24,7 @@ class TestMonthlyPercentile(TestCase): self.diagnostic = MonthlyPercentile(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', [10, 90]) def test_generate_jobs(self): - jobs = MonthlyPercentile.generate_jobs(self.diags, ['monpercent', 'var', 'ocean', '10-90']) + jobs = MonthlyPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '10-90']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], MonthlyPercentile(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', [10, 90])) diff --git a/test/unit/test_rewrite.py b/test/unit/test_rewrite.py index 25380fcd..202e2c39 100644 --- a/test/unit/test_rewrite.py +++ b/test/unit/test_rewrite.py @@ -1,9 +1,10 @@ # coding=utf-8 from unittest import TestCase +from 
earthdiagnostics.diagnostic import DiagnosticVariableOption from earthdiagnostics.box import Box from earthdiagnostics.general.rewrite import Rewrite -from mock import Mock +from mock import Mock, patch from earthdiagnostics.modelingrealm import ModelingRealms @@ -23,14 +24,18 @@ class TestRewrite(TestCase): self.mixed = Rewrite(self.data_manager, '20000101', 1, 1, ModelingRealms.atmos, 'var', 'grid') + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): - jobs = Rewrite.generate_jobs(self.diags, ['diagnostic', 'var', 'atmos']) + jobs = Rewrite.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], Rewrite(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'original')) self.assertEqual(jobs[1], Rewrite(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'original')) - jobs = Rewrite.generate_jobs(self.diags, ['diagnostic', 'var', 'ocean', 'grid']) + jobs = Rewrite.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'grid']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], Rewrite(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'grid')) self.assertEqual(jobs[1], Rewrite(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'grid')) diff --git a/test/unit/test_siasiesiv.py b/test/unit/test_siasiesiv.py index 2ed28426..00f7c68c 100644 --- a/test/unit/test_siasiesiv.py +++ b/test/unit/test_siasiesiv.py @@ -15,7 +15,7 @@ class TestSiasiesiv(TestCase): self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) self.mask = Mock() - self.psi = Siasiesiv(self.data_manager, '20000101', 1, 1, Basins.Global, self.mask) + self.psi = Siasiesiv(self.data_manager, '20000101', 1, 1, Basins().Global, self.mask) def test_str(self): - self.assertEquals(str(self.psi), 'Siasiesiv Startdate: 20000101 Member: 1 Chunk: 1 Basin: 
Global_Ocean') + self.assertEquals(str(self.psi), 'Siasiesiv Startdate: 20000101 Member: 1 Chunk: 1 Basin: Global') diff --git a/test/unit/test_verticalmean.py b/test/unit/test_verticalmean.py index 59d0fb50..448d8d7c 100644 --- a/test/unit/test_verticalmean.py +++ b/test/unit/test_verticalmean.py @@ -1,9 +1,10 @@ # coding=utf-8 from unittest import TestCase +from earthdiagnostics.diagnostic import DiagnosticVariableOption from earthdiagnostics.box import Box from earthdiagnostics.ocean.verticalmean import VerticalMean -from mock import Mock +from mock import Mock, patch class TestVerticalMean(TestCase): @@ -21,6 +22,12 @@ class TestVerticalMean(TestCase): self.mixed = VerticalMean(self.data_manager, '20000101', 1, 1, 'var', self.box) + def fake_parse(self, value): + if value is None: + raise Exception + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): jobs = VerticalMean.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100']) self.assertEqual(len(jobs), 2) @@ -43,7 +50,7 @@ class TestVerticalMean(TestCase): VerticalMean.generate_jobs(self.diags, ['diagnostic']) with self.assertRaises(Exception): - VerticalMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + VerticalMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): self.assertEquals(str(self.mixed), 'Vertical mean Startdate: 20000101 Member: 1 Chunk: 1 Variable: var ' diff --git a/test/unit/test_verticalmeanmeters.py b/test/unit/test_verticalmeanmeters.py index 39a86c3a..08625c18 100644 --- a/test/unit/test_verticalmeanmeters.py +++ b/test/unit/test_verticalmeanmeters.py @@ -1,10 +1,11 @@ # coding=utf-8 from unittest import TestCase +from earthdiagnostics.diagnostic import DiagnosticVariableOption from earthdiagnostics.box import Box from earthdiagnostics.ocean.verticalmeanmeters import VerticalMeanMeters from earthdiagnostics.modelingrealm import ModelingRealms 
-from mock import Mock +from mock import Mock, patch class TestVerticalMeanMeters(TestCase): @@ -20,38 +21,42 @@ class TestVerticalMeanMeters(TestCase): self.box.min_depth = 0 self.box.max_depth = 100 - self.mixed = VerticalMeanMeters(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', self.box) + self.mixed = VerticalMeanMeters(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', self.box, 'T') + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): jobs = VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - self.box)) + self.box, 'T')) self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - self.box)) + self.box, 'T')) jobs = VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', 'var', '0']) box = Box(True) box.min_depth = 0 self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, 'var', ModelingRealms.ocean, - box)) + box, 'T')) self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, 'var', ModelingRealms.ocean, - box)) + box, 'T')) jobs = VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', 'var']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, 'var', ModelingRealms.ocean, - Box(True))) + Box(True), 'T')) self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, 'var', ModelingRealms.ocean, - Box(True))) + Box(True), 'T')) with self.assertRaises(Exception): VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic']) with self.assertRaises(Exception): - VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', 
'0']) + VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0']) def test_str(self): self.assertEquals(str(self.mixed), 'Vertical mean meters Startdate: 20000101 Member: 1 Chunk: 1 ' - 'Variable: ocean:var Box: 0m-100m') + 'Variable: ocean:var Box: 0-100m') -- GitLab From 7c4ad07383ab31d1a37ca310e82590b66d95bcb2 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 29 Aug 2017 15:14:55 +0200 Subject: [PATCH 56/82] Fixed all tests --- earthdiagnostics/cdftools.py | 6 +-- earthdiagnostics/ocean/verticalmeanmeters.py | 2 - test/unit/test_cdftools.py | 45 ++++++++++---------- test/unit/test_cutsection.py | 8 ++-- test/unit/test_verticalmean.py | 10 ++--- test/unit/test_verticalmeanmeters.py | 18 ++++---- 6 files changed, 45 insertions(+), 44 deletions(-) diff --git a/earthdiagnostics/cdftools.py b/earthdiagnostics/cdftools.py index c262eb94..5c78dd19 100644 --- a/earthdiagnostics/cdftools.py +++ b/earthdiagnostics/cdftools.py @@ -60,7 +60,7 @@ class CDFTools(object): @staticmethod def _check_output_was_created(line, output): if output: - if not os.path.exists(output): + if not os.path.isfile(output): raise Exception('Error executing {0}\n Output file not created', ' '.join(line)) # noinspection PyShadowingBuiltins @@ -69,12 +69,12 @@ class CDFTools(object): if input: if isinstance(input, six.string_types): line.append(input) - if not os.path.exists(input): + if not os.path.isfile(input): raise ValueError('Error executing {0}\n Input file {1} file does not exist', command, input) else: for element in input: line.append(element) - if not os.path.exists(element): + if not os.path.isfile(element): raise ValueError('Error executing {0}\n Input file {1} file does not exist', command, element) # noinspection PyMethodMayBeStatic diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 0e7ee28f..d3141af5 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ 
b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -43,8 +43,6 @@ class VerticalMeanMeters(Diagnostic): self.domain = domain self.variable = variable self.box = box - self.required_vars = [variable] - self.generated_vars = [variable + 'vmean'] self.grid_point = grid_point def __eq__(self, other): diff --git a/test/unit/test_cdftools.py b/test/unit/test_cdftools.py index 3367fe59..2f1d7277 100644 --- a/test/unit/test_cdftools.py +++ b/test/unit/test_cdftools.py @@ -8,33 +8,32 @@ import mock def mock_exists(path, access=None): - return not os.path.basename(path.startswith('bad')) + return not os.path.basename(path).startswith('bad') class TestCDFTools(TestCase): def setUp(self): self.cdftools = CDFTools('/test/path') - mock.patch('os.path.join') - @mock.patch('os.path.exists', side_effect=mock_exists) + @mock.patch('os.path.isfile', side_effect=mock_exists) @mock.patch('os.access', side_effect=mock_exists) - def test_run(self): - with mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') as execute_mock: - execute_mock.return_value = ['Command output'] - with self.assertRaises(ValueError): - self.cdftools.run('badcommand', input='input_file', output='output_file') - with self.assertRaises(ValueError): - self.cdftools.run('command', input='badinput_file', output='output_file') - with self.assertRaises(ValueError): - self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') - with self.assertRaises(ValueError): - self.cdftools.run('command', input='input_file', output='input_file') - with self.assertRaises(Exception): - self.cdftools.run('command', input='input_file', output='badoutput_file') - - self.cdftools.run('command', input='input_file', output='output_file') - self.cdftools.run('command', input='input_file') - self.cdftools.run('command', input=None) - self.cdftools.run('command', input=['input_file', 'input_file2']) - self.cdftools.run('command', input='input_file', options='-o -p') - self.cdftools.run('command', 
input='input_file', options=('-o', '-p')) + @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') + def test_run(self, mock_path, mock_exists, execute_mock): + execute_mock.return_value = ['Command output'] + with self.assertRaises(ValueError): + self.cdftools.run('badcommand', input='input_file', output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input='badinput_file', output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input='input_file', output='input_file') + with self.assertRaises(Exception): + self.cdftools.run('command', input='input_file', output='badoutput_file') + + self.cdftools.run('command', input='input_file', output='output_file') + self.cdftools.run('command', input='input_file') + self.cdftools.run('command', input=None) + self.cdftools.run('command', input=['input_file', 'input_file2']) + self.cdftools.run('command', input='input_file', options='-o -p') + self.cdftools.run('command', input='input_file', options=('-o', '-p')) diff --git a/test/unit/test_cutsection.py b/test/unit/test_cutsection.py index e0c14363..a658b06d 100644 --- a/test/unit/test_cutsection.py +++ b/test/unit/test_cutsection.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError from earthdiagnostics.box import Box from earthdiagnostics.ocean.cutsection import CutSection from mock import Mock, patch @@ -25,6 +25,8 @@ class TestCutSection(TestCase): self.psi = CutSection(self.data_manager, '20000101', 1, 1, ModelingRealms.atmos, 'var', True, 0) def fake_parse(self, value): + if not value: + raise DiagnosticOptionError return value @patch.object(DiagnosticVariableOption, 'parse', fake_parse) @@ -43,9 
+45,9 @@ class TestCutSection(TestCase): self.assertEqual(jobs[1], CutSection(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', False, 0)) - with self.assertRaises(Exception): + with self.assertRaises(DiagnosticOptionError): CutSection.generate_jobs(self.diags, ['diagnostic']) - with self.assertRaises(Exception): + with self.assertRaises(DiagnosticOptionError): CutSection.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): diff --git a/test/unit/test_verticalmean.py b/test/unit/test_verticalmean.py index 448d8d7c..fc501afd 100644 --- a/test/unit/test_verticalmean.py +++ b/test/unit/test_verticalmean.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError from earthdiagnostics.box import Box from earthdiagnostics.ocean.verticalmean import VerticalMean from mock import Mock, patch @@ -23,8 +23,8 @@ class TestVerticalMean(TestCase): self.mixed = VerticalMean(self.data_manager, '20000101', 1, 1, 'var', self.box) def fake_parse(self, value): - if value is None: - raise Exception + if not value: + raise DiagnosticOptionError return value @patch.object(DiagnosticVariableOption, 'parse', fake_parse) @@ -46,10 +46,10 @@ class TestVerticalMean(TestCase): self.assertEqual(jobs[0], VerticalMean(self.data_manager, '20010101', 0, 0, 'var', Box())) self.assertEqual(jobs[1], VerticalMean(self.data_manager, '20010101', 0, 1, 'var', Box())) - with self.assertRaises(Exception): + with self.assertRaises(DiagnosticOptionError): VerticalMean.generate_jobs(self.diags, ['diagnostic']) - with self.assertRaises(Exception): + with self.assertRaises(DiagnosticOptionError): VerticalMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): diff --git a/test/unit/test_verticalmeanmeters.py b/test/unit/test_verticalmeanmeters.py 
index 08625c18..f696d20d 100644 --- a/test/unit/test_verticalmeanmeters.py +++ b/test/unit/test_verticalmeanmeters.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError from earthdiagnostics.box import Box from earthdiagnostics.ocean.verticalmeanmeters import VerticalMeanMeters from earthdiagnostics.modelingrealm import ModelingRealms @@ -24,6 +24,8 @@ class TestVerticalMeanMeters(TestCase): self.mixed = VerticalMeanMeters(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', self.box, 'T') def fake_parse(self, value): + if not value: + raise DiagnosticOptionError return value @patch.object(DiagnosticVariableOption, 'parse', fake_parse) @@ -39,23 +41,23 @@ class TestVerticalMeanMeters(TestCase): box = Box(True) box.min_depth = 0 self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, 'var', ModelingRealms.ocean, + self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box, 'T')) - self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, 'var', ModelingRealms.ocean, + self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', box, 'T')) jobs = VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', 'var']) self.assertEqual(len(jobs), 2) - self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, 'var', ModelingRealms.ocean, + self.assertEqual(jobs[0], VerticalMeanMeters(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', Box(True), 'T')) - self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, 'var', ModelingRealms.ocean, + self.assertEqual(jobs[1], VerticalMeanMeters(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 
Box(True), 'T')) - with self.assertRaises(Exception): + with self.assertRaises(DiagnosticOptionError): VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic']) - with self.assertRaises(Exception): - VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0']) + with self.assertRaises(DiagnosticOptionError): + VerticalMeanMeters.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) def test_str(self): self.assertEquals(str(self.mixed), 'Vertical mean meters Startdate: 20000101 Member: 1 Chunk: 1 ' -- GitLab From f5ca425f79e928f1ce8b42f5dc2aa81eca3ca032 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 29 Aug 2017 16:03:34 +0200 Subject: [PATCH 57/82] Reorganized tests and added some more --- earthdiagnostics/general/attribute.py | 8 +-- test.py | 30 ---------- test/run_test.py | 26 +++++++++ test/unit/__init__.py | 33 +---------- test/unit/general/__init__.py | 0 test/unit/general/test_attribute.py | 55 +++++++++++++++++++ test/unit/general/test_dailymean.py | 55 +++++++++++++++++++ test/unit/{ => general}/test_monthlymean.py | 0 test/unit/{ => general}/test_rewrite.py | 0 test/unit/ocean/__init__.py | 0 test/unit/{ => ocean}/test_areamoc.py | 0 test/unit/{ => ocean}/test_averagesection.py | 0 test/unit/{ => ocean}/test_convectionsites.py | 0 test/unit/{ => ocean}/test_cutsection.py | 0 test/unit/{ => ocean}/test_gyres.py | 0 test/unit/{ => ocean}/test_heatcontent.py | 0 .../unit/{ => ocean}/test_heatcontentlayer.py | 0 test/unit/{ => ocean}/test_interpolate.py | 0 test/unit/{ => ocean}/test_maxmoc.py | 0 .../{ => ocean}/test_mixedlayerheatcontent.py | 0 .../{ => ocean}/test_mixedlayersaltcontent.py | 0 test/unit/{ => ocean}/test_moc.py | 0 test/unit/{ => ocean}/test_psi.py | 0 test/unit/{ => ocean}/test_siasiesiv.py | 0 test/unit/{ => ocean}/test_verticalmean.py | 0 .../{ => ocean}/test_verticalmeanmeters.py | 0 test/unit/statistics/__init__.py | 0 .../test_climatologicalpercentile.py | 0 
.../test_monthlypercentile.py | 0 test/unit/test_cdftools.py | 25 ++++++++- 30 files changed, 165 insertions(+), 67 deletions(-) delete mode 100644 test.py create mode 100644 test/run_test.py create mode 100644 test/unit/general/__init__.py create mode 100644 test/unit/general/test_attribute.py create mode 100644 test/unit/general/test_dailymean.py rename test/unit/{ => general}/test_monthlymean.py (100%) rename test/unit/{ => general}/test_rewrite.py (100%) create mode 100644 test/unit/ocean/__init__.py rename test/unit/{ => ocean}/test_areamoc.py (100%) rename test/unit/{ => ocean}/test_averagesection.py (100%) rename test/unit/{ => ocean}/test_convectionsites.py (100%) rename test/unit/{ => ocean}/test_cutsection.py (100%) rename test/unit/{ => ocean}/test_gyres.py (100%) rename test/unit/{ => ocean}/test_heatcontent.py (100%) rename test/unit/{ => ocean}/test_heatcontentlayer.py (100%) rename test/unit/{ => ocean}/test_interpolate.py (100%) rename test/unit/{ => ocean}/test_maxmoc.py (100%) rename test/unit/{ => ocean}/test_mixedlayerheatcontent.py (100%) rename test/unit/{ => ocean}/test_mixedlayersaltcontent.py (100%) rename test/unit/{ => ocean}/test_moc.py (100%) rename test/unit/{ => ocean}/test_psi.py (100%) rename test/unit/{ => ocean}/test_siasiesiv.py (100%) rename test/unit/{ => ocean}/test_verticalmean.py (100%) rename test/unit/{ => ocean}/test_verticalmeanmeters.py (100%) create mode 100644 test/unit/statistics/__init__.py rename test/unit/{ => statistics}/test_climatologicalpercentile.py (100%) rename test/unit/{ => statistics}/test_monthlypercentile.py (100%) diff --git a/earthdiagnostics/general/attribute.py b/earthdiagnostics/general/attribute.py index c2946517..3fcda66c 100644 --- a/earthdiagnostics/general/attribute.py +++ b/earthdiagnostics/general/attribute.py @@ -44,13 +44,13 @@ class Attribute(Diagnostic): self.attributte_value = attributte_value def __str__(self): - return 'Write attributte output Startdate: {0} Member: {1} Chunk: {2} ' 
\ - 'Variable: {3}:{4} Attributte:{5}:{6}'.format(self.startdate, self.member, self.chunk, self.domain, - self.variable, self.attributte_name, self.attributte_value) + return 'Write attributte output Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ + 'Variable: {0.domain}:{0.variable} Attributte: {0.attributte_name}:{0.attributte_value} ' \ + 'Grid: {0.grid}'.format(self) def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ - self.domain == other.domain and self.variable == other.variable and \ + self.domain == other.domain and self.variable == other.variable and self.grid == other.grid and \ self.attributte_name == other.attributte_name and self.attributte_value == other.attributte_value @classmethod diff --git a/test.py b/test.py deleted file mode 100644 index a6785ecb..00000000 --- a/test.py +++ /dev/null @@ -1,30 +0,0 @@ -# coding=utf-8 -""" -Script to run the tests for EarthDiagnostics and generate the code coverage report -""" -import coverage -import unittest -import os -cov = coverage.Coverage() -cov.set_option("run:branch", True) -cov.start() - -# noinspection PyPep8 -import test.unit - -suite = unittest.TestLoader().loadTestsFromModule(test.unit) -unittest.TextTestRunner(verbosity=2).run(suite) -cov.stop() -cov.save() - -source_files = list() -for path, dirs, files in os.walk('earthdiagnostics'): - for filename in files: - if filename.endswith('.py'): - source_files.append(os.path.join(path, filename)) - -cov.report(source_files) -cov.html_report(source_files) - - - diff --git a/test/run_test.py b/test/run_test.py new file mode 100644 index 00000000..90245bf9 --- /dev/null +++ b/test/run_test.py @@ -0,0 +1,26 @@ +# coding=utf-8 +""" +Script to run the tests for EarthDiagnostics and generate the code coverage report +""" + +import coverage +import unittest +import os +work_path = os.path.abspath('.') +source_path = os.path.join(work_path, '..', 
'earthdiagnostics', '*') +print(source_path) +cov = coverage.Coverage(include=source_path) +cov.set_option("run:branch", True) +cov.set_option("html:title", 'Coverage report for ESMValTool') + +cov.start() +suite = unittest.TestLoader().discover('.') +unittest.TextTestRunner(verbosity=2).run(suite) +cov.stop() + +cov.save() +cov.report() +cov.html_report() + + + diff --git a/test/unit/__init__.py b/test/unit/__init__.py index 162b0b52..8b137891 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -1,32 +1 @@ -# coding=utf-8 -from test_data_manager import TestConversion -# from test.unit.test_variable import TestVariable -from test_constants import TestBasin -from test_box import TestBox -from test_diagnostic import * -from test_cdftools import TestCDFTools -from test_utils import TestTempFile, TestUtils -from test_psi import TestPsi -from test_areamoc import TestAreaMoc -# from test_averagesection import TestAverageSection -from test_cutsection import TestCutSection -from test_convectionsites import TestConvectionSites -from test_frequency import TestFrequency -from test_gyres import TestGyres -# from test_heatcontent import TestHeatContent -from test_heatcontentlayer import TestHeatContentLayer -# from test_interpolate import TestInterpolate -from test_maxmoc import TestMaxMoc -from test_mixedlayerheatcontent import TestMixedLayerHeatContent -from test_mixedlayersaltcontent import TestMixedLayerSaltContent -from test_moc import TestMoc -from test_modelling_realm import TestModellingRealms, TestModellingRealm -from test_siasiesiv import TestSiasiesiv -from test_verticalmean import TestVerticalMean -from test_verticalmeanmeters import TestVerticalMeanMeters -from test_monthlymean import TestMonthlyMean -from test_rewrite import TestRewrite -from test_variable_type import TestVariableType -from test_monthlypercentile import TestMonthlyPercentile -from test_climatologicalpercentile import TestClimatologicalPercentile -from test_variable import 
TestCMORTable, TestVariableAlias + diff --git a/test/unit/general/__init__.py b/test/unit/general/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/general/test_attribute.py b/test/unit/general/test_attribute.py new file mode 100644 index 00000000..ee9a0118 --- /dev/null +++ b/test/unit/general/test_attribute.py @@ -0,0 +1,55 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.box import Box +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.attribute import Attribute +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestAttribute(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + jobs = Attribute.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'att', 'value']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Attribute(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', '', + 'att', 'value')) + self.assertEqual(jobs[1], Attribute(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', '', + 'att', 'value')) + + jobs = Attribute.generate_jobs(self.diags, ['diagnostic', 'seaice', 'var', 'att', 'value', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Attribute(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, 'var', 'grid', + 'att', 'value')) + self.assertEqual(jobs[1], Attribute(self.data_manager, '20010101', 0, 1, ModelingRealms.seaIce, 'var', 'grid', + 'att', 'value')) + + with 
self.assertRaises(Exception): + Attribute.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(Exception): + Attribute.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + + def test_str(self): + mixed = Attribute(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 'att', 'value') + self.assertEquals(str(mixed), 'Write attributte output Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + 'Attributte: att:value Grid: grid') diff --git a/test/unit/general/test_dailymean.py b/test/unit/general/test_dailymean.py new file mode 100644 index 00000000..b58fe146 --- /dev/null +++ b/test/unit/general/test_dailymean.py @@ -0,0 +1,55 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.box import Box +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.dailymean import DailyMean +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestDailyMean(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + jobs = DailyMean.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '6hr']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], DailyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', + Frequencies.six_hourly, '')) + self.assertEqual(jobs[1], DailyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', + Frequencies.six_hourly, '')) + + jobs = DailyMean.generate_jobs(self.diags, ['diagnostic', 'seaice', 
'var', '3h', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], DailyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, 'var', + Frequencies.three_hourly, 'grid')) + self.assertEqual(jobs[1], DailyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.seaIce, 'var', + Frequencies.three_hourly, 'grid')) + + with self.assertRaises(Exception): + DailyMean.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(Exception): + DailyMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + + def test_str(self): + mixed = DailyMean(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', 'freq', '') + self.assertEquals(str(mixed), 'Calculate daily mean Startdate: 20000101 Member: 1 Chunk: 1 ' + 'Variable: ocean:var Original frequency: freq Grid: ') diff --git a/test/unit/test_monthlymean.py b/test/unit/general/test_monthlymean.py similarity index 100% rename from test/unit/test_monthlymean.py rename to test/unit/general/test_monthlymean.py diff --git a/test/unit/test_rewrite.py b/test/unit/general/test_rewrite.py similarity index 100% rename from test/unit/test_rewrite.py rename to test/unit/general/test_rewrite.py diff --git a/test/unit/ocean/__init__.py b/test/unit/ocean/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/test_areamoc.py b/test/unit/ocean/test_areamoc.py similarity index 100% rename from test/unit/test_areamoc.py rename to test/unit/ocean/test_areamoc.py diff --git a/test/unit/test_averagesection.py b/test/unit/ocean/test_averagesection.py similarity index 100% rename from test/unit/test_averagesection.py rename to test/unit/ocean/test_averagesection.py diff --git a/test/unit/test_convectionsites.py b/test/unit/ocean/test_convectionsites.py similarity index 100% rename from test/unit/test_convectionsites.py rename to test/unit/ocean/test_convectionsites.py diff --git a/test/unit/test_cutsection.py b/test/unit/ocean/test_cutsection.py similarity 
index 100% rename from test/unit/test_cutsection.py rename to test/unit/ocean/test_cutsection.py diff --git a/test/unit/test_gyres.py b/test/unit/ocean/test_gyres.py similarity index 100% rename from test/unit/test_gyres.py rename to test/unit/ocean/test_gyres.py diff --git a/test/unit/test_heatcontent.py b/test/unit/ocean/test_heatcontent.py similarity index 100% rename from test/unit/test_heatcontent.py rename to test/unit/ocean/test_heatcontent.py diff --git a/test/unit/test_heatcontentlayer.py b/test/unit/ocean/test_heatcontentlayer.py similarity index 100% rename from test/unit/test_heatcontentlayer.py rename to test/unit/ocean/test_heatcontentlayer.py diff --git a/test/unit/test_interpolate.py b/test/unit/ocean/test_interpolate.py similarity index 100% rename from test/unit/test_interpolate.py rename to test/unit/ocean/test_interpolate.py diff --git a/test/unit/test_maxmoc.py b/test/unit/ocean/test_maxmoc.py similarity index 100% rename from test/unit/test_maxmoc.py rename to test/unit/ocean/test_maxmoc.py diff --git a/test/unit/test_mixedlayerheatcontent.py b/test/unit/ocean/test_mixedlayerheatcontent.py similarity index 100% rename from test/unit/test_mixedlayerheatcontent.py rename to test/unit/ocean/test_mixedlayerheatcontent.py diff --git a/test/unit/test_mixedlayersaltcontent.py b/test/unit/ocean/test_mixedlayersaltcontent.py similarity index 100% rename from test/unit/test_mixedlayersaltcontent.py rename to test/unit/ocean/test_mixedlayersaltcontent.py diff --git a/test/unit/test_moc.py b/test/unit/ocean/test_moc.py similarity index 100% rename from test/unit/test_moc.py rename to test/unit/ocean/test_moc.py diff --git a/test/unit/test_psi.py b/test/unit/ocean/test_psi.py similarity index 100% rename from test/unit/test_psi.py rename to test/unit/ocean/test_psi.py diff --git a/test/unit/test_siasiesiv.py b/test/unit/ocean/test_siasiesiv.py similarity index 100% rename from test/unit/test_siasiesiv.py rename to test/unit/ocean/test_siasiesiv.py diff 
--git a/test/unit/test_verticalmean.py b/test/unit/ocean/test_verticalmean.py similarity index 100% rename from test/unit/test_verticalmean.py rename to test/unit/ocean/test_verticalmean.py diff --git a/test/unit/test_verticalmeanmeters.py b/test/unit/ocean/test_verticalmeanmeters.py similarity index 100% rename from test/unit/test_verticalmeanmeters.py rename to test/unit/ocean/test_verticalmeanmeters.py diff --git a/test/unit/statistics/__init__.py b/test/unit/statistics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/test_climatologicalpercentile.py b/test/unit/statistics/test_climatologicalpercentile.py similarity index 100% rename from test/unit/test_climatologicalpercentile.py rename to test/unit/statistics/test_climatologicalpercentile.py diff --git a/test/unit/test_monthlypercentile.py b/test/unit/statistics/test_monthlypercentile.py similarity index 100% rename from test/unit/test_monthlypercentile.py rename to test/unit/statistics/test_monthlypercentile.py diff --git a/test/unit/test_cdftools.py b/test/unit/test_cdftools.py index 2f1d7277..eb45269c 100644 --- a/test/unit/test_cdftools.py +++ b/test/unit/test_cdftools.py @@ -12,13 +12,36 @@ def mock_exists(path, access=None): class TestCDFTools(TestCase): - def setUp(self): + + @mock.patch('os.path.isfile', side_effect=mock_exists) + @mock.patch('os.access', side_effect=mock_exists) + @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') + def test_run(self, mock_path, mock_exists, execute_mock): self.cdftools = CDFTools('/test/path') + execute_mock.return_value = ['Command output'] + with self.assertRaises(ValueError): + self.cdftools.run('badcommand', input='input_file', output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input='badinput_file', output='output_file') + with self.assertRaises(ValueError): + self.cdftools.run('command', input=['input_file', 'badinput_file'], output='output_file') + with 
self.assertRaises(ValueError): + self.cdftools.run('command', input='input_file', output='input_file') + with self.assertRaises(Exception): + self.cdftools.run('command', input='input_file', output='badoutput_file') + + self.cdftools.run('command', input='input_file', output='output_file') + self.cdftools.run('command', input='input_file') + self.cdftools.run('command', input=None) + self.cdftools.run('command', input=['input_file', 'input_file2']) + self.cdftools.run('command', input='input_file', options='-o -p') + self.cdftools.run('command', input='input_file', options=('-o', '-p')) @mock.patch('os.path.isfile', side_effect=mock_exists) @mock.patch('os.access', side_effect=mock_exists) @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') def test_run(self, mock_path, mock_exists, execute_mock): + self.cdftools = CDFTools('') execute_mock.return_value = ['Command output'] with self.assertRaises(ValueError): self.cdftools.run('badcommand', input='input_file', output='output_file') -- GitLab From db2c5699b36b391953a066bc7bdc153a1db36e81 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 29 Aug 2017 17:56:07 +0200 Subject: [PATCH 58/82] Added missing tests for general diagnostics --- earthdiagnostics/general/relink.py | 5 +- .../general/verticalmeanmetersiris.py | 8 +- earthdiagnostics/general/yearlymean.py | 2 +- test/unit/general/test_module.py | 54 ++++++++++++++ test/unit/general/test_relink.py | 61 +++++++++++++++ test/unit/general/test_relinkall.py | 39 ++++++++++ test/unit/general/test_scale.py | 71 ++++++++++++++++++ test/unit/general/test_select_levels.py | 66 +++++++++++++++++ test/unit/general/test_simplify_dimensions.py | 55 ++++++++++++++ .../general/test_verticalmeanmetersiris.py | 74 +++++++++++++++++++ test/unit/general/test_yearlymean.py | 63 ++++++++++++++++ 11 files changed, 489 insertions(+), 9 deletions(-) create mode 100644 test/unit/general/test_module.py create mode 100644 test/unit/general/test_relink.py create mode 
100644 test/unit/general/test_relinkall.py create mode 100644 test/unit/general/test_scale.py create mode 100644 test/unit/general/test_select_levels.py create mode 100644 test/unit/general/test_simplify_dimensions.py create mode 100644 test/unit/general/test_verticalmeanmetersiris.py create mode 100644 test/unit/general/test_yearlymean.py diff --git a/earthdiagnostics/general/relink.py b/earthdiagnostics/general/relink.py index b5dcad61..689aba87 100644 --- a/earthdiagnostics/general/relink.py +++ b/earthdiagnostics/general/relink.py @@ -43,9 +43,8 @@ class Relink(Diagnostic): self.grid = grid def __str__(self): - return 'Relink output Startdate: {0} Member: {1} Chunk: {2} Move old: {5} ' \ - 'Variable: {3}:{4} Grid: {6}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable, - self.move_old, self.grid) + return 'Relink output Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} Move old: {0.move_old} ' \ + 'Variable: {0.domain}:{0.variable} Grid: {0.grid}'.format(self) def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py index 0d0633cc..b86e12a4 100644 --- a/earthdiagnostics/general/verticalmeanmetersiris.py +++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -3,6 +3,7 @@ import iris import iris.analysis import iris.exceptions +from diagnostic import DiagnosticOption from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticFloatOption, DiagnosticDomainOption, \ DiagnosticVariableOption @@ -38,7 +39,7 @@ class VerticalMeanMetersIris(Diagnostic): alias = 'vmean' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, startdate, member, chunk, domain, variable, box, grid_point): + def __init__(self, data_manager, startdate, member, chunk, domain, variable, box): 
Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -46,9 +47,6 @@ class VerticalMeanMetersIris(Diagnostic): self.domain = domain self.variable = variable self.box = box - self.required_vars = [variable] - self.generated_vars = [variable + 'vmean'] - self.grid_point = grid_point def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ @@ -84,7 +82,7 @@ class VerticalMeanMetersIris(Diagnostic): job_list = list() for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append(VerticalMeanMetersIris(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], box, options['grid_point'])) + options['domain'], options['variable'], box)) return job_list def request_data(self): diff --git a/earthdiagnostics/general/yearlymean.py b/earthdiagnostics/general/yearlymean.py index 97860a2e..517e844d 100644 --- a/earthdiagnostics/general/yearlymean.py +++ b/earthdiagnostics/general/yearlymean.py @@ -72,7 +72,7 @@ class YearlyMean(Diagnostic): options_available = (DiagnosticDomainOption(), DiagnosticVariableOption(), - DiagnosticFrequencyOption(), + DiagnosticFrequencyOption(default_value=diags.config.frequency), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/test/unit/general/test_module.py b/test/unit/general/test_module.py new file mode 100644 index 00000000..e01ecf8c --- /dev/null +++ b/test/unit/general/test_module.py @@ -0,0 +1,54 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.box import Box +from earthdiagnostics.general.module import Module +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestModule(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + 
self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + jobs = Module.generate_jobs(self.diags, ['diagnostic', 'atmos', 'varu', 'varv', 'varmodule']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Module(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, + 'varu', 'varv', 'varmodule', '')) + self.assertEqual(jobs[1], Module(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, + 'varu', 'varv', 'varmodule', '')) + + jobs = Module.generate_jobs(self.diags, ['diagnostic', 'seaIce', 'varu', 'varv', 'varmodule', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Module(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, + 'varu', 'varv', 'varmodule', 'grid')) + self.assertEqual(jobs[1], Module(self.data_manager, '20010101', 0, 1, ModelingRealms.seaIce, + 'varu', 'varv', 'varmodule', 'grid')) + + with self.assertRaises(Exception): + Module.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(Exception): + Module.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + + def test_str(self): + mixed = Module(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'varu', 'varv', 'varmodule', 'grid') + self.assertEquals(str(mixed), 'Calculate module Startdate: 20010101 Member: 0 Chunk: 0 ' + 'Variables: atmos:varu,varv,varmodule Grid: grid') diff --git a/test/unit/general/test_relink.py b/test/unit/general/test_relink.py new file mode 100644 index 00000000..835b0bcd --- /dev/null +++ b/test/unit/general/test_relink.py @@ -0,0 +1,61 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.box 
import Box +from earthdiagnostics.general.relink import Relink +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestRelink(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + jobs = Relink.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Relink(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, + 'var', True, '')) + self.assertEqual(jobs[1], Relink(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, + 'var', True, '')) + + jobs = Relink.generate_jobs(self.diags, ['diagnostic', 'seaIce', 'var', 'False']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Relink(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, + 'var', False, '')) + self.assertEqual(jobs[1], Relink(self.data_manager, '20010101', 0, 1, ModelingRealms.seaIce, + 'var', False, '')) + + jobs = Relink.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'True', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Relink(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, + 'var', True, 'grid')) + self.assertEqual(jobs[1], Relink(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, + 'var', True, 'grid')) + + with self.assertRaises(Exception): + Relink.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(Exception): + Relink.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + + def test_str(self): + mixed = Relink(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 
True, 'grid') + self.assertEquals(str(mixed), 'Relink output Startdate: 20010101 Member: 0 Chunk: 0 Move old: True ' + 'Variable: ocean:var Grid: grid') diff --git a/test/unit/general/test_relinkall.py b/test/unit/general/test_relinkall.py new file mode 100644 index 00000000..cf8c9a16 --- /dev/null +++ b/test/unit/general/test_relinkall.py @@ -0,0 +1,39 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.box import Box +from earthdiagnostics.general.relinkall import RelinkAll +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestRelinkAll(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.startdates = ['20010101', ] + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = RelinkAll.generate_jobs(self.diags, ['diagnostic']) + self.assertEqual(len(jobs), 1) + self.assertEqual(jobs[0], RelinkAll(self.data_manager, '20010101')) + + with self.assertRaises(Exception): + RelinkAll.generate_jobs(self.diags, ['diagnostic', '0']) + + def test_str(self): + mixed = RelinkAll(self.data_manager, '20010101') + self.assertEquals(str(mixed), 'Relink all output Startdate: 20010101') diff --git a/test/unit/general/test_scale.py b/test/unit/general/test_scale.py new file mode 100644 index 00000000..e7697cc2 --- /dev/null +++ b/test/unit/general/test_scale.py @@ -0,0 +1,71 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError +from earthdiagnostics.box import Box +from earthdiagnostics.general.scale import Scale +from 
earthdiagnostics.frequency import Frequencies +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestScale(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.startdates = ['20010101', ] + self.diags.config.frequency = Frequencies.monthly + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', '', + float('nan'), float('nan'), Frequencies.monthly)) + self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', '', + float('nan'), float('nan'), Frequencies.monthly)) + + jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', + float('nan'), float('nan'), Frequencies.monthly)) + self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', + float('nan'), float('nan'), Frequencies.monthly)) + + jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', + 0, 100, Frequencies.monthly)) + self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', + 0, 100, 
Frequencies.monthly)) + + jobs = Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100', '3hr']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', + 0, 100, Frequencies.three_hourly)) + self.assertEqual(jobs[1], Scale(self.data_manager, '20010101', 0, 1, 0, 0, ModelingRealms.atmos, 'var', 'grid', + 0, 100, Frequencies.three_hourly)) + + with self.assertRaises(DiagnosticOptionError): + Scale.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(DiagnosticOptionError): + Scale.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '0', 'grid', '0', '100', '3hr', + 'extra']) + + def test_str(self): + mixed = Scale(self.data_manager, '20010101', 0, 0, 0, 0, ModelingRealms.atmos, 'var', 'grid', 0, 100, + Frequencies.three_hourly) + self.assertEquals(str(mixed), 'Scale output Startdate: 20010101 Member: 0 Chunk: 0 Scale value: 0 Offset: 0 ' + 'Variable: atmos:var Frequency: 3hr') diff --git a/test/unit/general/test_select_levels.py b/test/unit/general/test_select_levels.py new file mode 100644 index 00000000..32e7424d --- /dev/null +++ b/test/unit/general/test_select_levels.py @@ -0,0 +1,66 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableListOption, DiagnosticOptionError +from earthdiagnostics.box import Box +from earthdiagnostics.general.select_levels import SelectLevels +from earthdiagnostics.frequency import Frequencies +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestSelectLevels(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.startdates = ['20010101', ] + self.diags.config.frequency = Frequencies.monthly + + self.box = Box() + 
self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value.split('-') + + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = SelectLevels.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '20']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], SelectLevels(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', + '', 0, 20)) + self.assertEqual(jobs[1], SelectLevels(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', + '', 0, 20)) + + jobs = SelectLevels.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var1-var2', '0', '20']) + self.assertEqual(len(jobs), 4) + self.assertEqual(jobs[0], SelectLevels(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var1', + '', 0, 20)) + self.assertEqual(jobs[1], SelectLevels(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var1', + '', 0, 20)) + self.assertEqual(jobs[2], SelectLevels(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var2', + '', 0, 20)) + self.assertEqual(jobs[3], SelectLevels(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var2', + '', 0, 20)) + + jobs = SelectLevels.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '20', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], SelectLevels(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', + 'grid', 0, 20)) + self.assertEqual(jobs[1], SelectLevels(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', + 'grid', 0, 20)) + + with self.assertRaises(DiagnosticOptionError): + SelectLevels.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(DiagnosticOptionError): + SelectLevels.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '0', '20', 'grid', 'extra']) + + def test_str(self): + mixed = SelectLevels(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 0, 20) + self.assertEquals(str(mixed), 
'Select levels Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + 'Levels: 0-20 Grid: grid') diff --git a/test/unit/general/test_simplify_dimensions.py b/test/unit/general/test_simplify_dimensions.py new file mode 100644 index 00000000..429ad6f2 --- /dev/null +++ b/test/unit/general/test_simplify_dimensions.py @@ -0,0 +1,55 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableListOption, DiagnosticOptionError +from earthdiagnostics.box import Box +from earthdiagnostics.general.simplify_dimensions import SimplifyDimensions +from earthdiagnostics.frequency import Frequencies +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestSimplifyDimensions(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.startdates = ['20010101', ] + self.diags.config.frequency = Frequencies.monthly + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + return value.split('-') + + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], SimplifyDimensions(self.data_manager, '20010101', 0, 0, + ModelingRealms.atmos, 'var', '')) + self.assertEqual(jobs[1], SimplifyDimensions(self.data_manager, '20010101', 0, 1, + ModelingRealms.atmos, 'var', '')) + + jobs = SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], SimplifyDimensions(self.data_manager, '20010101', 0, 0, + ModelingRealms.atmos, 'var', 'grid')) + self.assertEqual(jobs[1], SimplifyDimensions(self.data_manager, '20010101', 0, 
1, + ModelingRealms.atmos, 'var', 'grid')) + + with self.assertRaises(DiagnosticOptionError): + SimplifyDimensions.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(DiagnosticOptionError): + SimplifyDimensions.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'grid', 'extra']) + + def test_str(self): + mixed = SimplifyDimensions(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid') + self.assertEquals(str(mixed), 'Simplify dimension Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + 'Grid: grid') diff --git a/test/unit/general/test_verticalmeanmetersiris.py b/test/unit/general/test_verticalmeanmetersiris.py new file mode 100644 index 00000000..cd2876fe --- /dev/null +++ b/test/unit/general/test_verticalmeanmetersiris.py @@ -0,0 +1,74 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption, DiagnosticOptionError +from earthdiagnostics.box import Box +from earthdiagnostics.general.verticalmeanmetersiris import VerticalMeanMetersIris +from earthdiagnostics.frequency import Frequencies +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestVerticalMeanMetersIris(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.startdates = ['20010101', ] + self.diags.config.frequency = Frequencies.monthly + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + def fake_parse(self, value): + if not value: + raise DiagnosticOptionError + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + box = Box(True) + + jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], 
VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, + ModelingRealms.ocean, 'var', box)) + self.assertEqual(jobs[1], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 1, + ModelingRealms.ocean, 'var', box)) + + box = Box(True) + box.min_depth = 0 + box.max_depth = 100 + + jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, + ModelingRealms.ocean, 'var', box)) + self.assertEqual(jobs[1], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 1, + ModelingRealms.ocean, 'var', box)) + + jobs = VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100', 'seaIce']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, + ModelingRealms.seaIce, 'var', box)) + self.assertEqual(jobs[1], VerticalMeanMetersIris(self.data_manager, '20010101', 0, 1, + ModelingRealms.seaIce, 'var', box)) + + with self.assertRaises(DiagnosticOptionError): + VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(DiagnosticOptionError): + VerticalMeanMetersIris.generate_jobs(self.diags, ['diagnostic', 'var', '0', '100', 'seaIce', 'extra']) + + def test_str(self): + box = Box(True) + box.min_depth = 0 + box.max_depth = 100 + mixed = VerticalMeanMetersIris(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', box) + self.assertEquals(str(mixed), 'Vertical mean meters Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + 'Box: 0-100m') diff --git a/test/unit/general/test_yearlymean.py b/test/unit/general/test_yearlymean.py new file mode 100644 index 00000000..dcf5ad75 --- /dev/null +++ b/test/unit/general/test_yearlymean.py @@ -0,0 +1,63 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from earthdiagnostics.box 
import Box +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.general.yearlymean import YearlyMean +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestYearlyMean(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.frequency = Frequencies.monthly + + self.box = Box() + self.box.min_depth = 0 + self.box.max_depth = 100 + + self.mixed = YearlyMean(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', 'freq', '') + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + jobs = YearlyMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], YearlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + Frequencies.monthly, '')) + self.assertEqual(jobs[1], YearlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + Frequencies.monthly, '')) + + jobs = YearlyMean.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', 'day']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], YearlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', + Frequencies.daily, '')) + self.assertEqual(jobs[1], YearlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', + Frequencies.daily, '')) + + jobs = YearlyMean.generate_jobs(self.diags, ['diagnostic', 'seaice', 'var', 'mon', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], YearlyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.seaIce, 'var', + Frequencies.monthly, 'grid')) + self.assertEqual(jobs[1], YearlyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.seaIce, 'var', + Frequencies.monthly, 'grid')) + + with 
self.assertRaises(Exception): + YearlyMean.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(Exception): + YearlyMean.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + + def test_str(self): + self.assertEquals(str(self.mixed), 'Calculate yearly mean Startdate: 20000101 Member: 1 Chunk: 1 ' + 'Variable: ocean:var Original frequency: freq Grid: ') -- GitLab From d18d90dd7cc4f5ad87c2084beb69bc85a09387ee Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 30 Aug 2017 13:09:55 +0200 Subject: [PATCH 59/82] Added missing tests for ocean and statistics diagnostics --- earthdiagnostics/ocean/averagesection.py | 4 +- earthdiagnostics/ocean/heatcontent.py | 7 +- earthdiagnostics/ocean/interpolate.py | 11 +- earthdiagnostics/ocean/interpolatecdo.py | 33 +++-- earthdiagnostics/ocean/mask_land.py | 24 ++-- earthdiagnostics/ocean/regionmean.py | 5 +- .../statistics/daysoverpercentile.py | 9 +- test/run_test.py | 2 +- test/unit/ocean/test_averagesection.py | 102 +++++++------- test/unit/ocean/test_heatcontent.py | 85 ++++++------ test/unit/ocean/test_heatcontentlayer.py | 4 +- test/unit/ocean/test_interpolate.py | 130 ++++++++++-------- test/unit/ocean/test_interpolatecdo.py | 89 ++++++++++++ test/unit/ocean/test_maskland.py | 65 +++++++++ test/unit/ocean/test_mxl.py | 29 ++++ test/unit/ocean/test_region_mean.py | 101 ++++++++++++++ test/unit/ocean/test_vertical_gradient.py | 66 +++++++++ .../statistics/test_daysoverpercentile.py | 32 +++++ 18 files changed, 613 insertions(+), 185 deletions(-) create mode 100644 test/unit/ocean/test_interpolatecdo.py create mode 100644 test/unit/ocean/test_maskland.py create mode 100644 test/unit/ocean/test_mxl.py create mode 100644 test/unit/ocean/test_region_mean.py create mode 100644 test/unit/ocean/test_vertical_gradient.py create mode 100644 test/unit/statistics/test_daysoverpercentile.py diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py 
index 114d6d1b..18fafbb8 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -51,8 +51,8 @@ class AverageSection(Diagnostic): self.domain == other.domain and self.variable == other.variable and self.box == other.box def __str__(self): - return 'Average section Startdate: {0} Member: {1} Chunk: {2} Box: {3} ' \ - 'Variable: {4}:{5}'.format(self.startdate, self.member, self.chunk, self.box, self.domain, self.variable) + return 'Average section Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} Box: {0.box} ' \ + 'Variable: {0.domain}:{0.variable} Grid: {0.grid}'.format(self) @classmethod def generate_jobs(cls, diags, options): diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index 8e322e43..31ac9268 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -48,17 +48,14 @@ class HeatContent(Diagnostic): self.box = box self.min_level = min_level self.max_level = max_level - self.required_vars = ['so', 'mlotst'] - self.generated_vars = ['scvertsum'] def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ self.box == other.box and self.basin == other.basin and self.mxloption == other.mxloption def __str__(self): - return 'Heat content Startdate: {0} Member: {1} Chunk: {2} Mixed layer: {3} Box: {4} ' \ - 'Basin: {5}'.format(self.startdate, self.member, self.chunk, self.mxloption, self.box, - self.basin.fullname) + return 'Heat content Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} Mixed layer: {0.mxloption} ' \ + 'Box: {0.box} Basin: {0.basin}'.format(self) @classmethod def generate_jobs(cls, diags, options): diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index 2fe03587..26c577b0 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -91,11 +91,12 @@ 
class Interpolate(Diagnostic): options = cls.process_options(options, options_available) job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append( - Interpolate(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], options['target_grid'], - diags.config.experiment.model_version, options['invert_lat'], options['original_grid'])) + for var in options['variable']: + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + job_list.append( + Interpolate(diags.data_manager, startdate, member, chunk, + options['domain'], var , options['target_grid'], + diags.config.experiment.model_version, options['invert_lat'], options['original_grid'])) return job_list def request_data(self): diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index 443de5d4..c867dc31 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -62,16 +62,16 @@ class InterpolateCDO(Diagnostic): def __eq__(self, other): return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \ self.model_version == other.model_version and self.domain == other.domain and \ - self.variable == other.variable and self.grid == other.grid and self.original_grid == other.original_grid + self.variable == other.variable and self.mask_oceans == other.mask_oceans and self.grid == other.grid and \ + self.original_grid == other.original_grid def __str__(self): - return 'Interpolate with CDO Startdate: {0} Member: {1} Chunk: {2} ' \ - 'Variable: {3}:{4} Target grid: {5} ' \ - 'Model: {6}' .format(self.startdate, self.member, self.chunk, self.domain, self.variable, self.grid, - self.model_version) + return 'Interpolate with CDO Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} ' \ + 'Variable: {0.domain}:{0.variable} Target grid: {0.grid} Original grid: {0.original_grid} ' \ + 
'Mask ocean: {0.mask_oceans} Model: {0.model_version}'.format(self) @classmethod - def generate_jobs(cls, diags, options): + def generate_jobs(cls, diags, options ): """ Creates a job for each chunk to compute the diagnostic @@ -82,10 +82,9 @@ class InterpolateCDO(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(default_value=ModelingRealms.ocean), - DiagnosticVariableOption(), + DiagnosticVariableListOption('variables'), DiagnosticOption('target_grid', diags.config.experiment.atmos_grid.lower()), DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), - DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), DiagnosticBoolOption('mask_oceans', True), DiagnosticOption('original_grid', ''), DiagnosticBoolOption('weights_from_mask', True) @@ -97,6 +96,17 @@ class InterpolateCDO(Diagnostic): job_list = list() weights = TempFile.get() method = options['method'].lower() + cls._compute_weights(diags, method, options, target_grid, weights) + for var in options['variables']: + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + job_list.append(InterpolateCDO(diags.data_manager, startdate, member, chunk, + options['domain'], var, target_grid, + diags.config.experiment.model_version, options['mask_oceans'], + options['original_grid'], weights)) + return job_list + + @classmethod + def _compute_weights(cls, diags, method, options, target_grid, weights): if options['weights_from_mask']: temp = cls.get_sample_grid_file() else: @@ -113,13 +123,6 @@ class InterpolateCDO(Diagnostic): Utils.cdo.gencon2(target_grid, input=temp, output=weights) os.remove(temp) - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(InterpolateCDO(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], target_grid, - diags.config.experiment.model_version, options['mask_oceans'], - options['original_grid'], weights)) - return job_list - 
@classmethod def get_sample_grid_file(cls): temp = TempFile.get() diff --git a/earthdiagnostics/ocean/mask_land.py b/earthdiagnostics/ocean/mask_land.py index 76f0a156..71c54007 100644 --- a/earthdiagnostics/ocean/mask_land.py +++ b/earthdiagnostics/ocean/mask_land.py @@ -1,5 +1,5 @@ # coding=utf-8 -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, \ +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableListOption, \ DiagnosticDomainOption, DiagnosticChoiceOption, DiagnosticOption from earthdiagnostics.utils import Utils, TempFile import numpy as np @@ -56,25 +56,33 @@ class MaskLand(Diagnostic): :return: """ options_available = (DiagnosticDomainOption('domain'), - DiagnosticVariableOption('variable'), + DiagnosticVariableListOption('variables'), DiagnosticChoiceOption('cell', ('t', 'u', 'v', 'f', 'w'), 't'), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) - mask_file = Utils.openCdf('mask.nc') cell_point = options['cell'] # W and T share the same mask if cell_point == 'w': cell_point = 't' - mask = mask_file.variables['{0}mask'.format(cell_point)][:].astype(float) - mask[mask == 0] = np.nan + + mask = cls._get_mask(cell_point) job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job_list.append(MaskLand(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], mask, options['grid'])) + for var in options['variables']: + for startdate, member, chunk in diags.config.experiment.get_chunk_list(): + job_list.append(MaskLand(diags.data_manager, startdate, member, chunk, + options['domain'], var, mask, options['grid'])) return job_list + @classmethod + def _get_mask(cls, cell_point): + mask_file = Utils.openCdf('mask.nc') + mask = mask_file.variables['{0}mask'.format(cell_point)][:].astype(float) + mask[mask == 0] = np.nan + mask_file.close() + return mask + "Diagnostic alias for the configuration file" def 
request_data(self): diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index ae2096b8..f4a0c02d 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -58,8 +58,9 @@ class RegionMean(Diagnostic): self.box == other.box and self.variable == other.variable def __str__(self): - return 'Region mean Startdate: {0} Member: {1} Chunk: {2} Variable: {3} ' \ - 'Box: {4}'.format(self.startdate, self.member, self.chunk, self.variable, self.box) + return 'Region mean Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} Variable: {0.variable} ' \ + 'Grid point: {0.grid_point} Box: {0.box} Save 3D: {0.save3d} Save variance: {0.variance} ' \ + 'Original grid: {0.grid}'.format(self) @classmethod def generate_jobs(cls, diags, options): diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 499fb50f..0cbd46f2 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -45,12 +45,13 @@ class DaysOverPercentile(Diagnostic): self.startdate = '{0}{1:02}01'.format(self.year_to_compute, self.forecast_month) def __eq__(self, other): - return self.startdate == other.startdate and self.domain == other.domain and self.variable == other.variable + return self.startdate == other.startdate and self.domain == other.domain and \ + self.variable == other.variable and self.start_year == other.start_year and \ + self.end_year == other.end_year def __str__(self): - return 'Days over percentile Startdate: {0} ' \ - 'Variable: {1}:{2} Climatology: {3}-{4}'.format(self.startdate, self.domain, self.variable, - self.start_year, self.end_year) + return 'Days over percentile Startdate: {0.startdate} Variable: {0.domain}:{0.variable} ' \ + 'Climatology: {0.start_year}-{0.end_year}'.format(self) @classmethod def generate_jobs(cls, diags, options): diff --git a/test/run_test.py b/test/run_test.py 
index 90245bf9..59eec7e9 100644 --- a/test/run_test.py +++ b/test/run_test.py @@ -11,7 +11,7 @@ source_path = os.path.join(work_path, '..', 'earthdiagnostics', '*') print(source_path) cov = coverage.Coverage(include=source_path) cov.set_option("run:branch", True) -cov.set_option("html:title", 'Coverage report for ESMValTool') +cov.set_option("html:title", 'Coverage report for EarthDiagnostics') cov.start() suite = unittest.TestLoader().discover('.') diff --git a/test/unit/ocean/test_averagesection.py b/test/unit/ocean/test_averagesection.py index c2f346fe..a5b29133 100644 --- a/test/unit/ocean/test_averagesection.py +++ b/test/unit/ocean/test_averagesection.py @@ -1,48 +1,54 @@ -# # coding=utf-8 -# from unittest import TestCase -# -# from earthdiagnostics.box import Box -# from earthdiagnostics.ocean.averagesection import AverageSection -# from mock import Mock -# -# from earthdiagnostics.modelingrealm import ModelingRealms -# -# -# class TestAverageSection(TestCase): -# -# def setUp(self): -# self.data_manager = Mock() -# self.diags = Mock() -# -# self.box = Box() -# self.box.min_lat = 0 -# self.box.max_lat = 0 -# self.box.min_lon = 0 -# self.box.max_lon = 0 -# -# self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) -# self.psi = AverageSection(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', self.box) -# -# def test_generate_jobs(self): -# jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'var', '0', '0', '0', '0']) -# self.assertEqual(len(jobs), 2) -# self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', -# self.box)) -# self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', -# self.box)) -# -# jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'var', '0', '0', '0', '0', 'ocean']) -# self.assertEqual(len(jobs), 2) -# self.assertEqual(jobs[0], 
AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', -# self.box)) -# self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', -# self.box)) -# -# with self.assertRaises(Exception): -# AverageSection.generate_jobs(self.diags, ['diagnostic']) -# with self.assertRaises(Exception): -# AverageSection.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) -# -# def test_str(self): -# self.assertEquals(str(self.psi), 'Average section Startdate: 20000101 Member: 1 Chunk: 1 Box: 0N0E ' -# 'Variable: ocean:var') +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.box import Box +from earthdiagnostics.ocean.averagesection import AverageSection +from mock import Mock, patch +from earthdiagnostics.diagnostic import DiagnosticVariableOption + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestAverageSection(TestCase): + + def setUp(self): + self.data_manager = Mock() + self.diags = Mock() + + self.box = Box() + self.box.min_lat = 0 + self.box.max_lat = 0 + self.box.min_lon = 0 + self.box.max_lon = 0 + + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', '0', '0', '0', '0']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + self.box, '')) + self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + self.box, '')) + + jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', '0', '0', '0', '0', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], AverageSection(self.data_manager, '20010101', 
0, 0, ModelingRealms.ocean, 'var', + self.box, 'grid')) + self.assertEqual(jobs[1], AverageSection(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + self.box, 'grid')) + + with self.assertRaises(Exception): + AverageSection.generate_jobs(self.diags, ['diagnostic']) + with self.assertRaises(Exception): + AverageSection.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', '0', '0', '0', '0', 'grid', + 'extra']) + + def test_str(self): + diag = AverageSection(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', self.box, 'grid') + self.assertEquals(str(diag), 'Average section Startdate: 20010101 Member: 0 Chunk: 0 Box: 0N0E ' + 'Variable: ocean:var Grid: grid') diff --git a/test/unit/ocean/test_heatcontent.py b/test/unit/ocean/test_heatcontent.py index a98d0c86..cac23fcc 100644 --- a/test/unit/ocean/test_heatcontent.py +++ b/test/unit/ocean/test_heatcontent.py @@ -1,39 +1,46 @@ -# # coding=utf-8 -# from unittest import TestCase -# -# from earthdiagnostics.box import Box -# from earthdiagnostics.constants import Basins -# from earthdiagnostics.ocean.heatcontent import HeatContent -# from mock import Mock -# -# -# class TestHeatContent(TestCase): -# -# def setUp(self): -# self.data_manager = Mock() -# -# self.diags = Mock() -# self.diags.model_version = 'model_version' -# self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) -# -# self.box = Box(False) -# self.box.min_depth = 0 -# self.box.max_depth = 100 -# -# self.heat_content = HeatContent(self.data_manager, '20000101', 1, 1, Basins().Global, 1, self.box) -# -# def test_generate_jobs(self): -# jobs = HeatContent.generate_jobs(self.diags, ['diagnostic', 'atl', '-1', '0', '100']) -# self.assertEqual(len(jobs), 2) -# self.assertEqual(jobs[0], HeatContent(self.data_manager, '20010101', 0, 0, Basins().Atlantic, -1, self.box)) -# self.assertEqual(jobs[1], HeatContent(self.data_manager, '20010101', 0, 1, Basins().Atlantic, -1, self.box)) -# -# 
with self.assertRaises(Exception): -# HeatContent.generate_jobs(self.diags, ['diagnostic']) -# -# with self.assertRaises(Exception): -# HeatContent.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) -# -# def test_str(self): -# self.assertEquals(str(self.heat_content), 'Heat content Startdate: 20000101 Member: 1 Chunk: 1 Mixed layer: 1 ' -# 'Box: 0-100 Basin: Global_Ocean') +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.box import Box +from earthdiagnostics.constants import Basins +from earthdiagnostics.ocean.heatcontent import HeatContent +from mock import Mock, patch + + +def _get_levels_from_meters_mock(cls, box): + return 20, 10 + + +class TestHeatContent(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + self.box = Box(True) + self.box.min_depth = 0 + self.box.max_depth = 100 + + @patch('earthdiagnostics.ocean.heatcontent.HeatContent._get_levels_from_meters') + def test_generate_jobs(self, levels_mock): + levels_mock.return_value = (1, 20) + jobs = HeatContent.generate_jobs(self.diags, ['diagnostic', 'Global', '-1', '0', '100']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], HeatContent(self.data_manager, '20010101', 0, 0, Basins().Global, -1, + self.box, 0, 0)) + self.assertEqual(jobs[1], HeatContent(self.data_manager, '20010101', 0, 1, Basins().Global, -1, + self.box, 0, 0)) + + with self.assertRaises(Exception): + HeatContent.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(Exception): + HeatContent.generate_jobs(self.diags, ['diagnostic', '0', '0', '0', '0', '0', '0', '0']) + + def test_str(self): + diag = HeatContent(self.data_manager, '20010101', 0, 0, Basins().Global, -1, self.box, 1, 20) + self.assertEquals(str(diag), 'Heat content Startdate: 20010101 Member: 0 Chunk: 0 Mixed layer: -1 
Box: 0-100m ' + 'Basin: Global') diff --git a/test/unit/ocean/test_heatcontentlayer.py b/test/unit/ocean/test_heatcontentlayer.py index 74876fc3..8700025c 100644 --- a/test/unit/ocean/test_heatcontentlayer.py +++ b/test/unit/ocean/test_heatcontentlayer.py @@ -20,7 +20,7 @@ class TestHeatContentLayer(TestCase): self.box.min_depth = 0 self.box.max_depth = 100 - self.psi = HeatContentLayer(self.data_manager, '20000101', 1, 1, self.box, self.weight, 0, 10) def test_str(self): - self.assertEquals(str(self.psi), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0-100m') + diag = HeatContentLayer(self.data_manager, '20000101', 1, 1, self.box, self.weight, 0, 10) + self.assertEquals(str(diag), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0-100m') diff --git a/test/unit/ocean/test_interpolate.py b/test/unit/ocean/test_interpolate.py index 3ca2ba52..4d19949f 100644 --- a/test/unit/ocean/test_interpolate.py +++ b/test/unit/ocean/test_interpolate.py @@ -1,54 +1,76 @@ -# # coding=utf-8 -# from unittest import TestCase -# -# from earthdiagnostics.ocean.interpolate import Interpolate -# from mock import Mock -# -# from earthdiagnostics.modelingrealm import ModelingRealms -# -# -# class TestInterpolate(TestCase): -# -# def setUp(self): -# self.data_manager = Mock() -# -# self.diags = Mock() -# self.diags.model_version = 'model_version' -# self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) -# self.diags.config.experiment.model_version = 'model_version' -# -# self.interpolate = Interpolate(self.data_manager, '20000101', 1, 1, ModelingRealms.atmos, 'var', 'grid', -# 'model_version', False) -# -# def test_generate_jobs(self): -# jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var']) -# self.assertEqual(len(jobs), 2) -# self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'grid', -# 'model_version', False)) -# self.assertEqual(jobs[1], 
Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'grid', -# 'model_version', False)) -# -# jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos']) -# self.assertEqual(len(jobs), 2) -# self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', -# 'model_version', False)) -# self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', -# 'model_version', False)) -# -# jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos', 'true']) -# self.assertEqual(len(jobs), 2) -# self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', -# 'model_version', True)) -# self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', -# 'model_version', True)) -# -# with self.assertRaises(Exception): -# Interpolate.generate_jobs(self.diags, ['interp']) -# -# with self.assertRaises(Exception): -# Interpolate.generate_jobs(self.diags, ['interp', '0', '0', '0', '0', '0', '0', '0']) -# -# def test_str(self): -# self.assertEquals(str(self.interpolate), 'Interpolate Startdate: 20000101 Member: 1 Chunk: 1 ' -# 'Variable: atmos:var Target grid: grid Invert lat: False ' -# 'Model: model_version') +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.ocean.interpolate import Interpolate +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.diagnostic import DiagnosticVariableListOption + + +class TestInterpolate(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.model_version = 'model_version' + + def fake_parse(self, value): + 
return value.split('-') + + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'grid', + 'model_version', False, '')) + self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'grid', + 'model_version', False, '')) + + jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var1-var2']) + self.assertEqual(len(jobs), 4) + self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var1', 'grid', + 'model_version', False, '')) + self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var1', 'grid', + 'model_version', False, '')) + self.assertEqual(jobs[2], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var2', 'grid', + 'model_version', False, '')) + self.assertEqual(jobs[3], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var2', 'grid', + 'model_version', False, '')) + + jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', + 'model_version', False, '')) + self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', + 'model_version', False, '')) + + jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos', 'true']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', + 'model_version', True, '')) + self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 
'var', 'grid', + 'model_version', True, '')) + + jobs = Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos', 'true', 'original_grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', + 'model_version', True, 'original_grid')) + self.assertEqual(jobs[1], Interpolate(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', 'grid', + 'model_version', True, 'original_grid')) + + with self.assertRaises(Exception): + Interpolate.generate_jobs(self.diags, ['interp']) + + with self.assertRaises(Exception): + Interpolate.generate_jobs(self.diags, ['interp', 'grid', 'var', 'atmos', 'true', 'original_grid', 'extra']) + + def test_str(self): + diag = Interpolate(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', + 'model_version', True, 'original_grid') + self.assertEquals(str(diag), 'Interpolate Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' + 'Target grid: grid Invert lat: True Model: model_version ' + 'Original grid: original_grid') diff --git a/test/unit/ocean/test_interpolatecdo.py b/test/unit/ocean/test_interpolatecdo.py new file mode 100644 index 00000000..04b08552 --- /dev/null +++ b/test/unit/ocean/test_interpolatecdo.py @@ -0,0 +1,89 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.ocean.interpolatecdo import InterpolateCDO +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.diagnostic import DiagnosticVariableListOption, DiagnosticOptionError + + +class TestInterpolate(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.model_version = 'model_version' + self.diags.config.experiment.atmos_grid = 'atmos_grid' + + 
def fake_parse(self, value): + if not value: + raise DiagnosticOptionError + return value.split('-') + + @patch('earthdiagnostics.ocean.interpolatecdo.InterpolateCDO._compute_weights') + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) + def test_generate_jobs(self, mock_weights): + mock_weights.return_value = None + + jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'atmos_grid', 'model_version', True, '', None)) + self.assertEqual(jobs[1], InterpolateCDO(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 'atmos_grid', 'model_version', True, '', None)) + + jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var', 'target_grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', True, '', None)) + self.assertEqual(jobs[1], InterpolateCDO(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', True, '', None)) + + jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var', 'target_grid', 'bicubic']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', True, '', None)) + self.assertEqual(jobs[1], InterpolateCDO(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', True, '', None)) + + jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var', 'target_grid', 'bicubic', + 'false']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', False, '', None)) + 
self.assertEqual(jobs[1], InterpolateCDO(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', False, '', None)) + + jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var', 'target_grid', 'bicubic', + 'false', 'orig']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', False, 'orig', None)) + self.assertEqual(jobs[1], InterpolateCDO(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', False, 'orig', None)) + + jobs = InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var', 'target_grid', 'bicubic', + 'false', 'orig', 'false']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', False, 'orig', None)) + self.assertEqual(jobs[1], InterpolateCDO(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 'target_grid', 'model_version', False, 'orig', None)) + + with self.assertRaises(DiagnosticOptionError): + InterpolateCDO.generate_jobs(self.diags, ['interp']) + + with self.assertRaises(DiagnosticOptionError): + InterpolateCDO.generate_jobs(self.diags, ['interpcdo', 'ocean', 'var', 'bicubic', 'false', 'orig', 'false', + 'extra']) + + def test_str(self): + diag = InterpolateCDO(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 'atmos_grid', 'model_version', False, 'orig', None) + self.assertEquals(str(diag), 'Interpolate with CDO Startdate: 20010101 Member: 0 Chunk: 0 Variable: ocean:var ' + 'Target grid: atmos_grid Original grid: orig Mask ocean: False ' + 'Model: model_version') diff --git a/test/unit/ocean/test_maskland.py b/test/unit/ocean/test_maskland.py new file mode 100644 index 00000000..ede00911 --- /dev/null +++ b/test/unit/ocean/test_maskland.py @@ -0,0 +1,65 
@@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.diagnostic import DiagnosticVariableListOption, DiagnosticOptionError +from earthdiagnostics.box import Box +from earthdiagnostics.ocean.mask_land import MaskLand +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestMaskLand(TestCase): + + def setUp(self): + self.data_manager = Mock() + self.diags = Mock() + + self.box = Box() + self.box.min_lat = 0 + self.box.max_lat = 0 + self.box.min_lon = 0 + self.box.max_lon = 0 + + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + def fake_parse(self, value): + if not value: + raise DiagnosticOptionError + return value.split('-') + + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) + @patch('earthdiagnostics.ocean.mask_land.MaskLand._get_mask') + def test_generate_jobs(self, get_mask_mock): + get_mask_mock.return_value = None + jobs = MaskLand.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], MaskLand(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 't', '')) + self.assertEqual(jobs[1], MaskLand(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 't', '')) + + for mask in ('t', 'u', 'v', 'f', 'w'): + jobs = MaskLand.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', mask]) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], MaskLand(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + mask, '')) + self.assertEqual(jobs[1], MaskLand(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + mask, '')) + + with self.assertRaises(DiagnosticOptionError): + MaskLand.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'BAD']) + + jobs = MaskLand.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 't', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], 
MaskLand(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', + 't', 'grid')) + self.assertEqual(jobs[1], MaskLand(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', + 't', 'grid')) + + with self.assertRaises(DiagnosticOptionError): + MaskLand.generate_jobs(self.diags, ['diagnostic']) + with self.assertRaises(DiagnosticOptionError): + MaskLand.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 't', 'grid', 'extra']) + + def test_str(self): + diag = MaskLand(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 't', 'grid') + self.assertEquals(str(diag), 'Land mask Startdate: 20010101 Member: 0 Chunk: 0 Variable: ocean:var Grid: grid') diff --git a/test/unit/ocean/test_mxl.py b/test/unit/ocean/test_mxl.py new file mode 100644 index 00000000..0385d0da --- /dev/null +++ b/test/unit/ocean/test_mxl.py @@ -0,0 +1,29 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.ocean.mxl import Mxl +from mock import Mock + + +class TestMxl(TestCase): + + def setUp(self): + self.data_manager = Mock() + + self.diags = Mock() + self.diags.model_version = 'model_version' + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + + def test_generate_jobs(self): + jobs = Mxl.generate_jobs(self.diags, ['diagnostic']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Mxl(self.data_manager, '20010101', 0, 0)) + self.assertEqual(jobs[1], Mxl(self.data_manager, '20010101', 0, 1)) + + with self.assertRaises(Exception): + Mxl.generate_jobs(self.diags, ['diagnostic', 'extra']) + + def test_str(self): + diag = Mxl(self.data_manager, '20010101', 0, 0) + self.assertEquals(str(diag), 'Mixed layer Startdate: 20010101 Member: 0 Chunk: 0') diff --git a/test/unit/ocean/test_region_mean.py b/test/unit/ocean/test_region_mean.py new file mode 100644 index 00000000..4c96bdd6 --- /dev/null +++ b/test/unit/ocean/test_region_mean.py @@ -0,0 +1,101 @@ +# coding=utf-8 +from unittest 
import TestCase +from earthdiagnostics.ocean.regionmean import RegionMean +from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.constants import Basins +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import DiagnosticOptionError, DiagnosticVariableOption +from mock import Mock, patch + + +class TestRegionMean(TestCase): + + def setUp(self): + self.data_manager = Mock() + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + + def fake_parse(self, value): + if not value: + raise DiagnosticOptionError + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + box = Box() + box.min_depth = 0 + box.max_depth = 0 + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'T', + box, True, Basins().Global, False, '')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'T', + box, True, Basins().Global, False, '')) + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + box, True, Basins().Global, False, '')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + box, True, Basins().Global, False, '')) + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + box, True, Basins().Global, False, '')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, 
ModelingRealms.ocean, 'var', 'U', + box, True, Basins().Global, False, '')) + + box = Box() + box.min_depth = 1 + box.max_depth = 10 + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + box, True, Basins().Global, False, '')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + box, True, Basins().Global, False, '')) + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', 'false']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + box, False, Basins().Global, False, '')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + box, False, Basins().Global, False, '')) + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', 'false', + 'True']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + box, False, Basins().Global, True, '')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + box, False, Basins().Global, True, '')) + + jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', 'false', + 'True', 'grid']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', + box, False, Basins().Global, True, 'grid')) + self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', 'U', + box, False, Basins().Global, True, 'grid')) + + with 
self.assertRaises(DiagnosticOptionError): + RegionMean.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(DiagnosticOptionError): + RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', 'false', + 'True', 'grid', 'extra']) + + def test_str(self): + box = Box() + box.min_depth = 1 + box.max_depth = 10 + + diag = RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', 'U', box, False, + Basins().Global, True, 'grid') + self.assertEquals(str(diag), 'Region mean Startdate: 20010101 Member: 0 Chunk: 0 Variable: var Grid point: U ' + 'Box: 1-10 Save 3D: False Save variance: True Original grid: grid') diff --git a/test/unit/ocean/test_vertical_gradient.py b/test/unit/ocean/test_vertical_gradient.py new file mode 100644 index 00000000..e274df25 --- /dev/null +++ b/test/unit/ocean/test_vertical_gradient.py @@ -0,0 +1,66 @@ +# coding=utf-8 +from unittest import TestCase +from earthdiagnostics.ocean.verticalgradient import VerticalGradient +from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.constants import Basins +from earthdiagnostics.box import Box +from earthdiagnostics.diagnostic import DiagnosticOptionError, DiagnosticVariableOption +from mock import Mock, patch + + +class TestVerticalGradient(TestCase): + + def setUp(self): + self.data_manager = Mock() + self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + + + def fake_parse(self, value): + if not value: + raise DiagnosticOptionError + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + + box = Box() + box.min_depth = 1 + box.max_depth = 2 + + jobs = VerticalGradient.generate_jobs(self.diags, ['diagnostic', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], VerticalGradient(self.data_manager, '20010101', 0, 0, 'var', box)) + self.assertEqual(jobs[1], 
VerticalGradient(self.data_manager, '20010101', 0, 1, 'var', box)) + + box = Box() + box.min_depth = 2 + box.max_depth = 2 + + jobs = VerticalGradient.generate_jobs(self.diags, ['diagnostic', 'var', '2']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], VerticalGradient(self.data_manager, '20010101', 0, 0, 'var', box)) + self.assertEqual(jobs[1], VerticalGradient(self.data_manager, '20010101', 0, 1, 'var', box)) + + box = Box() + box.min_depth = 1 + box.max_depth = 10 + + jobs = VerticalGradient.generate_jobs(self.diags, ['diagnostic', 'var', '1', 10]) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], VerticalGradient(self.data_manager, '20010101', 0, 0, 'var', box)) + self.assertEqual(jobs[1], VerticalGradient(self.data_manager, '20010101', 0, 1, 'var', box)) + + with self.assertRaises(DiagnosticOptionError): + VerticalGradient.generate_jobs(self.diags, ['diagnostic']) + + with self.assertRaises(DiagnosticOptionError): + VerticalGradient.generate_jobs(self.diags, ['diagnostic', 'var', '1', '10', 'extra']) + + def test_str(self): + box = Box() + box.min_depth = 1 + box.max_depth = 10 + + diag = VerticalGradient(self.data_manager, '20010101', 0, 0, 'var', box) + self.assertEquals(str(diag), 'Vertical gradient Startdate: 20010101 Member: 0 Chunk: 0 Variable: var Box: 1-10') diff --git a/test/unit/statistics/test_daysoverpercentile.py b/test/unit/statistics/test_daysoverpercentile.py new file mode 100644 index 00000000..afa37276 --- /dev/null +++ b/test/unit/statistics/test_daysoverpercentile.py @@ -0,0 +1,32 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.statistics.daysoverpercentile import DaysOverPercentile +from mock import Mock + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestDaysOverPercentile(TestCase): + + def setUp(self): + self.data_manager = Mock() + self.diags = Mock() + + def test_generate_jobs(self): + jobs = DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 
'var', '2000', '2001', '11']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, + 2000, 11)) + self.assertEqual(jobs[1], DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, + 2001, 11)) + + with self.assertRaises(Exception): + DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '2000', '2001']) + with self.assertRaises(Exception): + DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '2000', '2001', '11', 'extra']) + + def test_str(self): + diagnostic = DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, 2000, 11) + self.assertEquals(str(diagnostic), 'Days over percentile Startdate: 20001101 Variable: ocean:var ' + 'Climatology: 2000-2001') -- GitLab From ea0a61f31fd96d016c8252796114c8e9663d529b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 30 Aug 2017 16:27:46 +0200 Subject: [PATCH 60/82] Added more tests --- earthdiagnostics/diagnostic.py | 4 ++ earthdiagnostics/publisher.py | 16 ++++-- test/unit/test_diagnostic.py | 90 ++++++++++++++++++++++++++++++++++ test/unit/test_publisher.py | 37 ++++++++++++++ 4 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 test/unit/test_publisher.py diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index bd3521d0..90bcfa95 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -395,6 +395,10 @@ class DiagnosticChoiceOption(DiagnosticOption): self.choices = choices self.ignore_case = ignore_case + # To check if it is valid + if default_value is not None: + self.parse(default_value) + def parse(self, value): value = self.check_default(value) if self.ignore_case: diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index 6fc9e4c8..d1a7c856 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -4,7 +4,7 @@ 
class Publisher(object): Base class to provide functionality to notify updates to other objects """ def __init__(self): - self.subscribers = dict() + self._subscribers = dict() def subscribe(self, who, callback=None): """ @@ -17,7 +17,7 @@ class Publisher(object): """ if callback is None: callback = getattr(who, 'update') - self.subscribers[who] = callback + self._subscribers[who] = callback def unsubscribe(self, who): """ @@ -26,7 +26,7 @@ class Publisher(object): :param who: suscriber to remove :type who: object """ - del self.subscribers[who] + del self._subscribers[who] def dispatch(self, *args): """ @@ -34,5 +34,13 @@ class Publisher(object): :param args: arguments to pass """ - for subscriber, callback in self.subscribers.items(): + for subscriber, callback in self._subscribers.items(): callback(*args) + + def suscribers(self, *args): + """ + Notify update to all the suscribers + + :param args: arguments to pass + """ + return self._subscribers.keys() diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index 5754d4a9..5c315e1d 100644 --- a/test/unit/test_diagnostic.py +++ b/test/unit/test_diagnostic.py @@ -3,6 +3,7 @@ from earthdiagnostics.diagnostic import * from unittest import TestCase from earthdiagnostics.modelingrealm import ModelingRealms +from mock import patch, Mock # noinspection PyTypeChecker,PyTypeChecker,PyTypeChecker @@ -258,9 +259,98 @@ class TestDiagnosticListIntOption(TestCase): diag = DiagnosticListIntOption('option') self.assertEqual([3], diag.parse('3')) + def test_too_low(self): + diag = DiagnosticListIntOption('option', min_limit=5) + with self.assertRaises(DiagnosticOptionError): + diag.parse('3') + + def test_too_high(self): + diag = DiagnosticListIntOption('option', max_limit=5) + with self.assertRaises(DiagnosticOptionError): + diag.parse('8') + def test_parse_bad_value(self): diag = DiagnosticListIntOption('option') with self.assertRaises(ValueError): diag.parse('3.5') +class 
TestDiagnosticChoiceOption(TestCase): + + def test_choice_value(self): + diag = DiagnosticChoiceOption('option', ('a', 'b')) + self.assertEqual('a', diag.parse('a')) + + def test_choice_default_value(self): + diag = DiagnosticChoiceOption('option', ('a', 'b'), default_value='a') + self.assertEqual('a', diag.parse('')) + + def test_bad_default_value(self): + with self.assertRaises(DiagnosticOptionError): + DiagnosticChoiceOption('option', ('a', 'b'), default_value='c') + + def test_ignore_case_value(self): + diag = DiagnosticChoiceOption('option', ('a', 'b')) + self.assertEqual('b', diag.parse('b')) + self.assertEqual('b', diag.parse('B')) + + diag = DiagnosticChoiceOption('option', ('a', 'b'), ignore_case=False) + self.assertEqual('b', diag.parse('b')) + with self.assertRaises(DiagnosticOptionError): + self.assertEqual('b', diag.parse('B')) + + +class TestDiagnosticVariableOption(TestCase): + + def get_var_mock(self, name): + mock = Mock() + mock.short_name = name + return mock + + @patch('earthdiagnostics.variable.VariableManager.get_variable') + def test_parse(self, get_variable_mock): + get_variable_mock.return_value = self.get_var_mock('var1') + + diag = DiagnosticVariableOption() + self.assertEqual('var1', diag.parse('var1')) + + @patch('earthdiagnostics.variable.VariableManager.get_variable') + def test_parse(self, get_variable_mock): + get_variable_mock.return_value = self.get_var_mock('var1') + + diag = DiagnosticVariableOption() + self.assertEqual('var1', diag.parse('var1')) + + @patch('earthdiagnostics.variable.VariableManager.get_variable') + def test_not_recognized(self, get_variable_mock): + get_variable_mock.return_value = None + + diag = DiagnosticVariableOption() + self.assertEqual('var1', diag.parse('var1')) + + +class TestDiagnosticVariableListOption(TestCase): + + @patch('earthdiagnostics.variable.VariableManager.get_variable') + def test_parse_multiple(self, get_variable_mock): + get_variable_mock.side_effect = (self.get_var_mock('var1'), 
self.get_var_mock('var2')) + diag = DiagnosticVariableListOption('variables') + self.assertEqual(['var1', 'var2'], diag.parse('var1-var2')) + + @patch('earthdiagnostics.variable.VariableManager.get_variable') + def test_parse_one(self, get_variable_mock): + get_variable_mock.return_value = self.get_var_mock('var1') + diag = DiagnosticVariableListOption('variables') + self.assertEqual(['var1'], diag.parse('var1')) + + @patch('earthdiagnostics.variable.VariableManager.get_variable') + def test_not_recognized(self, get_variable_mock): + get_variable_mock.return_value = None + diag = DiagnosticVariableListOption('variables') + self.assertEqual(['var1'], diag.parse('var1')) + + def get_var_mock(self, name): + mock = Mock() + mock.short_name = name + return mock + diff --git a/test/unit/test_publisher.py b/test/unit/test_publisher.py new file mode 100644 index 00000000..25d8c034 --- /dev/null +++ b/test/unit/test_publisher.py @@ -0,0 +1,37 @@ +# coding=utf-8 +from unittest import TestCase +from earthdiagnostics.publisher import Publisher +from mock import Mock + + +class TestPublisher(TestCase): + + def test_suscribe(self): + suscriber = Mock() + pub = Publisher() + pub.subscribe(suscriber, callback=suscriber.callback) + self.assertIn(suscriber, pub.suscribers()) + + def test_suscribe_default(self): + suscriber = Mock() + pub = Publisher() + pub.subscribe(suscriber) + self.assertTrue(hasattr(suscriber, 'update')) + self.assertIn(suscriber, pub.suscribers()) + + def test_unsuscribe(self): + suscriber = Mock() + pub = Publisher() + pub.subscribe(suscriber, callback=suscriber.callback) + pub.unsubscribe(suscriber) + + self.assertNotIn(suscriber, pub.suscribers()) + + def test_dispatch(self): + suscriber = Mock() + pub = Publisher() + pub.subscribe(suscriber, callback=suscriber.callback) + + pub.dispatch(1, 2, 3) + suscriber.callback.assert_called_with(1, 2, 3) + -- GitLab From 1a26ef5bad2dab5aa9844c56a217446d176ac428 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: 
Thu, 31 Aug 2017 15:27:03 +0200 Subject: [PATCH 61/82] Added more tests --- earthdiagnostics/variable.py | 7 +- test/unit/test_diagnostic.py | 127 +++++++++++++++------------- test/unit/test_variable.py | 158 ++++++++++++++++++++++++++++++++++- 3 files changed, 229 insertions(+), 63 deletions(-) diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index dc5745fd..4d34edf1 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -358,7 +358,7 @@ class Variable(object): parsed[0]) return parsed[0] - if not domains[0]: + elif len(domains) == 0: Log.warning('Variable {0} has no modeling realm defined'.format(self.short_name)) return None else: @@ -374,9 +374,6 @@ class Variable(object): self.valid_min = var_line[7].strip() self.valid_max = var_line[8].strip() self.grid = var_line[9].strip() - for table in var_line[10].strip().split(':'): - if table: - self.add_table(table) def get_table(self, frequency, data_convention): for table, priority in self.tables: @@ -385,7 +382,7 @@ class Variable(object): if self.domain: table_name = self.domain.get_table_name(frequency, data_convention) return CMORTable(table_name, frequency, 'December 2013') - return self.tables[0] + raise ValueError('Can not get table for {0} and frequency {1}'.format(self, frequency)) @staticmethod def _select_most_specific(parsed): diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index 5c315e1d..cc92092e 100644 --- a/test/unit/test_diagnostic.py +++ b/test/unit/test_diagnostic.py @@ -6,63 +6,6 @@ from earthdiagnostics.modelingrealm import ModelingRealms from mock import patch, Mock -# noinspection PyTypeChecker,PyTypeChecker,PyTypeChecker -class TestDiagnostic(TestCase): - - # noinspection PyMissingOrEmptyDocstring - class MockDiag(Diagnostic): - def request_data(self): - pass - - def declare_data_generated(self): - pass - - def compute(self): - pass - - @classmethod - def generate_jobs(cls, diags, options): - pass - - alias = 'mockdiag' 
- - def setUp(self): - self.diagnostic = Diagnostic(None) - Diagnostic.register(TestDiagnostic.MockDiag) - - def test_register(self): - with self.assertRaises(ValueError): - # noinspection PyTypeChecker - Diagnostic.register(str) - with self.assertRaises(ValueError): - Diagnostic.register(Diagnostic) - Diagnostic.register(TestDiagnostic.MockDiag) - - def test_get_diagnostic(self): - self.assertIsNone(Diagnostic.get_diagnostic('none')) - self.assertIs(TestDiagnostic.MockDiag, Diagnostic.get_diagnostic('mockdiag')) - - def test_generate_jobs(self): - with self.assertRaises(NotImplementedError): - Diagnostic.generate_jobs(None, ['']) - - def test_compute(self): - with self.assertRaises(NotImplementedError): - self.diagnostic.compute() - - def test_str(self): - self.assertEquals('Developer must override base class __str__ method', str(self.diagnostic)) - - def test_repr(self): - self.assertEquals(self.diagnostic.__repr__(), str(self.diagnostic)) - - def test_empty_process_options(self): - self.assertEqual(len(Diagnostic.process_options(('diag_name',), tuple())), 0) - - # def test_empty_process_options(self): - # self.assertEqual(len(cls.process_options(('diag_name', ), tuple())), 0) - - class TestDiagnosticOption(TestCase): def test_good_default_value(self): @@ -354,3 +297,73 @@ class TestDiagnosticVariableListOption(TestCase): mock.short_name = name return mock + +class TestDiagnostic(TestCase): + + def setUp(cls): + class MockDiag(Diagnostic): + pass + TestDiagnostic.MockDiag = MockDiag + + def test_str(self): + self.assertEqual(str(Diagnostic(None)), 'Developer must override base class __str__ method') + + def test_compute_is_virtual(self): + with self.assertRaises(NotImplementedError): + Diagnostic(None).compute() + + def test_declare_data_generated_is_virtual(self): + with self.assertRaises(NotImplementedError): + Diagnostic(None).declare_data_generated() + + def test_request_data_is_virtual(self): + with self.assertRaises(NotImplementedError): + 
Diagnostic(None).request_data() + + @patch.object(Diagnostic, 'dispatch') + def test_set_status_call_dispatch(self, dispatch_mock): + + + diag = Diagnostic(None) + diag.status = DiagnosticStatus.FAILED + dispatch_mock.assert_called_once_with(diag) + + @patch.object(Diagnostic, 'dispatch') + def test_set_status_call_dispatch(self, dispatch_mock): + class MockDiag(Diagnostic): + pass + + diag = Diagnostic(None) + diag.status = diag.status + assert not dispatch_mock.called, 'Dispatch should not have been called' + + def test_register(self): + with self.assertRaises(ValueError): + Diagnostic.register(TestDiagnostic) + + with self.assertRaises(ValueError): + Diagnostic.register(TestDiagnostic.MockDiag) + + TestDiagnostic.MockDiag.alias = 'mock' + Diagnostic.register(TestDiagnostic.MockDiag) + + + def test_get_diagnostic(self): + self.assertIsNone(Diagnostic.get_diagnostic('none')) + TestDiagnostic.MockDiag.alias = 'mock' + Diagnostic.register(TestDiagnostic.MockDiag) + self.assertIs(TestDiagnostic.MockDiag, Diagnostic.get_diagnostic('mock')) + + def test_generate_jobs(self): + with self.assertRaises(NotImplementedError): + Diagnostic.generate_jobs(None, ['']) + + def test_compute(self): + with self.assertRaises(NotImplementedError): + Diagnostic(None).compute() + + def test_repr(self): + self.assertEquals(Diagnostic(None).__repr__(), str(Diagnostic(None))) + + def test_empty_process_options(self): + self.assertEqual(len(Diagnostic.process_options(('diag_name',), tuple())), 0) diff --git a/test/unit/test_variable.py b/test/unit/test_variable.py index 07199ab8..8af9b1ad 100644 --- a/test/unit/test_variable.py +++ b/test/unit/test_variable.py @@ -2,7 +2,10 @@ from mock import Mock from unittest import TestCase -from earthdiagnostics.variable import CMORTable, VariableAlias +from earthdiagnostics.variable import CMORTable, VariableAlias, Variable, VariableJsonException +from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.constants import Basins 
+from earthdiagnostics.frequency import Frequencies class TestCMORTable(TestCase): @@ -24,4 +27,157 @@ class TestVariableAlias(TestCase): alias.grid = 'grid' self.assertEquals(str(alias), 'alias Basin: basin Grid: grid') +class TestVariable(TestCase): + def test_parse_json(self): + var = Variable() + json = {'out_name': 'out_name', + 'standard_name': 'standard_name', + 'long_name': 'long_name', + 'modeling_realm': 'ocean', + 'valid_min': 'valid_min', + 'valid_max': 'valid_max', + 'units': 'units', + } + var.parse_json(json, 'out_name') + + self.assertEqual(var.short_name, 'out_name') + self.assertEqual(var.standard_name, 'standard_name') + self.assertEqual(var.long_name, 'long_name') + + self.assertEqual(var.valid_min, 'valid_min') + self.assertEqual(var.valid_max, 'valid_max') + self.assertEqual(var.units, 'units') + self.assertEqual(var.priority, 1) + + self.assertEqual(var.domain, ModelingRealms.ocean) + + def test_parse_json_no_out_name(self): + var = Variable() + json = {'standard_name': 'standard_name', + 'long_name': 'long_name', + 'modeling_realm': 'ocean', + 'valid_min': 'valid_min', + 'valid_max': 'valid_max', + 'units': 'units', + } + with self.assertRaises(VariableJsonException): + var.parse_json(json, 'out_name') + + def test_parse_json_with_priority(self): + var = Variable() + json = {'out_name': 'out_name', + 'standard_name': 'standard_name', + 'long_name': 'long_name', + 'modeling_realm': 'ocean', + 'valid_min': 'valid_min', + 'valid_max': 'valid_max', + 'units': 'units', + 'priority': '2', + } + var.parse_json(json, 'out_name') + + self.assertEqual(var.short_name, 'out_name') + self.assertEqual(var.standard_name, 'standard_name') + self.assertEqual(var.long_name, 'long_name') + + self.assertEqual(var.valid_min, 'valid_min') + self.assertEqual(var.valid_max, 'valid_max') + self.assertEqual(var.units, 'units') + self.assertEqual(var.priority, 2) + + self.assertEqual(var.domain, ModelingRealms.ocean) + + def 
test_parse_json_with_primavera_priority(self): + var = Variable() + json = {'out_name': 'out_name', + 'standard_name': 'standard_name', + 'long_name': 'long_name', + 'modeling_realm': 'ocean', + 'valid_min': 'valid_min', + 'valid_max': 'valid_max', + 'units': 'units', + 'primavera_priority': '2', + } + var.parse_json(json, 'out_name') + + self.assertEqual(var.short_name, 'out_name') + self.assertEqual(var.standard_name, 'standard_name') + self.assertEqual(var.long_name, 'long_name') + + self.assertEqual(var.valid_min, 'valid_min') + self.assertEqual(var.valid_max, 'valid_max') + self.assertEqual(var.units, 'units') + self.assertEqual(var.priority, 2) + + self.assertEqual(var.domain, ModelingRealms.ocean) + + def test_get_modelling_realm(self): + var = Variable() + domain = var.get_modelling_realm(('ocean',)) + self.assertEqual(ModelingRealms.ocean, domain) + + domain = var.get_modelling_realm(('ocean', 'atmos')) + self.assertEqual(ModelingRealms.ocean, domain) + + domain = var.get_modelling_realm(('ocean', 'ocnBgchem')) + self.assertEqual(ModelingRealms.ocnBgchem, domain) + + domain = var.get_modelling_realm(('ocean', 'seaIce')) + self.assertEqual(ModelingRealms.seaIce, domain) + + domain = var.get_modelling_realm(('atmos', 'atmosChem')) + self.assertEqual(ModelingRealms.atmosChem, domain) + + domain = var.get_modelling_realm(('land', 'landIce')) + self.assertEqual(ModelingRealms.landIce, domain) + + domain = var.get_modelling_realm(tuple()) + self.assertIsNone(domain) + + def test_parse_csv(self): + var = Variable() + var.parse_csv(['not_used', 'out_name', 'standard_name', 'long_name', 'ocean', 'global', 'units', + 'valid_min', 'valid_max', 'grid', 'Amon: ']) + self.assertEqual(var.short_name, 'out_name') + self.assertEqual(var.standard_name, 'standard_name') + self.assertEqual(var.long_name, 'long_name') + + self.assertEqual(var.valid_min, 'valid_min') + self.assertEqual(var.valid_max, 'valid_max') + self.assertEqual(var.units, 'units') + 
self.assertEqual(var.grid, 'grid') + + self.assertEqual(var.domain, ModelingRealms.ocean) + self.assertEqual(var.basin, Basins().Global) + + def test_get_table(self): + var = Variable() + var.domain = ModelingRealms.atmos + table = var.get_table(Frequencies.monthly, 'specs') + self.assertEqual(table.frequency, Frequencies.monthly) + self.assertEqual(table.name, 'Amon') + self.assertEqual(table.date, 'December 2013') + + def test_get_table_added(self): + var = Variable() + var.domain = ModelingRealms.atmos + var.add_table(CMORTable('Amon', Frequencies.monthly, 'December 2013')) + table = var.get_table(Frequencies.monthly, 'specs') + self.assertEqual(table.frequency, Frequencies.monthly) + self.assertEqual(table.name, 'Amon') + self.assertEqual(table.date, 'December 2013') + + def test_get_table_not_added(self): + var = Variable() + var.domain = ModelingRealms.atmos + var.add_table(CMORTable('Amon', Frequencies.monthly, 'December 2013')) + table = var.get_table(Frequencies.daily, 'specs') + self.assertEqual(table.frequency, Frequencies.daily) + self.assertEqual(table.name, 'day') + self.assertEqual(table.date, 'December 2013') + + def test_get_table_not_matching(self): + var = Variable() + with self.assertRaises(ValueError): + var.get_table(Frequencies.daily, 'specs') -- GitLab From 7de0c4b86ef6f80284d001f7b8e752885dad8236 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 4 Sep 2017 14:44:52 +0200 Subject: [PATCH 62/82] Added discretize --- diags.conf | 4 +- earthdiagnostics/diagnostic.py | 4 +- earthdiagnostics/frequency.py | 2 + earthdiagnostics/obsreconmanager.py | 4 + earthdiagnostics/statistics/__init__.py | 1 + .../statistics/climatologicalpercentile.py | 166 ++---------- earthdiagnostics/statistics/discretize.py | 237 ++++++++++++++++++ earthdiagnostics/work_manager.py | 1 + .../test_climatologicalpercentile.py | 24 +- test/unit/statistics/test_discretize.py | 60 +++++ 10 files changed, 332 insertions(+), 171 deletions(-) create mode 100644 
earthdiagnostics/statistics/discretize.py create mode 100644 test/unit/statistics/test_discretize.py diff --git a/diags.conf b/diags.conf index b0c16c93..246ad6f4 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = climpercent,atmos,sfcWind,2000,2000,1 daysover,atmos,sfcWind,2000,2000,1 +DIAGS = discretize,atmos,sfcWind,500,0,40 climpercent,atmos,sfcWind,1984,1984,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -87,7 +87,7 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. 
You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 20000101 +STARTDATES = 19840101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 90bcfa95..6330ef4e 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -201,8 +201,8 @@ class Diagnostic(Publisher): return 'Developer must override base class __str__ method' def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, - to_modify=False): - request = self.data_manager.request_chunk(domain, var, startdate, member, chunk, grid, box, frequency) + to_modify=False, vartype=VariableType.MEAN): + request = self.data_manager.request_chunk(domain, var, startdate, member, chunk, grid, box, frequency, vartype) if to_modify: request.add_modifier(self) self._requests.append(request) diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py index 1473fa5a..09ebe3de 100644 --- a/earthdiagnostics/frequency.py +++ b/earthdiagnostics/frequency.py @@ -38,6 +38,8 @@ class Frequency(object): freq_str = 'clim' elif self in (Frequencies.three_hourly, Frequencies.six_hourly, Frequencies.hourly): freq_str = self.frequency[:-2] + 'hourly' + if vartype != VariableType.MEAN: + freq_str = '{0}_{1}'.format(freq_str, VariableType.to_str(vartype)) else: freq_str = 'monthly_{0}'.format(VariableType.to_str(vartype)) return freq_str diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index 749d0cf4..4d5d1bde 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -1,6 +1,8 @@ # coding=utf-8 import os +from bscearth.utils.log import Log + from earthdiagnostics.datamanager import DataManager from earthdiagnostics.variable_type import VariableType @@ -221,6 +223,7 @@ class ObsReconManager(DataManager): """ var = self._get_final_var_name(box, var) filepath = 
self.get_file_path(startdate, domain, var, frequency, vartype, grid, box) + Log.debug('{0} requested', filepath) return self._get_file_from_storage(filepath) def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, @@ -263,5 +266,6 @@ class ObsReconManager(DataManager): netcdf_file = self._declare_generated_file(filepath, domain, final_name, cmor_var, self.config.data_convention, region, diagnostic, grid, vartype, original_name) netcdf_file.frequency = frequency + Log.debug('{0} will be generated', filepath) return netcdf_file diff --git a/earthdiagnostics/statistics/__init__.py b/earthdiagnostics/statistics/__init__.py index 8f29b84a..2424b993 100644 --- a/earthdiagnostics/statistics/__init__.py +++ b/earthdiagnostics/statistics/__init__.py @@ -2,3 +2,4 @@ from monthlypercentile import MonthlyPercentile from climatologicalpercentile import ClimatologicalPercentile from daysoverpercentile import DaysOverPercentile +from discretize import Discretize diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index a4de5472..d3d96f88 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -34,52 +34,27 @@ class ClimatologicalPercentile(Diagnostic): Percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) - def __init__(self, data_manager, domain, variable, num_bins, start_year, end_year, min_value, max_value, + def __init__(self, data_manager, domain, variable, start_year, end_year, forecast_month, experiment_config): Diagnostic.__init__(self, data_manager) self.variable = variable self.domain = domain self.experiment_config = experiment_config - self.realizations = None - self.lat_coord = None - self.lon_coord = None - self.num_bins = num_bins - self._bins = None self.start_year = start_year self.end_year = end_year self.forecast_month = forecast_month 
self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) - if not math.isnan(min_value): - self.min_value = min_value - self.check_min_value = False - elif self.cmor_var and self.cmor_var.valid_min: - self.min_value = float(self.cmor_var.valid_min) - self.check_min_value = False - else: - self.min_value = None - self.check_min_value = True - - if not math.isnan(max_value): - self.max_value = max_value - self.check_max_value = False - elif self.cmor_var and self.cmor_var.valid_min: - self.max_value = float(self.cmor_var.valid_max) - self.check_max_value = False - else: - self.max_value = None - self.check_max_value = True def __eq__(self, other): - return self.domain == other.domain and self.variable == other.variable and self.num_bins == other.num_bins and \ - self.min_value == other.min_value and self.max_value == other.max_value and \ + return self.domain == other.domain and self.variable == other.variable and \ self.start_year == other.start_year and self.end_year == other.end_year and \ self.forecast_month == other.forecast_month def __str__(self): return 'Climatological percentile Variable: {0.domain}:{0.variable} Period: {0.start_year}-{0.end_year} ' \ - 'Forecast month: {0.forecast_month} Bins: {0.num_bins}'.format(self) + 'Forecast month: {0.forecast_month}'.format(self) @classmethod def generate_jobs(cls, diags, options): @@ -97,17 +72,13 @@ class ClimatologicalPercentile(Diagnostic): DiagnosticIntOption('start_year'), DiagnosticIntOption('end_year'), DiagnosticListIntOption('forecast_month'), - DiagnosticIntOption('bins', 2000), - DiagnosticFloatOption('min_value', float('nan')), - DiagnosticFloatOption('max_value', float('nan')), ) options = cls.process_options(options, options_available) job_list = list() for forecast_month in options['forecast_month']: job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'], - options['bins'], options['start_year'], options['end_year'], - 
options['min_value'], options['max_value'], + options['start_year'], options['end_year'], forecast_month, diags.config.experiment)) return job_list @@ -120,10 +91,11 @@ class ClimatologicalPercentile(Diagnostic): if startdate not in self.leadtime_files: self.leadtime_files[startdate] = {} Log.debug('Retrieving startdate {0}', startdate) - self.leadtime_files[startdate] = self.request_chunk(self.domain, self.variable, startdate, None, None) + self.leadtime_files[startdate] = self.request_chunk(self.domain, '{0}_dis'.format(self.variable), startdate, + None, None, vartype=VariableType.STATISTIC) def declare_data_generated(self): - var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month}'.format(self) + var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:2d}'.format(self) self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) @@ -132,48 +104,9 @@ class ClimatologicalPercentile(Diagnostic): Runs the diagnostic """ iris.FUTURE.netcdf_promote = True - for startdate in self.leadtime_files.keys(): - Log.debug('Getting data for startdate {0}', startdate) - data_cube = self._load_cube(startdate) - self._get_value_interval(data_cube) - self._get_realizations_present(data_cube) - if self.lat_coord is None: - self.units = data_cube.units - self.lat_coord = data_cube.coord('latitude') - self.lon_coord = data_cube.coord('longitude') - Log.info('Range: [{0}, {1}]', self.min_value, self.max_value) - distribution = self._get_distribution() - percentile_values = self._calculate_percentiles(distribution) + percentile_values = self._calculate_percentiles() self._save_results(percentile_values) - def _load_cube(self, startdate): - - handler = Utils.openCdf(self.leadtime_files[startdate].local_file) - if 'realization' in handler.variables: - handler.variables[self.variable].coordinates = 'realization' - handler.close() - 
data_cube = iris.load_cube(self.leadtime_files[startdate].local_file) - - date = parse_date(startdate) - lead_date = add_months(date, 1, self.data_manager.config.experiment.calendar) - leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} - - def assign_leadtime(coord, x): - leadtime_month = 1 - partial_date = leadtimes[leadtime_month] - while coord.units.num2date(x) >= partial_date: - leadtime_month += 1 - try: - partial_date = leadtimes[leadtime_month] - except KeyError: - new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) - partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) - leadtimes[leadtime_month] = partial_date - return leadtime_month - - iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) - return data_cube - def _save_results(self, percentile_values): temp = TempFile.get() percentile_coord = iris.coords.DimCoord(ClimatologicalPercentile.Percentiles, long_name='percentile') @@ -190,9 +123,9 @@ class ClimatologicalPercentile(Diagnostic): self.percentiles_file.set_local_file(temp, rename_var='percent') - def _calculate_percentiles(self, distribution): + def _calculate_percentiles(self): Log.debug('Calculating percentiles') - percentiles = {} + percentiles = iris.cube.CubeList() def calculate(point_distribution): cs = np.cumsum(point_distribution) @@ -200,79 +133,16 @@ class ClimatologicalPercentile(Diagnostic): percentile_values = ClimatologicalPercentile.Percentiles * total index = np.searchsorted(cs, percentile_values) return [(self._bins[i + 1] + self._bins[i]) / 2 for i in index] - for leadtime, dist in six.iteritems(distribution): - Log.debug('Calculating leadtime {0}', leadtime) - percentiles[leadtime] = np.apply_along_axis(calculate, 0, dist) - return percentiles + for leadtime_slice in self.distribution.slices_over('leadtime'): + percentiles.append(np.apply_along_axis(calculate, 0, leadtime_slice.data)) + return 
percentiles.merge_cube() def _get_distribution(self): - distribution = {} for startdate in self.leadtime_files: Log.info('Getting data for startdate {0}', startdate) - data_cube = self._load_cube(startdate) - Log.debug('Discretizing file {0}', data_cube) - - for leadtime in set(data_cube.coord('leadtime').points): - Log.debug('Discretizing leadtime {0}', leadtime) - leadtime_cube = data_cube.extract(iris.Constraint(leadtime=leadtime)) - - for realization in range(self.realizations): - Log.debug('Discretizing realization {0}', realization) - try: - realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization+1)) - except iris.exceptions.CoordinateNotFoundError: - realization_cube = leadtime_cube - if realization_cube is None and realization == 0: - realization_cube = leadtime_cube - if leadtime not in distribution: - distribution[leadtime] = self._calculate_distribution(realization_cube) - else: - distribution[leadtime] += self._calculate_distribution(realization_cube) - return distribution - - def _get_realizations_present(self, data_cube): - realizations = 1 - ensemble_dimensions = ('realization', 'ensemble') - for ensemble_dimension in ensemble_dimensions: - try: - realizations = data_cube.coord(ensemble_dimension).shape[0] - break - except iris.exceptions.CoordinateNotFoundError: - pass - - if self.realizations is None: - self.realizations = realizations - if realizations != self.realizations: - # noinspection PyNoneFunctionAssignment - self.realizations = min(self.realizations, realizations) - Log.warning('Different number of realizations in the data used by diagnostic {0}', self) - - def _get_value_interval(self, data_cube): - if self.check_min_value or self.check_max_value: - for time_slice in data_cube.slices_over('time'): - if self.check_min_value: - file_min = np.amin(time_slice.data) - if self.min_value is None: - self.min_value = file_min - self.min_value = min(self.min_value, file_min) - - if self.check_max_value: - file_max = 
np.amax(time_slice.data) - self.max_value = max(self.min_value, file_max) - - def _calculate_distribution(self, data_cube): - def calculate_histogram(time_series): - histogram, self._bins = np.histogram(time_series, bins=self.num_bins, - range=(self.min_value, self.max_value)) - return histogram - - return np.apply_along_axis(calculate_histogram, 0, data_cube.data) - - - - - - - - + data_cube = iris.load_cube(startdate.local_file) + if self.distribution is None: + self.distribution = data_cube + else: + self.distribution += data_cube diff --git a/earthdiagnostics/statistics/discretize.py b/earthdiagnostics/statistics/discretize.py new file mode 100644 index 00000000..08753ab5 --- /dev/null +++ b/earthdiagnostics/statistics/discretize.py @@ -0,0 +1,237 @@ +# coding=utf-8 +import six +from bscearth.utils.date import parse_date, add_months +from bscearth.utils.log import Log + +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ + DiagnosticIntOption, DiagnosticListIntOption, DiagnosticFloatOption +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.variable_type import VariableType +import numpy as np +import iris +from iris.cube import Cube +import iris.coord_categorisation +from iris.time import PartialDateTime +import iris.exceptions +import iris.coords +import math + + +class Discretize(Diagnostic): + """ + Discretizes a variable + + :param data_manager: data management object + :type data_manager: DataManager + :param variable: variable to average + :type variable: str + """ + + alias = 'discretize' + "Diagnostic alias for the configuration file" + + Percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) + + def __init__(self, data_manager, startdate, domain, variable, num_bins, min_value, max_value): + Diagnostic.__init__(self, data_manager) + + self.startdate = startdate + self.variable = variable + self.domain = domain + + 
self.realizations = None + self.num_bins = num_bins + self._bins = None + self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) + + if not math.isnan(min_value): + self.min_value = min_value + self.check_min_value = False + elif self.cmor_var and self.cmor_var.valid_min: + self.min_value = float(self.cmor_var.valid_min) + self.check_min_value = False + else: + self.min_value = None + self.check_min_value = True + + if not math.isnan(max_value): + self.max_value = max_value + self.check_max_value = False + elif self.cmor_var and self.cmor_var.valid_min: + self.max_value = float(self.cmor_var.valid_max) + self.check_max_value = False + else: + self.max_value = None + self.check_max_value = True + + @property + def bins(self): + if self._bins is None: + return self.num_bins + return self._bins + + @bins.setter + def bins(self, value): + self._bins = value + + def __eq__(self, other): + return self.domain == other.domain and self.variable == other.variable and self.num_bins == other.num_bins and \ + self.min_value == other.min_value and self.max_value == other.max_value and \ + self.startdate == other.startdate + + def __str__(self): + return 'Discretizing variable: {0.domain}:{0.variable} Startdate: {0.startdate} ' \ + 'Bins: {0.num_bins} Range: [{0.min_value}, {0.max_value}]'.format(self) + + @classmethod + def generate_jobs(cls, diags, options): + """ + Creates a job for each chunk to compute the diagnostic + + :param diags: Diagnostics manager class + :type diags: Diags + :param options: domain, variable, percentil number, maximum depth (level) + :type options: list[str] + :return: + """ + options_available = (DiagnosticDomainOption(), + DiagnosticVariableOption(), + DiagnosticIntOption('bins', 2000), + DiagnosticFloatOption('min_value', float('nan')), + DiagnosticFloatOption('max_value', float('nan')), + ) + options = cls.process_options(options, options_available) + + job_list = list() + for startdate in 
diags.config.experiment.startdates: + job_list.append(Discretize(diags.data_manager, startdate, options['domain'], options['variable'], + options['bins'], options['min_value'], options['max_value'])) + return job_list + + def request_data(self): + self.original_data = self.request_chunk(self.domain, self.variable, self.startdate, None, None) + + def declare_data_generated(self): + var_name = '{0.variable}_dis'.format(self) + self.discretized_data = self.declare_chunk(self.domain, var_name, self.startdate, None, None, + vartype=VariableType.STATISTIC) + + def compute(self): + """ + Runs the diagnostic + """ + iris.FUTURE.netcdf_promote = True + self._load_cube() + self._get_value_interval() + Log.info('Range: [{0}, {1}]', self.min_value, self.max_value) + self._get_distribution() + self._save_results() + + def _load_cube(self): + + handler = Utils.openCdf(self.original_data.local_file) + if 'realization' in handler.variables: + handler.variables[self.variable].coordinates = 'realization' + handler.close() + data_cube = iris.load_cube(self.original_data.local_file) + + date = parse_date(self.startdate) + lead_date = add_months(date, 1, self.data_manager.config.experiment.calendar) + leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} + + def assign_leadtime(coord, x): + leadtime_month = 1 + partial_date = leadtimes[leadtime_month] + while coord.units.num2date(x) >= partial_date: + leadtime_month += 1 + try: + partial_date = leadtimes[leadtime_month] + except KeyError: + new_date = add_months(date, leadtime_month, self.data_manager.config.experiment.calendar) + partial_date = PartialDateTime(new_date.year, new_date.month, new_date.day) + leadtimes[leadtime_month] = partial_date + return leadtime_month + + iris.coord_categorisation.add_categorised_coord(data_cube, 'leadtime', 'time', assign_leadtime) + self.data_cube = data_cube + + def _save_results(self): + Log.debug('Saving results...') + + bins = np.zeros(self.num_bins) + bins_bounds = 
np.zeros((self.num_bins, 2)) + + for x in range(self.num_bins): + bins[x] = (self.bins[x+1] - self.bins[x]) / 2 + self.bins[x] + bins_bounds[x, 0] = self.bins[x] + bins_bounds[x, 1] = self.bins[x+1] + + bins_coord = iris.coords.DimCoord(bins, var_name='bin', units=self.data_cube.units, bounds=bins_bounds) + + cubes = iris.cube.CubeList() + + for leadtime, distribution in self.distribution.iteritems(): + leadtime_cube = Cube(distribution.astype(np.uint32), var_name=self.data_cube.var_name, + standard_name=self.data_cube.standard_name, units='1') + leadtime_cube.add_dim_coord(bins_coord, 0) + leadtime_cube.add_dim_coord(self.data_cube.coord('latitude'), 1) + leadtime_cube.add_dim_coord(self.data_cube.coord('longitude'), 2) + leadtime_cube.add_aux_coord(iris.coords.AuxCoord(np.array((leadtime,), np.int8), var_name='leadtime', units='months')) + cubes.append(leadtime_cube) + temp = TempFile.get() + iris.FUTURE.netcdf_no_unlimited = True + iris.save(cubes.merge_cube(), temp, zlib=True) + self.discretized_data.set_local_file(temp, rename_var=self.data_cube.var_name) + + def _get_distribution(self): + self.distribution = {} + Log.debug('Discretizing...') + + for leadtime in set(self.data_cube.coord('leadtime').points): + Log.debug('Discretizing leadtime {0}', leadtime) + leadtime_cube = self.data_cube.extract(iris.Constraint(leadtime=leadtime)) + + for realization in self.data_cube.coord('realization').points: + Log.debug('Discretizing realization {0}', realization) + try: + realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization)) + except iris.exceptions.CoordinateNotFoundError: + realization_cube = leadtime_cube + if realization_cube is None and realization == 0: + realization_cube = leadtime_cube + if leadtime not in self.distribution: + self.distribution[leadtime] = self._calculate_distribution(realization_cube) + else: + self.distribution[leadtime] += self._calculate_distribution(realization_cube) + + def _get_value_interval(self): + if 
self.check_min_value or self.check_max_value: + Log.debug('Calculating max and min values...') + for time_slice in self.data_cube.slices_over('time'): + if self.check_min_value: + file_min = np.amin(time_slice.data) + if self.min_value is None: + self.min_value = file_min + self.min_value = min(self.min_value, file_min) + + if self.check_max_value: + file_max = np.amax(time_slice.data) + self.max_value = max(self.max_value, file_max) + + def _calculate_distribution(self, data_cube): + def calculate_histogram(time_series): + histogram, self.bins = np.histogram(time_series, bins=self.bins, + range=(self.min_value, self.max_value)) + return histogram + + return np.apply_along_axis(calculate_histogram, 0, data_cube.data) + + + + + + + + + diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 450cc9e5..ddb82bd5 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -182,6 +182,7 @@ class WorkManager(object): Diagnostic.register(MonthlyPercentile) Diagnostic.register(ClimatologicalPercentile) Diagnostic.register(DaysOverPercentile) + Diagnostic.register(Discretize) @staticmethod def _register_general_diagnostics(): diff --git a/test/unit/statistics/test_climatologicalpercentile.py b/test/unit/statistics/test_climatologicalpercentile.py index 76fd8553..93636693 100644 --- a/test/unit/statistics/test_climatologicalpercentile.py +++ b/test/unit/statistics/test_climatologicalpercentile.py @@ -25,32 +25,18 @@ class TestClimatologicalPercentile(TestCase): jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '11']) self.assertEqual(len(jobs), 1) self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', - 2000, 2000, 2001, float('nan'), float('nan'), 11, - self.diags.config.experiment)) - - jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '11', - '', '0', '40']) - 
self.assertEqual(len(jobs), 1) - self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', - 2000, 2000, 2001, 0.0, 40.0, 11, - self.diags.config.experiment)) - - jobs = ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '7', - '1000', '', '10']) - self.assertEqual(len(jobs), 1) - self.assertEqual(jobs[0], ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', - 1000, 2000, 2001, float('nan'), 10.0, 7, + 2000, 2001, 11, self.diags.config.experiment)) with self.assertRaises(Exception): ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent']) with self.assertRaises(Exception): - ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', '0', '0', '0', '0', '0', '0', '0', '0']) + ClimatologicalPercentile.generate_jobs(self.diags, ['climpercent', 'ocean', 'var', '2000', '2001', '11', + 'extra']) def test_str(self): diagnostic = ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', - 1000, 2000, 2001, float('nan'), float('nan'), 11, - self.diags.config.experiment) + 2000, 2001, 11, self.diags.config.experiment) self.assertEquals(str(diagnostic), 'Climatological percentile Variable: ocean:var Period: 2000-2001 ' - 'Forecast month: 11 Bins: 1000') + 'Forecast month: 11') diff --git a/test/unit/statistics/test_discretize.py b/test/unit/statistics/test_discretize.py new file mode 100644 index 00000000..402a3772 --- /dev/null +++ b/test/unit/statistics/test_discretize.py @@ -0,0 +1,60 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.statistics.discretize import Discretize +from earthdiagnostics.diagnostic import DiagnosticVariableOption +from mock import Mock, patch + +from earthdiagnostics.modelingrealm import ModelingRealms + + +class TestClimatologicalPercentile(TestCase): + + def setUp(self): + self.data_manager = Mock() + self.data_manager.variable_list.get_variable.return_value = None + + self.diags = 
Mock() + self.diags.data_manager = self.data_manager + self.diags.config.experiment.startdates = ('20000101', '20010101') + + def fake_parse(self, value): + return value + + @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + def test_generate_jobs(self): + jobs = Discretize.generate_jobs(self.diags, ['discretize', 'ocean', 'var']) + self.assertEqual(len(jobs), 2) + self.assertEqual(jobs[0], Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', + 2000, float('nan'), float('nan'))) + self.assertEqual(jobs[1], Discretize(self.data_manager, '20010101', ModelingRealms.ocean, 'var', + 2000, float('nan'), float('nan'))) + + jobs = Discretize.generate_jobs(self.diags, ['discretize', 'ocean', 'var', '500']) + self.assertEqual(jobs[0], Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', + 500, float('nan'), float('nan'))) + self.assertEqual(jobs[1], Discretize(self.data_manager, '20010101', ModelingRealms.ocean, 'var', + 500, float('nan'), float('nan'))) + + jobs = Discretize.generate_jobs(self.diags, ['discretize', 'ocean', 'var', '500', '0', '40']) + self.assertEqual(jobs[0], Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', + 500, 0, 40)) + self.assertEqual(jobs[1], Discretize(self.data_manager, '20010101', ModelingRealms.ocean, 'var', + 500, 0, 40)) + + with self.assertRaises(Exception): + Discretize.generate_jobs(self.diags, ['discretize']) + with self.assertRaises(Exception): + Discretize.generate_jobs(self.diags, ['discretize', 'ocean', 'var', '500', '0', '40', 'extra']) + + def test_str(self): + diagnostic = Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', 2000, 10, 40) + + self.assertEquals(str(diagnostic), 'Discretizing variable: ocean:var Startdate: 20000101 Bins: 2000 ' + 'Range: [10, 40]') + + diagnostic = Discretize(self.data_manager, '20000101', ModelingRealms.ocean, 'var', 2000, + float('nan'), float('nan')) + + self.assertEquals(str(diagnostic), 'Discretizing variable: 
ocean:var Startdate: 20000101 Bins: 2000 ' + 'Range: [None, None]') -- GitLab From 905a187a6f2b6bfcb958a9301c3034a449ef8803 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 4 Sep 2017 17:05:09 +0200 Subject: [PATCH 63/82] Climpercent adapted to reading discretized variables --- diags.conf | 4 +-- .../statistics/climatologicalpercentile.py | 33 +++++++++++-------- launch_diags.sh | 3 +- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/diags.conf b/diags.conf index 246ad6f4..b3b57b1b 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = discretize,atmos,sfcWind,500,0,40 climpercent,atmos,sfcWind,1984,1984,1 +DIAGS = discretize,atmos,sfcWind,2000,0,40 climpercent,atmos,sfcWind,1984,1984,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -87,7 +87,7 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. 
You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 19840101 +STARTDATES = 19911101 19921101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index d3d96f88..c5cbb704 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -44,8 +44,9 @@ class ClimatologicalPercentile(Diagnostic): self.start_year = start_year self.end_year = end_year self.forecast_month = forecast_month - self.cmor_var = data_manager.variable_list.get_variable(variable, silent=True) + self.distribution = None + self.leadtime_files = {} def __eq__(self, other): return self.domain == other.domain and self.variable == other.variable and \ @@ -86,7 +87,6 @@ class ClimatologicalPercentile(Diagnostic): return ['{0}{1:02}01'.format(year, self.forecast_month) for year in range(self.start_year, self.end_year+1)] def request_data(self): - self.leadtime_files = {} for startdate in self.requested_startdates(): if startdate not in self.leadtime_files: self.leadtime_files[startdate] = {} @@ -95,7 +95,7 @@ class ClimatologicalPercentile(Diagnostic): None, None, vartype=VariableType.STATISTIC) def declare_data_generated(self): - var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:2d}'.format(self) + var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:02d}'.format(self) self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology, vartype=VariableType.STATISTIC) @@ -104,6 +104,7 @@ class ClimatologicalPercentile(Diagnostic): Runs the diagnostic """ iris.FUTURE.netcdf_promote = True + self._get_distribution() percentile_values = self._calculate_percentiles() self._save_results(percentile_values) @@ -111,11 +112,12 @@ class 
ClimatologicalPercentile(Diagnostic): temp = TempFile.get() percentile_coord = iris.coords.DimCoord(ClimatologicalPercentile.Percentiles, long_name='percentile') results = iris.cube.CubeList() - for leadtime in percentile_values.keys(): - result = iris.cube.Cube(percentile_values[leadtime], var_name='percent', units=self.units) + for leadtime, data in percentile_values.items(): + result = iris.cube.Cube(data, var_name='percent', + units=self.distribution.coord('bin').units) result.add_dim_coord(percentile_coord, 0) - result.add_dim_coord(self.lat_coord, 1) - result.add_dim_coord(self.lon_coord, 2) + result.add_dim_coord(self.distribution.coord('latitude'), 1) + result.add_dim_coord(self.distribution.coord('longitude'), 2) result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) results.append(result) iris.FUTURE.netcdf_no_unlimited = True @@ -125,24 +127,27 @@ class ClimatologicalPercentile(Diagnostic): def _calculate_percentiles(self): Log.debug('Calculating percentiles') - percentiles = iris.cube.CubeList() + percentiles = {} + + bins = self.distribution.coord('bin').points def calculate(point_distribution): cs = np.cumsum(point_distribution) total = cs[-1] percentile_values = ClimatologicalPercentile.Percentiles * total index = np.searchsorted(cs, percentile_values) - return [(self._bins[i + 1] + self._bins[i]) / 2 for i in index] + return [bins[i] for i in index] + for leadtime_slice in self.distribution.slices_over('leadtime'): - percentiles.append(np.apply_along_axis(calculate, 0, leadtime_slice.data)) - return percentiles.merge_cube() + leadtime = leadtime_slice.coord('leadtime').points[0] + percentiles[leadtime]=np.apply_along_axis(calculate, 0, leadtime_slice.data) + return percentiles def _get_distribution(self): - for startdate in self.leadtime_files: + for startdate, startdate_file in self.leadtime_files.iteritems(): Log.info('Getting data for startdate {0}', startdate) - data_cube = iris.load_cube(startdate.local_file) + 
data_cube = iris.load_cube(startdate_file.local_file) if self.distribution is None: self.distribution = data_cube else: self.distribution += data_cube - diff --git a/launch_diags.sh b/launch_diags.sh index 3c8797f0..01ea9c10 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -#SBATCH -n 1 -#SBATCH -w gustafson +#SBATCH -n 8 #SBATCH --time 2:00:00 #SBATCH --error=job.%J.err #SBATCH --output=job.%J.out -- GitLab From 7fc69265a996a61dd743356e3617986e1a50fbe2 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 6 Sep 2017 09:31:13 +0200 Subject: [PATCH 64/82] Fixed cmorization --- diags.conf | 8 ++--- earthdiagnostics/cmorizer.py | 16 +++++---- earthdiagnostics/cmormanager.py | 58 ++++++++++++++++----------------- launch_diags.sh | 6 ++-- 4 files changed, 45 insertions(+), 43 deletions(-) diff --git a/diags.conf b/diags.conf index b3b57b1b..997f8bfa 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = discretize,atmos,sfcWind,2000,0,40 climpercent,atmos,sfcWind,1984,1984,1 +DIAGS = discretize,atmos,sfcWind,2000,0,40 climpercent,atmos,sfcWind,1991,1992,11 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -25,8 +25,8 @@ FREQUENCY = 6hr CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir RESTORE_MESHES = False -# Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available) -MAX_CORES = 2 +# Limits the maximum amount of threads used. 
Default: 0 (no limitation, one per virtual core available)z +# MAX_CORES = 2 [CMOR] # If true, recreates CMOR files regardless of presence. Default = False @@ -87,7 +87,7 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 19911101 19921101 +STARTDATES = 19931101 19941101 19951101 19961101 19971101 19981101 19991101 20001101 20011101 20021101 20031101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 593180f7..e99afcd1 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -277,13 +277,12 @@ class Cmorizer(object): Log.info('Unpacking... ') # remap on regular Gauss grid if grid == 'SH': - Utils.cdo.splitparam(input='-sp2gpl {0}'.format(full_file), output=gribfile + '_', - options='-f nc4') + Utils.cdo.splitparam(input=['-sp2gpl {0}'.format(full_file), gribfile + '_'], options='-f nc4') else: - Utils.cdo.splitparam(input=full_file, output=gribfile + '_', options='-R -f nc4') + Utils.cdo.splitparam(input=[full_file, gribfile + '_'], options='-R -f nc4') # total precipitation (remove negative values) - Utils.cdo.setcode(228, input='-setmisstoc,0 -setvrange,0,Inf -add ' - '{0}_{{142,143}}.128.nc'.format(gribfile), + Utils.cdo.setcode(228, + input='-setmisstoc,0 -setvrange,0,Inf -add {0}_{{142,143}}.128.nc'.format(gribfile), output='{0}_228.128.nc'.format(gribfile)) Utils.remove_file('ICM') @@ -645,12 +644,15 @@ class Cmorizer(object): gribfiles = glob.glob(grb_path) return len(gribfiles) > 0 - def cmorization_required(self, chunk, domain): + def cmorization_required(self, chunk, domains): if not self.config.cmor.chunk_cmorization_requested(chunk): return False if self.config.cmor.force: return True - return not self.data_manager.is_cmorized(self.startdate, self.member, chunk, domain) + for domain in domains: + 
if self.data_manager.is_cmorized(self.startdate, self.member, chunk, domain): + return False + return True class CMORException(Exception): diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index c2d30f2f..32698f9e 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -377,46 +377,45 @@ class CMORManager(DataManager): if not self._unpack_cmor_files(startdate, member): self._cmorize_member(startdate, member) - def is_cmorized(self, startdate, member, chunk): + def is_cmorized(self, startdate, member, chunk, domain): identifier = (startdate, member, chunk) if identifier not in self._dic_cmorized: - self._dic_cmorized[identifier] = self._is_cmorized(startdate, member, chunk) - if self._dic_cmorized[identifier]: - return True - return False + self._dic_cmorized[identifier] = {} + self._dic_cmorized[identifier][domain] = self._is_cmorized(startdate, member, chunk, domain) + elif domain not in self._dic_cmorized[identifier]: + self._dic_cmorized[identifier][domain] = self._is_cmorized(startdate, member, chunk, domain) + return self._dic_cmorized[identifier][domain] + - def _is_cmorized(self, startdate, member, chunk): + def _is_cmorized(self, startdate, member, chunk, domain): startdate_path = self._get_startdate_path(startdate) if not os.path.isdir(startdate_path): return False if self.config.data_convention == 'specs': for freq in os.listdir(startdate_path): - for domain in (ModelingRealms.ocean, ModelingRealms.ocnBgchem, ModelingRealms.ocnBgchem, - ModelingRealms.atmos): - domain_path = os.path.join(startdate_path, freq, - domain.name) - if os.path.isdir(domain_path): - for var in os.listdir(domain_path): - cmor_var = self.variable_list.get_variable(var, True) - var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, - Frequency(freq)) - if os.path.isfile(var_path): - return True + domain_path = os.path.join(startdate_path, freq, + domain.name) + if os.path.isdir(domain_path): + for var 
in os.listdir(domain_path): + cmor_var = self.variable_list.get_variable(var, True) + var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, + Frequency(freq)) + if os.path.isfile(var_path): + return True else: member_path = os.path.join(startdate_path, self._get_member_str(member)) if not os.path.isdir(member_path): return False - for table, domain, freq in (('Amon', ModelingRealms.atmos, Frequencies.monthly), - ('Omon', ModelingRealms.ocean, Frequencies.monthly), - ('SImon', ModelingRealms.seaIce, Frequencies.monthly)): - table_dir = os.path.join(member_path, table) - if not os.path.isdir(table_dir): - continue - for var in os.listdir(table_dir): - cmor_var = self.variable_list.get_variable(var, True) - var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency=freq) - if os.path.isfile(var_path): - return True + freq = Frequencies.monthly + table = domain.get_table(freq, self.config.data_convention) + table_dir = os.path.join(member_path, table) + if not os.path.isdir(table_dir): + return False + for var in os.listdir(table_dir): + cmor_var = self.variable_list.get_variable(var, True) + var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency=freq) + if os.path.isfile(var_path): + return True return False def _cmorize_member(self, startdate, member): @@ -436,7 +435,8 @@ class CMORManager(DataManager): cmorized = False if not self.config.cmor.force_untar: - while self.is_cmorized(startdate, member, chunk): + while self.is_cmorized(startdate, member, chunk, ModelingRealms.atmos) or \ + self.is_cmorized(startdate, member, chunk, ModelingRealms.ocean): chunk += 1 while self._unpack_chunk(startdate, member, chunk): diff --git a/launch_diags.sh b/launch_diags.sh index 01ea9c10..5fc3cc96 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -1,13 +1,13 @@ #!/usr/bin/env bash #SBATCH -n 8 -#SBATCH --time 2:00:00 +#SBATCH --time 7-00:00:00 #SBATCH --error=job.%J.err #SBATCH 
--output=job.%J.out -PATH_TO_CONF_FILE=~vtorralb/diags-erai.conf +PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf PATH_TO_DIAGNOSTICS=~jvegas/earthdiagnostics PATH_TO_CONDAENV=~jvegas/anaconda/envs/earthdiags/bin @@ -22,4 +22,4 @@ source ${PATH_TO_CONDAENV}/activate earthdiags export PYTHONPATH=${PATH_TO_DIAGNOSTICS}:${PYTHONPATH} cd ${PATH_TO_DIAGNOSTICS}/earthdiagnostics/ -./earthdiags.py -f ${PATH_TO_CONF_FILE} +./earthdiags.py -lc DEBUG -f ${PATH_TO_CONF_FILE} -- GitLab From 3d6ee65f2caa4be3df63d30466edafcaaad3a203 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 6 Sep 2017 16:14:41 +0200 Subject: [PATCH 65/82] Fixed to work with conda envs --- earthdiagnostics/cmorizer.py | 4 ++-- earthdiagnostics/earthdiags.py | 3 +++ setup.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index e99afcd1..81f43669 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -277,9 +277,9 @@ class Cmorizer(object): Log.info('Unpacking... 
') # remap on regular Gauss grid if grid == 'SH': - Utils.cdo.splitparam(input=['-sp2gpl {0}'.format(full_file), gribfile + '_'], options='-f nc4') + Utils.cdo.splitparam(input='-sp2gpl {0}'.format(full_file), output=gribfile + '_', options='-f nc4') else: - Utils.cdo.splitparam(input=[full_file, gribfile + '_'], options='-R -f nc4') + Utils.cdo.splitparam(input=full_file, output=gribfile + '_', options='-R -f nc4') # total precipitation (remove negative values) Utils.cdo.setcode(228, input='-setmisstoc,0 -setvrange,0,Inf -add {0}_{{142,143}}.128.nc'.format(gribfile), diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 72c3cba9..e8dae22a 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -20,6 +20,7 @@ from earthdiagnostics.utils import TempFile, Utils from earthdiagnostics.variable import VariableManager from work_manager import WorkManager +from distutils.spawn import find_executable class EarthDiags(object): @@ -101,6 +102,8 @@ class EarthDiags(object): Utils.cdo.debug = True Utils.nco.debug = False # This is due to a bug in nco. 
Must change when it's solved + Utils.cdo.CDO = find_executable('cdo') + if args.logfilepath: Log.set_file(bscearth.utils.path.expand_path(args.logfilepath)) diff --git a/setup.py b/setup.py index 3a4c38c5..3c64109a 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setup( url='http://www.bsc.es/projects/earthsciences/autosubmit/', keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], - install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', + install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo>=1.3.4', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash'], packages=find_packages(), include_package_data=True, -- GitLab From d2cbdc591d45f2cc47640d43c3cea53305685a94 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 8 Sep 2017 10:12:12 +0200 Subject: [PATCH 66/82] Fixed bug in cmorization --- earthdiagnostics/cmorizer.py | 24 +++++++++++++++++++++--- earthdiagnostics/datafile.py | 2 +- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 81f43669..1381e331 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -14,6 +14,7 @@ from earthdiagnostics.frequency import Frequency, Frequencies from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils from earthdiagnostics.variable import VariableManager +from earthdiagnostics.datafile import NetCDFFile class Cmorizer(object): @@ -421,9 +422,26 @@ class Cmorizer(object): raise CMORException('Variable {0}:{1} can not be cmorized. 
Original filename does not match a recognized ' 'pattern'.format(var_cmor.domain, var_cmor.short_name)) - self.data_manager.send_file(temp, var_cmor.domain, var_cmor.short_name, self.startdate, self.member, - frequency=frequency, rename_var=variable, date_str=date_str, region=region, - move_old=True, grid=alias.grid, cmorized=True) + netcdf_file = NetCDFFile() + netcdf_file.data_manager = self.data_manager + netcdf_file.local_file = temp + netcdf_file.remote_file = self.data_manager.get_file_path(self.startdate, self.member, + var_cmor.domain, var_cmor.short_name, var_cmor, + None, frequency, + grid=alias.grid, year=None, date_str=date_str) + + netcdf_file.data_convention = self.config.data_convention + netcdf_file.region = region + + netcdf_file.frequency = frequency + netcdf_file.domain = var_cmor.domain + netcdf_file.var = var_cmor.short_name + netcdf_file.final_name = var_cmor.short_name + + netcdf_file.prepare_to_upload(rename_var=variable) + netcdf_file.add_cmorization_history() + netcdf_file.upload() + if region: region_str = ' (Region {})'.format(region) else: diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 80484ace..0ae84dd2 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -445,7 +445,7 @@ class NetCDFFile(DataFile): def create_link(self): try: self.data_manager.create_link(self.domain, self.remote_file, self.frequency, self.final_name, - self.grid, False, self.var_type) + self.grid, True, self.var_type) except Exception as ex: Log.error('Can not create link to {1}: {0}'.format(ex, self.remote_file)) -- GitLab From e04dfc074f6aecc580929f650185b4c98d25da80 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 8 Sep 2017 12:38:12 +0200 Subject: [PATCH 67/82] Fixed errors appearing when jobs failed --- earthdiagnostics/datafile.py | 2 +- earthdiagnostics/ocean/interpolate.py | 12 ++++++++---- earthdiagnostics/publisher.py | 5 ++--- test/unit/test_publisher.py | 6 +++--- 4 files changed, 14 
insertions(+), 11 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 0ae84dd2..727c8c76 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -57,7 +57,7 @@ class DataFile(Publisher): def unsubscribe(self, who): super(DataFile, self).unsubscribe(who) - if self.local_status == LocalStatus.READY and len(self.subscribers) == 0: + if self.local_status == LocalStatus.READY and not self.suscribers: os.remove(self.local_file) def upload_required(self): diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index 26c577b0..ea64a0c9 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -172,12 +172,16 @@ class Interpolate(Diagnostic): nco.ncwa(input=temp, output=temp, options=('-O -h -a lev',)) else: shutil.copy(input_file, temp) + + weights_file = '/esnas/autosubmit/con_files/weigths/{0}/rmp_{0}_to_{1}_lev{2}.nc'.format(self.model_version, + self.grid, lev + 1) + if not os.path.isfile(weights_file): + raise Exception('Level {0} weights file does not exist for model {1} ' + 'and grid {2}'.format(lev+1, self.model_version, self.grid)) namelist_file = TempFile.get(suffix='') scrip_use_in = open(namelist_file, 'w') scrip_use_in.writelines("&remap_inputs\n") - scrip_use_in.writelines(" remap_wgt = '/esnas/autosubmit/con_files/" - "weigths/{0}/rmp_{0}_to_{1}_lev{2}.nc'\n".format(self.model_version, self.grid, - lev + 1)) + scrip_use_in.writelines(" remap_wgt = '{0}'\n".format(weights_file)) scrip_use_in.writelines(" infile = '{0}'\n".format(temp)) scrip_use_in.writelines(" invertlat = FALSE\n") scrip_use_in.writelines(" var = '{0}'\n".format(self.variable)) @@ -186,7 +190,7 @@ class Interpolate(Diagnostic): scrip_use_in.writelines("/\n") scrip_use_in.close() Utils.execute_shell_command('/home/Earth/jvegas/pyCharm/cfutools/interpolation/scrip_use ' - '{0}'.format(namelist_file), Log.DEBUG) + '{0}'.format(namelist_file), Log.INFO) 
os.remove(namelist_file) nco.ncecat(input=temp, output=temp, options=("-O -h",)) shutil.move(temp, self._get_level_file(lev)) diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index d1a7c856..928d3eca 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -37,10 +37,9 @@ class Publisher(object): for subscriber, callback in self._subscribers.items(): callback(*args) + @property def suscribers(self, *args): """ - Notify update to all the suscribers - - :param args: arguments to pass + List of suscribers of this publisher """ return self._subscribers.keys() diff --git a/test/unit/test_publisher.py b/test/unit/test_publisher.py index 25d8c034..5fe325ff 100644 --- a/test/unit/test_publisher.py +++ b/test/unit/test_publisher.py @@ -10,14 +10,14 @@ class TestPublisher(TestCase): suscriber = Mock() pub = Publisher() pub.subscribe(suscriber, callback=suscriber.callback) - self.assertIn(suscriber, pub.suscribers()) + self.assertIn(suscriber, pub.suscribers) def test_suscribe_default(self): suscriber = Mock() pub = Publisher() pub.subscribe(suscriber) self.assertTrue(hasattr(suscriber, 'update')) - self.assertIn(suscriber, pub.suscribers()) + self.assertIn(suscriber, pub.suscribers) def test_unsuscribe(self): suscriber = Mock() @@ -25,7 +25,7 @@ class TestPublisher(TestCase): pub.subscribe(suscriber, callback=suscriber.callback) pub.unsubscribe(suscriber) - self.assertNotIn(suscriber, pub.suscribers()) + self.assertNotIn(suscriber, pub.suscribers) def test_dispatch(self): suscriber = Mock() -- GitLab From e1dbf8f6c3112af19733e09afda10e36e5aff036 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 12 Sep 2017 14:15:27 +0200 Subject: [PATCH 68/82] Fixed module diagnostic --- earthdiagnostics/datafile.py | 6 +++--- earthdiagnostics/general/module.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 727c8c76..88b9495c 
100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -202,9 +202,9 @@ class DataFile(Publisher): else: valid_max = '' Utils.nco.ncatted(input=self.local_file, output=self.local_file, - options='-O -a _FillValue,{0},o,{1},"1.e20" ' - '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.final_name, var_type.char, - valid_min, valid_max)) + options=('-O -a _FillValue,{0},o,{1},"1.e20" ' + '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.final_name, var_type.char, + valid_min, valid_max),)) def _fix_coordinate_variables_metadata(self, handler): if 'lev' in handler.variables: diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index f22f37bb..abf77c8a 100644 --- a/earthdiagnostics/general/module.py +++ b/earthdiagnostics/general/module.py @@ -79,10 +79,10 @@ class Module(Diagnostic): return job_list def request_data(self): - self.component_u_file = self.data_manager.get_file(self.domain, self.componentu, self.startdate, self.member, - self.chunk, grid=self.grid) - self.component_v_file = self.data_manager.get_file(self.domain, self.componentv, self.startdate, self.member, - self.chunk, grid=self.grid) + self.component_u_file = self.request_chunk(self.domain, self.componentu, self.startdate, self.member, + self.chunk, grid=self.grid) + self.component_v_file = self.request_chunk(self.domain, self.componentv, self.startdate, self.member, + self.chunk, grid=self.grid) def declare_data_generated(self): self.module_file = self.declare_chunk(self.domain, self.module, self.startdate, self.member, self.chunk, @@ -93,9 +93,9 @@ class Module(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - Utils.copy_file(self.component_u_file, temp) + Utils.copy_file(self.component_u_file.local_file, temp) component_u = Utils.openCdf(temp) - component_v = Utils.openCdf(self.component_v_file) + component_v = Utils.openCdf(self.component_v_file.local_file) variable_u = component_u.variables[self.componentu] variable_v = 
component_v.variables[self.componentv] @@ -109,4 +109,4 @@ class Module(Diagnostic): component_u.close() component_v.close() - self.module_file.set_local_file(temp, rename_var=self.componentu) + self.module_file.set_local_file(temp, rename_var=self.componentu) \ No newline at end of file -- GitLab From e512b9a32bd1fc72dfddaec22f0a1eeac0764200 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 12 Sep 2017 14:15:52 +0200 Subject: [PATCH 69/82] Add memory tracker for discretize diagnostic --- diags.conf | 8 +++++--- earthdiagnostics/statistics/discretize.py | 23 ++++++++++++++++++----- launch_diags.sh | 7 ++++--- setup.py | 2 +- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/diags.conf b/diags.conf index 997f8bfa..efda37e4 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,8 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = discretize,atmos,sfcWind,2000,0,40 climpercent,atmos,sfcWind,1991,1992,11 +DIAGS = discretize,atmos,sfcWind,2000,0,40 +# climpercent,atmos,sfcWind,1991,1992,11 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -26,7 +27,7 @@ CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir RESTORE_MESHES = False # Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available)z -# MAX_CORES = 2 +MAX_CORES = 1 [CMOR] # If true, recreates CMOR files regardless of presence. Default = False @@ -87,7 +88,8 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. 
You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 19931101 19941101 19951101 19961101 19971101 19981101 19991101 20001101 20011101 20021101 20031101 +# STARTDATES = 19931101 19941101 19951101 19961101 19971101 19981101 19991101 20001101 20011101 20021101 20031101 +STARTDATES = 19840101 19850101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 diff --git a/earthdiagnostics/statistics/discretize.py b/earthdiagnostics/statistics/discretize.py index 08753ab5..bd0470c7 100644 --- a/earthdiagnostics/statistics/discretize.py +++ b/earthdiagnostics/statistics/discretize.py @@ -1,11 +1,9 @@ # coding=utf-8 -import six from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ - DiagnosticIntOption, DiagnosticListIntOption, DiagnosticFloatOption -from earthdiagnostics.frequency import Frequencies + DiagnosticIntOption, DiagnosticFloatOption from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.variable_type import VariableType import numpy as np @@ -16,6 +14,8 @@ from iris.time import PartialDateTime import iris.exceptions import iris.coords import math +import psutil +import six class Discretize(Diagnostic): @@ -65,6 +65,11 @@ class Discretize(Diagnostic): self.max_value = None self.check_max_value = True + self.process = psutil.Process() + + def print_memory_used(self): + Log.user_warning('Memory: {0:.2f} GB'.format(self.process.memory_info().rss / 1024.0**3)) + @property def bins(self): if self._bins is None: @@ -121,12 +126,20 @@ class Discretize(Diagnostic): """ Runs the diagnostic """ + self.print_memory_used() iris.FUTURE.netcdf_promote = True self._load_cube() + self.print_memory_used() self._get_value_interval() + self.print_memory_used() Log.info('Range: [{0}, {1}]', self.min_value, self.max_value) self._get_distribution() + self.print_memory_used() self._save_results() 
+ self.print_memory_used() + del self.distribution + del self.data_cube + self.print_memory_used() def _load_cube(self): @@ -171,7 +184,7 @@ class Discretize(Diagnostic): cubes = iris.cube.CubeList() - for leadtime, distribution in self.distribution.iteritems(): + for leadtime, distribution in six.iteritems(self.distribution): leadtime_cube = Cube(distribution.astype(np.uint32), var_name=self.data_cube.var_name, standard_name=self.data_cube.standard_name, units='1') leadtime_cube.add_dim_coord(bins_coord, 0) @@ -191,9 +204,9 @@ class Discretize(Diagnostic): for leadtime in set(self.data_cube.coord('leadtime').points): Log.debug('Discretizing leadtime {0}', leadtime) leadtime_cube = self.data_cube.extract(iris.Constraint(leadtime=leadtime)) - for realization in self.data_cube.coord('realization').points: Log.debug('Discretizing realization {0}', realization) + self.print_memory_used() try: realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization)) except iris.exceptions.CoordinateNotFoundError: diff --git a/launch_diags.sh b/launch_diags.sh index 5fc3cc96..a7d9ff88 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -#SBATCH -n 8 +#SBATCH -n 1 #SBATCH --time 7-00:00:00 #SBATCH --error=job.%J.err #SBATCH --output=job.%J.out @@ -9,16 +9,17 @@ PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf PATH_TO_DIAGNOSTICS=~jvegas/earthdiagnostics -PATH_TO_CONDAENV=~jvegas/anaconda/envs/earthdiags/bin +PATH_TO_CONDAENV=diags module purge module load NCO/4.5.4-foss-2015a module load CDO/1.7.2-foss-2015a module load CDFTOOLS/3.0a8-foss-2015a +module load Miniconda2 set -xv -source ${PATH_TO_CONDAENV}/activate earthdiags +source activate diags export PYTHONPATH=${PATH_TO_DIAGNOSTICS}:${PYTHONPATH} cd ${PATH_TO_DIAGNOSTICS}/earthdiagnostics/ diff --git a/setup.py b/setup.py index 3c64109a..0a88cf58 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup( keywords=['climate', 'weather', 'diagnostic'], 
setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo>=1.3.4', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', - 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash'], + 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash', 'six', 'psutil'], packages=find_packages(), include_package_data=True, scripts=['bin/earthdiags'] -- GitLab From 65823feb1ff80d13ecbbac5116b9ef78ded27bb0 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 12 Sep 2017 16:54:12 +0200 Subject: [PATCH 70/82] Updated documentation --- doc/source/config_file.rst | 40 ++++++++++++++++++++++++++++++++++++-- doc/source/tips.rst | 13 +++++++------ doc/source/tutorial.rst | 17 ++++++++-------- model_launch_diags.sh | 15 ++++++++++++++ 4 files changed, 68 insertions(+), 17 deletions(-) create mode 100755 model_launch_diags.sh diff --git a/doc/source/config_file.rst b/doc/source/config_file.rst index dde7685a..370fea73 100644 --- a/doc/source/config_file.rst +++ b/doc/source/config_file.rst @@ -53,8 +53,8 @@ Optional configurations Type of the dataset to use. It can be exp, obs or recon. Default is exp. * DATA_CONVENTION - Convention to use for file paths and names and variable naming among other things. Can be SPECS, PRIMAVERA or CMIP6. - Default is SPECS. + Convention to use for file paths and names and variable naming among other things. Can be SPECS, PREFACE, + PRIMAVERA or CMIP6. Default is SPECS. * CDFTOOLS_PATH Path to the folder containing CDFTOOLS executables. By default is empty, so CDFTOOLS binaries must be added to the @@ -65,6 +65,28 @@ Optional configurations necessary when launching through a scheduler, as Earthdiagnostics can detect how many cores the scheduler has allocated to it. +* AUTO_CLEAN + If True, EarthDiagnostics removes the temporary folder just after finsihing. 
If RAM_DISK is set to True, this value + is ignored and always Default is True + +* RAM_DISK + If set to True, the temporary files is created at the /dev/shm partition. This partition is not mounted from a disk. + Instead, all files are created in the RAM memory, so hopefully this will improve performance at the cost of a much + higher RAM consumption. Default is False. + +* MESH_MASK + Custom file to use instead of the corresponding mesh mask file. + +* NEW_MASK_GLO + Custom file to use instead of the corresponding new mask glo file + +* MASK_REGIONS + Custom file to use instead of the corresponding 2D regions file + +* MASK_REGIONS_3D + Custom file to use instead of the corresponding 3D regions file + + EXPERIMENT ---------- @@ -117,6 +139,9 @@ This sections contains options related to the experiment's definition or configu * CHUNKS Number of chunks to run +* CHUNK_LIST + List of chunks to run. If empty, all diagnostics will be applied to all chunks + * CALENDAR Calendar to use for date calculation. All calendars supported by Autosubmit are available. Default is 'standard' @@ -213,6 +238,17 @@ cmorized files. * SOURCE Default value is 'to be filled' +* VERSION + Dataset version to use (not present in all conventions) + +* DEFAULT_OCEAN_GRID + Name of the default ocean grid for those conventions that require it (CMIP6 and PRIMAVERA). Default is gn. + +* DEFAULT_ATMOS_GRID + Name of the default atmos grid for those conventions that require it (CMIP6 and PRIMAVERA). Default is gr. + +* ACTIVITY + Name of the activity. Default is CMIP THREDDS ------- diff --git a/doc/source/tips.rst b/doc/source/tips.rst index b9f88661..2ef74365 100644 --- a/doc/source/tips.rst +++ b/doc/source/tips.rst @@ -16,15 +16,16 @@ system, the diagnostics will always use the number of cores that you reserved. I system, the diagnostics will try to use all the cores on the machine. 
To avoid this, add the MAX_CORES parameter to the DIAGNOSTICS section inside the diags.conf file that you are using. -NEMO files ----------- +Cleaning temp file +------------------ + +By default, EarthDiagnostics removes the temporary directory after execution. This behaviour can be avoided be setting +ra -Unlike the bash version of the ocean diagnostics, this program keeps the NEMO files in the scratch folder so you can -launch different configurations for the same experiment with reduced start time. You will need to remove the experiment's -folder in the scratch directory at the end of the experiment to avoid wasting resources. To do this, just use +By default .. code-block:: bash earthdiags -f PATH_TO_CONF --clean -If you plan to run the earthdiagnostics only once, you can add this line after the execution \ No newline at end of file + diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index c2b99900..52d071c5 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -37,8 +37,8 @@ Creating a config file ---------------------- Go to the folder where you installed the EarthDiagnostics. You will see a folder called earthdiagnostics, -and, inside it, a diags.conf file that can be used as a model for your config file. Create a copy of it wherever it -suites you. +and, inside it, the model_diags.conf file that can be used as a template for your config file. Create a copy of it +wherever it suites you. Now open your brand new copy with your preferred text editor. The file contains commentaries explaining each one of its options, so read it carefully and edit whatever you need. Don't worry about DIAGS option, we will @@ -53,21 +53,20 @@ whichever suits you better. From now on, we will assume that you are going to ru .. hint:: For old Ocean Diagnostics users: you can use most of the old names as aliases to launch one or multiple diagnostics. - Check the ALIAS section on the diags.conf to see which ones are available. 
+ Check the ALIAS section on the model_diags.conf to see which ones are available. -First, choose a variable that has daily data. Then replace the DIAGS option with the next one where $VARIABLE represents the -variable's name and $DOMAIN its domain (atmos, ocean, seaice, landice...) +First, choose a variable that has daily data. Then replace the DIAGS option with the next one where $VARIABLE represents +the variable's name and $DOMAIN its domain (atmos, ocean, seaice, landice...) .. code-block:: sh - DIAGS = monmean,$VARIABLE,$DOMAIN + DIAGS = monmean,$DOMAIN,$VARIABLE Prepare the run script ---------------------- -Once you have configured your experiment you can execute any diagnostic with the provided launch_diags.sh script. -Create a copy and change the variables PATH_TO_CONF_FILE and PATH_TO_DIAGNOSTICS so they point to your conf file and -installation folder. +Once you have configured your experiment you can execute any diagnostic with the provided model_launch_diags.sh script. +Create a copy and change the variable PATH_TO_CONF_FILE so it points to your conf file . Now, execute the script (or submit it to bsceslogin01, it has the correct header) and... that's it! You will find your results directly on the storage and a folder for the temp files in the scratch named after the EXPID. 
diff --git a/model_launch_diags.sh b/model_launch_diags.sh new file mode 100755 index 00000000..7a226961 --- /dev/null +++ b/model_launch_diags.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +#SBATCH -n 1 +#SBATCH --time 7-00:00:00 +#SBATCH --error=earthdiags.%J.err +#SBATCH --output=earthdiags.%J.out + +PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf + +module purge +module load earthdiagnostics + +set -xv + +earthdiags -lc DEBUG -f ${PATH_TO_CONF_FILE} -- GitLab From ecd86a0474b04e41ba8188b8e81eeaf8750ca697 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 12 Sep 2017 16:54:12 +0200 Subject: [PATCH 71/82] Updated documentation --- doc/source/config_file.rst | 42 +++++++++++++++++++++++++++++++++++--- doc/source/tips.rst | 13 ++++++------ doc/source/tutorial.rst | 17 ++++++++------- model_launch_diags.sh | 15 ++++++++++++++ 4 files changed, 69 insertions(+), 18 deletions(-) create mode 100755 model_launch_diags.sh diff --git a/doc/source/config_file.rst b/doc/source/config_file.rst index dde7685a..efe2c79b 100644 --- a/doc/source/config_file.rst +++ b/doc/source/config_file.rst @@ -31,7 +31,7 @@ Mandatory configurations ignore it completely. * DIAGS: - List of diagnostic to run, in the order you want them to run + List of diagnostic to run. No specific order is needed: data dependencies will be enforced. Optional configurations @@ -53,8 +53,8 @@ Optional configurations Type of the dataset to use. It can be exp, obs or recon. Default is exp. * DATA_CONVENTION - Convention to use for file paths and names and variable naming among other things. Can be SPECS, PRIMAVERA or CMIP6. - Default is SPECS. + Convention to use for file paths and names and variable naming among other things. Can be SPECS, PREFACE, + PRIMAVERA or CMIP6. Default is SPECS. * CDFTOOLS_PATH Path to the folder containing CDFTOOLS executables. 
By default is empty, so CDFTOOLS binaries must be added to the @@ -65,6 +65,28 @@ Optional configurations necessary when launching through a scheduler, as Earthdiagnostics can detect how many cores the scheduler has allocated to it. +* AUTO_CLEAN + If True, EarthDiagnostics removes the temporary folder just after finsihing. If RAM_DISK is set to True, this value + is ignored and always Default is True + +* RAM_DISK + If set to True, the temporary files is created at the /dev/shm partition. This partition is not mounted from a disk. + Instead, all files are created in the RAM memory, so hopefully this will improve performance at the cost of a much + higher RAM consumption. Default is False. + +* MESH_MASK + Custom file to use instead of the corresponding mesh mask file. + +* NEW_MASK_GLO + Custom file to use instead of the corresponding new mask glo file + +* MASK_REGIONS + Custom file to use instead of the corresponding 2D regions file + +* MASK_REGIONS_3D + Custom file to use instead of the corresponding 3D regions file + + EXPERIMENT ---------- @@ -117,6 +139,9 @@ This sections contains options related to the experiment's definition or configu * CHUNKS Number of chunks to run +* CHUNK_LIST + List of chunks to run. If empty, all diagnostics will be applied to all chunks + * CALENDAR Calendar to use for date calculation. All calendars supported by Autosubmit are available. Default is 'standard' @@ -213,6 +238,17 @@ cmorized files. * SOURCE Default value is 'to be filled' +* VERSION + Dataset version to use (not present in all conventions) + +* DEFAULT_OCEAN_GRID + Name of the default ocean grid for those conventions that require it (CMIP6 and PRIMAVERA). Default is gn. + +* DEFAULT_ATMOS_GRID + Name of the default atmos grid for those conventions that require it (CMIP6 and PRIMAVERA). Default is gr. + +* ACTIVITY + Name of the activity. 
Default is CMIP THREDDS ------- diff --git a/doc/source/tips.rst b/doc/source/tips.rst index b9f88661..2ef74365 100644 --- a/doc/source/tips.rst +++ b/doc/source/tips.rst @@ -16,15 +16,16 @@ system, the diagnostics will always use the number of cores that you reserved. I system, the diagnostics will try to use all the cores on the machine. To avoid this, add the MAX_CORES parameter to the DIAGNOSTICS section inside the diags.conf file that you are using. -NEMO files ----------- +Cleaning temp file +------------------ + +By default, EarthDiagnostics removes the temporary directory after execution. This behaviour can be avoided be setting +ra -Unlike the bash version of the ocean diagnostics, this program keeps the NEMO files in the scratch folder so you can -launch different configurations for the same experiment with reduced start time. You will need to remove the experiment's -folder in the scratch directory at the end of the experiment to avoid wasting resources. To do this, just use +By default .. code-block:: bash earthdiags -f PATH_TO_CONF --clean -If you plan to run the earthdiagnostics only once, you can add this line after the execution \ No newline at end of file + diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index c2b99900..52d071c5 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -37,8 +37,8 @@ Creating a config file ---------------------- Go to the folder where you installed the EarthDiagnostics. You will see a folder called earthdiagnostics, -and, inside it, a diags.conf file that can be used as a model for your config file. Create a copy of it wherever it -suites you. +and, inside it, the model_diags.conf file that can be used as a template for your config file. Create a copy of it +wherever it suites you. Now open your brand new copy with your preferred text editor. The file contains commentaries explaining each one of its options, so read it carefully and edit whatever you need. 
Don't worry about DIAGS option, we will @@ -53,21 +53,20 @@ whichever suits you better. From now on, we will assume that you are going to ru .. hint:: For old Ocean Diagnostics users: you can use most of the old names as aliases to launch one or multiple diagnostics. - Check the ALIAS section on the diags.conf to see which ones are available. + Check the ALIAS section on the model_diags.conf to see which ones are available. -First, choose a variable that has daily data. Then replace the DIAGS option with the next one where $VARIABLE represents the -variable's name and $DOMAIN its domain (atmos, ocean, seaice, landice...) +First, choose a variable that has daily data. Then replace the DIAGS option with the next one where $VARIABLE represents +the variable's name and $DOMAIN its domain (atmos, ocean, seaice, landice...) .. code-block:: sh - DIAGS = monmean,$VARIABLE,$DOMAIN + DIAGS = monmean,$DOMAIN,$VARIABLE Prepare the run script ---------------------- -Once you have configured your experiment you can execute any diagnostic with the provided launch_diags.sh script. -Create a copy and change the variables PATH_TO_CONF_FILE and PATH_TO_DIAGNOSTICS so they point to your conf file and -installation folder. +Once you have configured your experiment you can execute any diagnostic with the provided model_launch_diags.sh script. +Create a copy and change the variable PATH_TO_CONF_FILE so it points to your conf file. Now, execute the script (or submit it to bsceslogin01, it has the correct header) and... that's it! You will find your results directly on the storage and a folder for the temp files in the scratch named after the EXPID.
diff --git a/model_launch_diags.sh b/model_launch_diags.sh new file mode 100755 index 00000000..7a226961 --- /dev/null +++ b/model_launch_diags.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +#SBATCH -n 1 +#SBATCH --time 7-00:00:00 +#SBATCH --error=earthdiags.%J.err +#SBATCH --output=earthdiags.%J.out + +PATH_TO_CONF_FILE=~jvegas/earthdiagnostics/diags.conf + +module purge +module load earthdiagnostics + +set -xv + +earthdiags -lc DEBUG -f ${PATH_TO_CONF_FILE} -- GitLab From 24be5636da7cc9776586e03959129c2c76c35756 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 13 Sep 2017 16:34:36 +0200 Subject: [PATCH 72/82] Cleaned code --- diags.conf | 6 ++-- earthdiagnostics/cdftools.py | 2 +- earthdiagnostics/cmormanager.py | 2 +- earthdiagnostics/datafile.py | 2 +- earthdiagnostics/datamanager.py | 4 ++- earthdiagnostics/general/module.py | 2 +- .../general/verticalmeanmetersiris.py | 1 - earthdiagnostics/obsreconmanager.py | 1 + earthdiagnostics/ocean/interpolate.py | 4 +-- earthdiagnostics/ocean/interpolatecdo.py | 2 +- earthdiagnostics/publisher.py | 3 +- .../statistics/climatologicalpercentile.py | 13 +++---- .../statistics/daysoverpercentile.py | 35 +++++++++---------- earthdiagnostics/statistics/discretize.py | 6 ++-- earthdiagnostics/threddsmanager.py | 3 ++ earthdiagnostics/utils.py | 18 ++++++++-- earthdiagnostics/work_manager.py | 1 + launch_diags.sh | 2 +- test/unit/__init__.py | 2 +- test/unit/general/__init__.py | 1 + test/unit/general/test_attribute.py | 4 +-- test/unit/general/test_dailymean.py | 6 ++-- test/unit/ocean/__init__.py | 1 + test/unit/ocean/test_averagesection.py | 1 + test/unit/ocean/test_heatcontent.py | 1 + test/unit/ocean/test_heatcontentlayer.py | 1 - test/unit/ocean/test_maxmoc.py | 1 - test/unit/ocean/test_mxl.py | 1 - test/unit/ocean/test_region_mean.py | 1 - test/unit/ocean/test_vertical_gradient.py | 1 - test/unit/statistics/__init__.py | 1 + .../test_climatologicalpercentile.py | 2 +- test/unit/test_cdftools.py | 13 ++++--- 
test/unit/test_diagnostic.py | 23 +++++++----- test/unit/test_frequency.py | 5 ++- test/unit/test_variable.py | 1 + 36 files changed, 102 insertions(+), 71 deletions(-) diff --git a/diags.conf b/diags.conf index efda37e4..8ce61543 100644 --- a/diags.conf +++ b/diags.conf @@ -27,7 +27,7 @@ CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir RESTORE_MESHES = False # Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available)z -MAX_CORES = 1 +#MAX_CORES = 1 [CMOR] # If true, recreates CMOR files regardless of presence. Default = False @@ -88,8 +88,8 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment EXPID = testing_erainterim -# STARTDATES = 19931101 19941101 19951101 19961101 19971101 19981101 19991101 20001101 20011101 20021101 20031101 -STARTDATES = 19840101 19850101 +STARTDATES = 19801101 19811101 19821101 19831101 19841101 19851101 19861101 19871101 19881101 19891101 19701101 19711101 19721101 19731101 19741101 19751101 19761101 19771101 19781101 19791101 +# STARTDATES = 19840101 19850101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 diff --git a/earthdiagnostics/cdftools.py b/earthdiagnostics/cdftools.py index 5c78dd19..36fffdaa 100644 --- a/earthdiagnostics/cdftools.py +++ b/earthdiagnostics/cdftools.py @@ -28,7 +28,7 @@ class CDFTools(object): :param output: output file. Not all tools support this parameter :type options: str :param options: options for the tool. 
- :type options: str | [str] | Tuple[str] + :type options: str | [str] | Tuple[str] | NoneType :param log_level: log level at which the output of the cdftool command will be added :type log_level: int :param input_option: option to add before input file diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 32698f9e..e116190b 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -47,6 +47,7 @@ class CMORManager(DataManager): raise Exception('Can not find model data') self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles') + # noinspection PyUnusedLocal def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN, possible_versions=None): cmor_var = self.variable_list.get_variable(var) @@ -386,7 +387,6 @@ class CMORManager(DataManager): self._dic_cmorized[identifier][domain] = self._is_cmorized(startdate, member, chunk, domain) return self._dic_cmorized[identifier][domain] - def _is_cmorized(self, startdate, member, chunk, domain): startdate_path = self._get_startdate_path(startdate) if not os.path.isdir(startdate_path): diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index 88b9495c..b79e1ac4 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -204,7 +204,7 @@ class DataFile(Publisher): Utils.nco.ncatted(input=self.local_file, output=self.local_file, options=('-O -a _FillValue,{0},o,{1},"1.e20" ' '-a missingValue,{0},o,{1},"1.e20" {2}{3}'.format(self.final_name, var_type.char, - valid_min, valid_max),)) + valid_min, valid_max),)) def _fix_coordinate_variables_metadata(self, handler): if 'lev' in handler.variables: diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index de32bd10..4b643f4e 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -118,7 +118,7 @@ class DataManager(object): 
Utils.create_folder_tree(os.path.dirname(link_path)) relative_path = os.path.relpath(filepath, os.path.dirname(link_path)) os.symlink(relative_path, link_path) - except: + except Exception: raise finally: self.lock.release() @@ -183,6 +183,8 @@ class DataManager(object): :param frequency: file's frequency (only needed if it is different from the default) :type frequency: Frequency|NoneType :return: path to the copy created on the scratch folder + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType :rtype: str """ raise NotImplementedError('Class must override request_chunk method') diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index abf77c8a..3e6e48ba 100644 --- a/earthdiagnostics/general/module.py +++ b/earthdiagnostics/general/module.py @@ -109,4 +109,4 @@ class Module(Diagnostic): component_u.close() component_v.close() - self.module_file.set_local_file(temp, rename_var=self.componentu) \ No newline at end of file + self.module_file.set_local_file(temp, rename_var=self.componentu) diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py index b86e12a4..b609318d 100644 --- a/earthdiagnostics/general/verticalmeanmetersiris.py +++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -3,7 +3,6 @@ import iris import iris.analysis import iris.exceptions -from diagnostic import DiagnosticOption from earthdiagnostics.box import Box from earthdiagnostics.diagnostic import Diagnostic, DiagnosticFloatOption, DiagnosticDomainOption, \ DiagnosticVariableOption diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index 4d5d1bde..ee732dc0 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -226,6 +226,7 @@ class ObsReconManager(DataManager): Log.debug('{0} requested', filepath) return self._get_file_from_storage(filepath) + # noinspection PyUnusedLocal def declare_chunk(self, 
domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN, diagnostic=None): """ diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index ea64a0c9..e58d89e7 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -95,7 +95,7 @@ class Interpolate(Diagnostic): for startdate, member, chunk in diags.config.experiment.get_chunk_list(): job_list.append( Interpolate(diags.data_manager, startdate, member, chunk, - options['domain'], var , options['target_grid'], + options['domain'], var, options['target_grid'], diags.config.experiment.model_version, options['invert_lat'], options['original_grid'])) return job_list @@ -174,7 +174,7 @@ class Interpolate(Diagnostic): shutil.copy(input_file, temp) weights_file = '/esnas/autosubmit/con_files/weigths/{0}/rmp_{0}_to_{1}_lev{2}.nc'.format(self.model_version, - self.grid, lev + 1) + self.grid, lev + 1) if not os.path.isfile(weights_file): raise Exception('Level {0} weights file does not exist for model {1} ' 'and grid {2}'.format(lev+1, self.model_version, self.grid)) diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index c867dc31..b8eaac55 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -71,7 +71,7 @@ class InterpolateCDO(Diagnostic): 'Mask ocean: {0.mask_oceans} Model: {0.model_version}'.format(self) @classmethod - def generate_jobs(cls, diags, options ): + def generate_jobs(cls, diags, options): """ Creates a job for each chunk to compute the diagnostic diff --git a/earthdiagnostics/publisher.py b/earthdiagnostics/publisher.py index 928d3eca..4b318b2c 100644 --- a/earthdiagnostics/publisher.py +++ b/earthdiagnostics/publisher.py @@ -35,10 +35,11 @@ class Publisher(object): :param args: arguments to pass """ for subscriber, callback in self._subscribers.items(): + # noinspection 
PyCallingNonCallable callback(*args) @property - def suscribers(self, *args): + def suscribers(self): """ List of suscribers of this publisher """ diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index c5cbb704..85760a0e 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -1,20 +1,17 @@ # coding=utf-8 import six -from bscearth.utils.date import parse_date, add_months from bscearth.utils.log import Log from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ - DiagnosticIntOption, DiagnosticListIntOption, DiagnosticFloatOption + DiagnosticIntOption, DiagnosticListIntOption from earthdiagnostics.frequency import Frequencies -from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.utils import TempFile from earthdiagnostics.variable_type import VariableType import numpy as np import iris import iris.coord_categorisation -from iris.time import PartialDateTime import iris.exceptions import iris.coords -import math class ClimatologicalPercentile(Diagnostic): @@ -49,7 +46,7 @@ class ClimatologicalPercentile(Diagnostic): self.leadtime_files = {} def __eq__(self, other): - return self.domain == other.domain and self.variable == other.variable and \ + return self.domain == other.domain and self.variable == other.variable and \ self.start_year == other.start_year and self.end_year == other.end_year and \ self.forecast_month == other.forecast_month @@ -140,11 +137,11 @@ class ClimatologicalPercentile(Diagnostic): for leadtime_slice in self.distribution.slices_over('leadtime'): leadtime = leadtime_slice.coord('leadtime').points[0] - percentiles[leadtime]=np.apply_along_axis(calculate, 0, leadtime_slice.data) + percentiles[leadtime] = np.apply_along_axis(calculate, 0, leadtime_slice.data) return percentiles def _get_distribution(self): - for startdate, 
startdate_file in self.leadtime_files.iteritems(): + for startdate, startdate_file in six.iteritems(self.leadtime_files): Log.info('Getting data for startdate {0}', startdate) data_cube = iris.load_cube(startdate_file.local_file) if self.distribution is None: diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 0cbd46f2..aaa0065d 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -21,12 +21,6 @@ class DaysOverPercentile(Diagnostic): :param data_manager: data management object :type data_manager: DataManager - :param startdate: startdate - :type startdate: str - :param member: member number - :type member: int - :param chunk: chunk's number - :type chunk: int :param variable: variable to average :type variable: str """ @@ -94,11 +88,13 @@ class DaysOverPercentile(Diagnostic): self.days_over_file = {} self.days_below_file = {} for perc in ClimatologicalPercentile.Percentiles: - self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100)), self.startdate, None, + self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100)), + self.startdate, None, None, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) - self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100)), self.startdate, None, + self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100)), + self.startdate, None, None, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) @@ -119,6 +115,7 @@ class DaysOverPercentile(Diagnostic): leadtimes = {1: PartialDateTime(lead_date.year, lead_date.month, lead_date.day)} def assign_leadtime(coord, x): + # noinspection PyBroadException try: leadtime_month = 1 partial_date = leadtimes[leadtime_month] @@ -140,8 +137,8 @@ class DaysOverPercentile(Diagnostic): realization_coord = 
var.coord('realization') except iris.exceptions.CoordinateNotFoundError: realization_coord = None - lat_coord = var.coord('latitude') - lon_coord = var.coord('longitude') + self.lat_coord = var.coord('latitude') + self.lon_coord = var.coord('longitude') results_over = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} results_below = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} @@ -154,7 +151,7 @@ class DaysOverPercentile(Diagnostic): for leadtime in leadtimes.keys(): leadtime_slice = var.extract(iris.Constraint(leadtime=leadtime)) - if len(percentiles.coords('leadtime')) >0: + if len(percentiles.coords('leadtime')) > 0: percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime)) else: percentiles_leadtime = percentiles @@ -166,13 +163,15 @@ class DaysOverPercentile(Diagnostic): for percentile_slice in percentiles_leadtime.slices_over('percentile'): percentile = percentile_slice.coord('percentile').points[0] + # noinspection PyTypeChecker days_over = np.sum(leadtime_slice.data > percentile_slice.data, 0) / float(timesteps) - result = self.create_results_cube(days_over, lat_coord, lon_coord, percentile, realization_coord, + result = self.create_results_cube(days_over, percentile, realization_coord, time_coord, var_daysover, long_name_days_over) results_over[percentile].append(result) + # noinspection PyTypeChecker days_below = np.sum(leadtime_slice.data < percentile_slice.data, 0) / float(timesteps) - result = self.create_results_cube(days_below, lat_coord, lon_coord, percentile, realization_coord, + result = self.create_results_cube(days_below, percentile, realization_coord, time_coord, var_days_below, long_name_days_below) results_below[percentile].append(result) @@ -190,16 +189,16 @@ class DaysOverPercentile(Diagnostic): must_exist=False, rename_dimension=True) self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') - def create_results_cube(self, days_over, lat_coord, 
lon_coord, percentile, realization_coord, time_coord, + def create_results_cube(self, days_over, percentile, realization_coord, time_coord, var_name, long_name): result = iris.cube.Cube(days_over.astype(np.float32), var_name=var_name, long_name=long_name, units=1.0) if realization_coord is not None: result.add_aux_coord(realization_coord, 0) - result.add_dim_coord(lat_coord, 1) - result.add_dim_coord(lon_coord, 2) + result.add_dim_coord(self.lat_coord, 1) + result.add_dim_coord(self.lon_coord, 2) else: - result.add_dim_coord(lat_coord, 0) - result.add_dim_coord(lon_coord, 1) + result.add_dim_coord(self.lat_coord, 0) + result.add_dim_coord(self.lon_coord, 1) result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) result.add_aux_coord(time_coord) return result diff --git a/earthdiagnostics/statistics/discretize.py b/earthdiagnostics/statistics/discretize.py index bd0470c7..8d07af23 100644 --- a/earthdiagnostics/statistics/discretize.py +++ b/earthdiagnostics/statistics/discretize.py @@ -68,7 +68,7 @@ class Discretize(Diagnostic): self.process = psutil.Process() def print_memory_used(self): - Log.user_warning('Memory: {0:.2f} GB'.format(self.process.memory_info().rss / 1024.0**3)) + Log.debug('Memory: {0:.2f} GB'.format(self.process.memory_info().rss / 1024.0**3)) @property def bins(self): @@ -190,7 +190,8 @@ class Discretize(Diagnostic): leadtime_cube.add_dim_coord(bins_coord, 0) leadtime_cube.add_dim_coord(self.data_cube.coord('latitude'), 1) leadtime_cube.add_dim_coord(self.data_cube.coord('longitude'), 2) - leadtime_cube.add_aux_coord(iris.coords.AuxCoord(np.array((leadtime,), np.int8), var_name='leadtime', units='months')) + leadtime_cube.add_aux_coord(iris.coords.AuxCoord(np.array((leadtime,), np.int8), var_name='leadtime', + units='months')) cubes.append(leadtime_cube) temp = TempFile.get() iris.FUTURE.netcdf_no_unlimited = True @@ -218,6 +219,7 @@ class Discretize(Diagnostic): else: self.distribution[leadtime] += 
self._calculate_distribution(realization_cube) + # noinspection PyTypeChecker def _get_value_interval(self): if self.check_min_value or self.check_max_value: Log.debug('Calculating max and min values...') diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index d39074d0..1dfb7ec4 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -140,6 +140,7 @@ class THREDDSManager(DataManager): var_folder) return folder_path + # noinspection PyUnusedLocal def get_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VariableType.MEAN): """ Ge a file containing all the data for one year for one variable @@ -270,6 +271,7 @@ class THREDDSManager(DataManager): self.requested_files[file_path] = thredds_subset return thredds_subset + # noinspection PyUnusedLocal def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None, vartype=VariableType.MEAN, diagnostic=None): """ @@ -368,6 +370,7 @@ class THREDDSSubset(DataFile): Log.error('Can not retrieve {0} from server: {1}'.format(self, ex)) self.local_status = LocalStatus.FAILED + # noinspection PyUnusedLocal,PyMethodMayBeStatic def _correct_cube(self, cube, field, filename): if not cube.coords('time'): return diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index ed4351a8..79d5cc6c 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -34,6 +34,10 @@ def suppress_stdout(): sys.stdout = old_stdout +class File(object): + pass + + class Utils(object): """ Container class for miscellaneous utility methods @@ -224,6 +228,8 @@ class Utils(object): """ Copies a file from source to destiny, creating dirs if necessary + :param save_hash: if True, stores hash value in a file + :type save_hash: bool :param source: path to source :type source: str :param destiny: path to destiny @@ -262,6 +268,8 @@ class Utils(object): :type source: str :param destiny: path to 
destiny :type destiny: str + :param save_hash: if True, stores hash value in a file + :type save_hash: bool """ Utils.copy_file(source, destiny, save_hash) os.remove(source) @@ -302,6 +310,10 @@ class Utils(object): Returns the xxHash hash for the given filepath :param filepath: path to the file to compute hash on :type filepath:str + :param use_stored: if True, try to read the hash value from file + :type use_stored: bool + :param save: if True, stores hash value in a file + :type save: bool :return: file's xxHash hash :rtype: str """ @@ -328,9 +340,9 @@ class Utils(object): @staticmethod def _get_hash_filename(filepath): - dir = os.path.dirname(filepath) + folder = os.path.dirname(filepath) filename = os.path.basename(filepath) - hash_file = os.path.join(dir, '.{0}.xxhash64.hash'.format(filename)) + hash_file = os.path.join(folder, '.{0}.xxhash64.hash'.format(filename)) return hash_file @staticmethod @@ -615,7 +627,7 @@ class Utils(object): # noinspection PyBroadException try: os.makedirs(path) - except: + except Exception: # Here we can have a race condition. 
Let's check again for existence and rethrow if still not exists if not os.path.isdir(path): raise diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index ddb82bd5..55cea8db 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -3,6 +3,7 @@ import datetime import operator from bscearth.utils.log import Log +# noinspection PyCompatibility from concurrent.futures import ThreadPoolExecutor from earthdiagnostics.datafile import StorageStatus, LocalStatus diff --git a/launch_diags.sh b/launch_diags.sh index a7d9ff88..477a1ca4 100755 --- a/launch_diags.sh +++ b/launch_diags.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -#SBATCH -n 1 +#SBATCH -n 4 #SBATCH --time 7-00:00:00 #SBATCH --error=job.%J.err #SBATCH --output=job.%J.out diff --git a/test/unit/__init__.py b/test/unit/__init__.py index 8b137891..9bad5790 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -1 +1 @@ - +# coding=utf-8 diff --git a/test/unit/general/__init__.py b/test/unit/general/__init__.py index e69de29b..9bad5790 100644 --- a/test/unit/general/__init__.py +++ b/test/unit/general/__init__.py @@ -0,0 +1 @@ +# coding=utf-8 diff --git a/test/unit/general/test_attribute.py b/test/unit/general/test_attribute.py index ee9a0118..b857ec9c 100644 --- a/test/unit/general/test_attribute.py +++ b/test/unit/general/test_attribute.py @@ -51,5 +51,5 @@ class TestAttribute(TestCase): def test_str(self): mixed = Attribute(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', 'grid', 'att', 'value') - self.assertEquals(str(mixed), 'Write attributte output Startdate: 20010101 Member: 0 Chunk: 0 Variable: atmos:var ' - 'Attributte: att:value Grid: grid') + self.assertEquals(str(mixed), 'Write attributte output Startdate: 20010101 Member: 0 Chunk: 0 ' + 'Variable: atmos:var Attributte: att:value Grid: grid') diff --git a/test/unit/general/test_dailymean.py b/test/unit/general/test_dailymean.py index b58fe146..ec85d9dc 100644 --- 
a/test/unit/general/test_dailymean.py +++ b/test/unit/general/test_dailymean.py @@ -32,9 +32,9 @@ class TestDailyMean(TestCase): jobs = DailyMean.generate_jobs(self.diags, ['diagnostic', 'atmos', 'var', '6hr']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], DailyMean(self.data_manager, '20010101', 0, 0, ModelingRealms.atmos, 'var', - Frequencies.six_hourly, '')) + Frequencies.six_hourly, '')) self.assertEqual(jobs[1], DailyMean(self.data_manager, '20010101', 0, 1, ModelingRealms.atmos, 'var', - Frequencies.six_hourly, '')) + Frequencies.six_hourly, '')) jobs = DailyMean.generate_jobs(self.diags, ['diagnostic', 'seaice', 'var', '3h', 'grid']) self.assertEqual(len(jobs), 2) @@ -52,4 +52,4 @@ class TestDailyMean(TestCase): def test_str(self): mixed = DailyMean(self.data_manager, '20000101', 1, 1, ModelingRealms.ocean, 'var', 'freq', '') self.assertEquals(str(mixed), 'Calculate daily mean Startdate: 20000101 Member: 1 Chunk: 1 ' - 'Variable: ocean:var Original frequency: freq Grid: ') + 'Variable: ocean:var Original frequency: freq Grid: ') diff --git a/test/unit/ocean/__init__.py b/test/unit/ocean/__init__.py index e69de29b..9bad5790 100644 --- a/test/unit/ocean/__init__.py +++ b/test/unit/ocean/__init__.py @@ -0,0 +1 @@ +# coding=utf-8 diff --git a/test/unit/ocean/test_averagesection.py b/test/unit/ocean/test_averagesection.py index a5b29133..d3be4b2b 100644 --- a/test/unit/ocean/test_averagesection.py +++ b/test/unit/ocean/test_averagesection.py @@ -26,6 +26,7 @@ class TestAverageSection(TestCase): def fake_parse(self, value): return value + # noinspection PyUnresolvedReferences @patch.object(DiagnosticVariableOption, 'parse', fake_parse) def test_generate_jobs(self): jobs = AverageSection.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', '0', '0', '0', '0']) diff --git a/test/unit/ocean/test_heatcontent.py b/test/unit/ocean/test_heatcontent.py index cac23fcc..c58d1217 100644 --- a/test/unit/ocean/test_heatcontent.py +++ 
b/test/unit/ocean/test_heatcontent.py @@ -7,6 +7,7 @@ from earthdiagnostics.ocean.heatcontent import HeatContent from mock import Mock, patch +# noinspection PyUnusedLocal def _get_levels_from_meters_mock(cls, box): return 20, 10 diff --git a/test/unit/ocean/test_heatcontentlayer.py b/test/unit/ocean/test_heatcontentlayer.py index 8700025c..bf8135c6 100644 --- a/test/unit/ocean/test_heatcontentlayer.py +++ b/test/unit/ocean/test_heatcontentlayer.py @@ -20,7 +20,6 @@ class TestHeatContentLayer(TestCase): self.box.min_depth = 0 self.box.max_depth = 100 - def test_str(self): diag = HeatContentLayer(self.data_manager, '20000101', 1, 1, self.box, self.weight, 0, 10) self.assertEquals(str(diag), 'Heat content layer Startdate: 20000101 Member: 1 Chunk: 1 Box: 0-100m') diff --git a/test/unit/ocean/test_maxmoc.py b/test/unit/ocean/test_maxmoc.py index 4284750e..f6f12b34 100644 --- a/test/unit/ocean/test_maxmoc.py +++ b/test/unit/ocean/test_maxmoc.py @@ -23,7 +23,6 @@ class TestMaxMoc(TestCase): self.maxmoc = MaxMoc(self.data_manager, '20000101', 1, 2000, self.basins.Global, self.box) - def fake_parse(self, value): if type(value) is Basin: return value diff --git a/test/unit/ocean/test_mxl.py b/test/unit/ocean/test_mxl.py index 0385d0da..ead3fbbc 100644 --- a/test/unit/ocean/test_mxl.py +++ b/test/unit/ocean/test_mxl.py @@ -14,7 +14,6 @@ class TestMxl(TestCase): self.diags.model_version = 'model_version' self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) - def test_generate_jobs(self): jobs = Mxl.generate_jobs(self.diags, ['diagnostic']) self.assertEqual(len(jobs), 2) diff --git a/test/unit/ocean/test_region_mean.py b/test/unit/ocean/test_region_mean.py index 4c96bdd6..e527feda 100644 --- a/test/unit/ocean/test_region_mean.py +++ b/test/unit/ocean/test_region_mean.py @@ -15,7 +15,6 @@ class TestRegionMean(TestCase): self.diags = Mock() self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 
0, 1)) - def fake_parse(self, value): if not value: raise DiagnosticOptionError diff --git a/test/unit/ocean/test_vertical_gradient.py b/test/unit/ocean/test_vertical_gradient.py index e274df25..e2d9d22d 100644 --- a/test/unit/ocean/test_vertical_gradient.py +++ b/test/unit/ocean/test_vertical_gradient.py @@ -15,7 +15,6 @@ class TestVerticalGradient(TestCase): self.diags = Mock() self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) - def fake_parse(self, value): if not value: raise DiagnosticOptionError diff --git a/test/unit/statistics/__init__.py b/test/unit/statistics/__init__.py index e69de29b..9bad5790 100644 --- a/test/unit/statistics/__init__.py +++ b/test/unit/statistics/__init__.py @@ -0,0 +1 @@ +# coding=utf-8 diff --git a/test/unit/statistics/test_climatologicalpercentile.py b/test/unit/statistics/test_climatologicalpercentile.py index 93636693..12752caa 100644 --- a/test/unit/statistics/test_climatologicalpercentile.py +++ b/test/unit/statistics/test_climatologicalpercentile.py @@ -36,7 +36,7 @@ class TestClimatologicalPercentile(TestCase): def test_str(self): diagnostic = ClimatologicalPercentile(self.data_manager, ModelingRealms.ocean, 'var', - 2000, 2001, 11, self.diags.config.experiment) + 2000, 2001, 11, self.diags.config.experiment) self.assertEquals(str(diagnostic), 'Climatological percentile Variable: ocean:var Period: 2000-2001 ' 'Forecast month: 11') diff --git a/test/unit/test_cdftools.py b/test/unit/test_cdftools.py index eb45269c..28de4ac3 100644 --- a/test/unit/test_cdftools.py +++ b/test/unit/test_cdftools.py @@ -7,14 +7,16 @@ from earthdiagnostics.cdftools import CDFTools import mock -def mock_exists(path, access=None): +# noinspection PyUnusedLocal +def bad_file(path, access=None): return not os.path.basename(path).startswith('bad') class TestCDFTools(TestCase): - @mock.patch('os.path.isfile', side_effect=mock_exists) - @mock.patch('os.access', side_effect=mock_exists) + # noinspection 
PyUnusedLocal + @mock.patch('os.path.isfile', side_effect=bad_file) + @mock.patch('os.access', side_effect=bad_file) @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') def test_run(self, mock_path, mock_exists, execute_mock): self.cdftools = CDFTools('/test/path') @@ -37,8 +39,9 @@ class TestCDFTools(TestCase): self.cdftools.run('command', input='input_file', options='-o -p') self.cdftools.run('command', input='input_file', options=('-o', '-p')) - @mock.patch('os.path.isfile', side_effect=mock_exists) - @mock.patch('os.access', side_effect=mock_exists) + # noinspection PyUnusedLocal + @mock.patch('os.path.isfile', side_effect=bad_file) + @mock.patch('os.access', side_effect=bad_file) @mock.patch('earthdiagnostics.utils.Utils.execute_shell_command') def test_run(self, mock_path, mock_exists, execute_mock): self.cdftools = CDFTools('') diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index cc92092e..93cbe9b1 100644 --- a/test/unit/test_diagnostic.py +++ b/test/unit/test_diagnostic.py @@ -300,9 +300,22 @@ class TestDiagnosticVariableListOption(TestCase): class TestDiagnostic(TestCase): - def setUp(cls): + @classmethod + def setUpClass(cls): class MockDiag(Diagnostic): - pass + @classmethod + def generate_jobs(cls, diags, options): + pass + + def declare_data_generated(self): + pass + + def request_data(self): + pass + + def compute(self): + pass + TestDiagnostic.MockDiag = MockDiag def test_str(self): @@ -322,17 +335,12 @@ class TestDiagnostic(TestCase): @patch.object(Diagnostic, 'dispatch') def test_set_status_call_dispatch(self, dispatch_mock): - - diag = Diagnostic(None) diag.status = DiagnosticStatus.FAILED dispatch_mock.assert_called_once_with(diag) @patch.object(Diagnostic, 'dispatch') def test_set_status_call_dispatch(self, dispatch_mock): - class MockDiag(Diagnostic): - pass - diag = Diagnostic(None) diag.status = diag.status assert not dispatch_mock.called, 'Dispatch should not have been called' @@ -347,7 +355,6 @@ class 
TestDiagnostic(TestCase): TestDiagnostic.MockDiag.alias = 'mock' Diagnostic.register(TestDiagnostic.MockDiag) - def test_get_diagnostic(self): self.assertIsNone(Diagnostic.get_diagnostic('none')) TestDiagnostic.MockDiag.alias = 'mock' diff --git a/test/unit/test_frequency.py b/test/unit/test_frequency.py index 845dfe6a..34e32a36 100644 --- a/test/unit/test_frequency.py +++ b/test/unit/test_frequency.py @@ -24,7 +24,10 @@ class TestFrequency(TestCase): self.assertEqual(Frequency('d').folder_name(VariableType.STATISTIC), 'daily_statistics') def test_get_6hourlymean(self): - self.assertEqual(Frequency('6hr').folder_name(VariableType.STATISTIC), '6hourly') + self.assertEqual(Frequency('6hr').folder_name(VariableType.MEAN), '6hourly') + + def test_get_6hourlystatistics(self): + self.assertEqual(Frequency('6hr').folder_name(VariableType.STATISTIC), '6hourly_statistics') def test_get_climatology(self): self.assertEqual(Frequency('clim').folder_name(VariableType.STATISTIC), 'clim') diff --git a/test/unit/test_variable.py b/test/unit/test_variable.py index 8af9b1ad..c53baa63 100644 --- a/test/unit/test_variable.py +++ b/test/unit/test_variable.py @@ -27,6 +27,7 @@ class TestVariableAlias(TestCase): alias.grid = 'grid' self.assertEquals(str(alias), 'alias Basin: basin Grid: grid') + class TestVariable(TestCase): def test_parse_json(self): -- GitLab From 38245e799a0e332bb96e19e9b45b0ce9da66829c Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 13 Sep 2017 17:22:26 +0200 Subject: [PATCH 73/82] Better logging --- earthdiagnostics/datafile.py | 14 ++++++++++++-- earthdiagnostics/threddsmanager.py | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index b79e1ac4..d37877ef 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -57,8 +57,16 @@ class DataFile(Publisher): def unsubscribe(self, who): super(DataFile, self).unsubscribe(who) - if self.local_status == 
LocalStatus.READY and not self.suscribers: - os.remove(self.local_file) + self._clean_local() + + def _clean_local(self): + if self.local_status != LocalStatus.READY or len(self.suscribers) > 0 or self.upload_required(): + return + Log.debug('File {0} no longer needed. Deleting from scratch...'.format(self.remote_file)) + os.remove(self.local_file) + Log.debug('File {0} deleted from scratch'.format(self.remote_file)) + self.local_file = None + self.local_status = LocalStatus.PENDING def upload_required(self): return self.local_status == LocalStatus.READY and self.storage_status == StorageStatus.PENDING @@ -155,6 +163,7 @@ class DataFile(Publisher): except Exception as ex: Log.warning('Link for file {0} can not be created: {1}', self.remote_file, ex) self.storage_status = StorageStatus.READY + self._clean_local() def set_local_file(self, local_file, diagnostic=None, rename_var=''): if diagnostic in self._modifiers: @@ -429,6 +438,7 @@ class NetCDFFile(DataFile): def download(self): try: self.local_status = LocalStatus.DOWNLOADING + Log.debug('Downloading file {0}...', self.remote_file) if not self.local_file: self.local_file = TempFile.get() Utils.get_file_hash(self.remote_file, use_stored=True, save=True) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 1dfb7ec4..8637121a 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -353,6 +353,7 @@ class THREDDSSubset(DataFile): def download(self): try: + Log.debug('Downloading thredds subset {0}...', self) iris.FUTURE.netcdf_promote = True iris.FUTURE.netcdf_no_unlimited = True with iris.FUTURE.context(cell_datetime_objects=True): -- GitLab From f996af195d7fab23ef8b8fe3c5eb5af307fb1413 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 18 Sep 2017 11:38:14 +0200 Subject: [PATCH 74/82] Removed variable manger singleton to make config easier to test --- diags.conf | 2 +- earthdiagnostics/cmorizer.py | 3 +- earthdiagnostics/config.py 
| 16 ++- earthdiagnostics/datamanager.py | 2 +- earthdiagnostics/diagnostic.py | 12 +- earthdiagnostics/earthdiags.py | 2 +- earthdiagnostics/general/attribute.py | 2 +- earthdiagnostics/general/dailymean.py | 2 +- earthdiagnostics/general/module.py | 6 +- earthdiagnostics/general/monthlymean.py | 2 +- earthdiagnostics/general/relink.py | 5 +- earthdiagnostics/general/rewrite.py | 2 +- earthdiagnostics/general/scale.py | 2 +- earthdiagnostics/general/select_levels.py | 2 +- .../general/simplify_dimensions.py | 2 +- .../general/verticalmeanmetersiris.py | 2 +- earthdiagnostics/general/yearlymean.py | 2 +- earthdiagnostics/ocean/averagesection.py | 2 +- earthdiagnostics/ocean/cutsection.py | 2 +- earthdiagnostics/ocean/interpolate.py | 2 +- earthdiagnostics/ocean/interpolatecdo.py | 2 +- earthdiagnostics/ocean/mask_land.py | 2 +- earthdiagnostics/ocean/regionmean.py | 2 +- earthdiagnostics/ocean/rotation.py | 4 +- earthdiagnostics/ocean/verticalgradient.py | 2 +- earthdiagnostics/ocean/verticalmean.py | 2 +- earthdiagnostics/ocean/verticalmeanmeters.py | 2 +- .../statistics/climatologicalpercentile.py | 2 +- earthdiagnostics/statistics/discretize.py | 2 +- earthdiagnostics/variable.py | 2 - earthdiagnostics/work_manager.py | 4 +- test/unit/general/test_attribute.py | 1 - test/unit/test_config.py | 117 ++++++++++++++++++ test/unit/test_diagnostic.py | 64 +++++----- 34 files changed, 199 insertions(+), 81 deletions(-) create mode 100644 test/unit/test_config.py diff --git a/diags.conf b/diags.conf index 8ce61543..31d3bd91 100644 --- a/diags.conf +++ b/diags.conf @@ -88,7 +88,7 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. 
You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 19801101 19811101 19821101 19831101 19841101 19851101 19861101 19871101 19881101 19891101 19701101 19711101 19721101 19731101 19741101 19751101 19761101 19771101 19781101 19791101 +STARTDATES = 20001101 20011101 20021101 20031101 20041101 20051101 20061101 20071101 20081101 20091101 20101101 20111101 20121101 20131101 20141101 20151101 20161101 # STARTDATES = 19840101 19850101 MEMBERS = 0 MEMBER_DIGITS = 1 diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 1381e331..7f97802d 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -13,7 +13,6 @@ from bscearth.utils.date import parse_date, chunk_end_date, previous_day, date2s from earthdiagnostics.frequency import Frequency, Frequencies from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils -from earthdiagnostics.variable import VariableManager from earthdiagnostics.datafile import NetCDFFile @@ -397,7 +396,7 @@ class Cmorizer(object): :param variable: variable's name :type variable: str """ - alias, var_cmor = VariableManager().get_variable_and_alias(variable) + alias, var_cmor = self.config.var_manager.get_variable_and_alias(variable) if var_cmor is None: return diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index cfc8b690..2e4d54f9 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -20,6 +20,7 @@ class Config(object): """ def __init__(self, path): + parser = ConfigParser() parser.optionxform = str parser.read(path) @@ -58,7 +59,8 @@ class Config(object): self.data_convention = parser.get_choice_option('DIAGNOSTICS', 'DATA_CONVENTION', ('specs', 'primavera', 'cmip6', 'preface'), 'specs', ignore_case=True) - VariableManager().load_variables(self.data_convention) + self.var_manager = VariableManager() + self.var_manager.load_variables(self.data_convention) self._diags 
= parser.get_option('DIAGNOSTICS', 'DIAGS') self.frequency = Frequency(parser.get_option('DIAGNOSTICS', 'FREQUENCY')) "Default data frequency to be used by the diagnostics" @@ -67,6 +69,10 @@ class Config(object): "Path to CDFTOOLS executables" self.max_cores = parser.get_int_option('DIAGNOSTICS', 'MAX_CORES', 0) "Maximum number of cores to use" + self.parallel_downloads = parser.get_int_option('DIAGNOSTICS', 'PARALLEL_DOWNLOADS', 1) + "Maximum number of simultaneous downloads" + self.parallel_uploads = parser.get_int_option('DIAGNOSTICS', 'PARALLEL_UPLOADS', 1) + "Maximum number of simultaneous uploads" self.restore_meshes = parser.get_bool_option('DIAGNOSTICS', 'RESTORE_MESHES', False) "If True, forces the tool to copy all the mesh and mask files for the model, regardless of existence" @@ -112,7 +118,7 @@ class Config(object): class CMORConfig(object): - def __init__(self, parser): + def __init__(self, parser, var_manager): self.force = parser.get_bool_option('CMOR', 'FORCE', False) self.force_untar = parser.get_bool_option('CMOR', 'FORCE_UNTAR', False) self.filter_files = parser.get_option('CMOR', 'FILTER_FILES', '') @@ -134,14 +140,14 @@ class CMORConfig(object): self.activity = parser.get_option('CMOR', 'ACTIVITY', 'CMIP') vars_string = parser.get_option('CMOR', 'VARIABLE_LIST', '') - var_manager = VariableManager() + self.var_manager = var_manager if vars_string: self._variable_list = list() for domain_var in vars_string.split(' '): if domain_var.startswith('#'): break splitted = domain_var.split(':') - cmor_var = var_manager.get_variable(splitted[1], silent=True) + cmor_var = self.var_manager.get_variable(splitted[1], silent=True) if not cmor_var: continue if ModelingRealm(splitted[0]) != cmor_var.domain: @@ -174,7 +180,7 @@ class CMORConfig(object): if self._variable_list is None: return True for var in variables: - if self.cmorize(VariableManager().get_variable(var, silent=True)): + if self.cmorize(self.var_manager.get_variable(var, silent=True)): return 
True return False diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 4b643f4e..fe26f90b 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -23,7 +23,7 @@ class DataManager(object): self.config = config self.experiment = config.experiment self._checked_vars = list() - self.variable_list = VariableManager() + self.variable_list = config.var_manager UnitConversion.load_conversions() self.lock = threading.Lock() self.requested_files = {} diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 6330ef4e..5b839839 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -330,23 +330,29 @@ class DiagnosticListFrequenciesOption(DiagnosticOption): class DiagnosticVariableOption(DiagnosticOption): - def __init__(self, name='variable', default_value=None): + def __init__(self, var_manager, name='variable', default_value=None): super(DiagnosticVariableOption, self).__init__(name, default_value) + self.var_manager = var_manager def parse(self, option_value): option_value = self.check_default(option_value) - real_name = VariableManager().get_variable(option_value, False) + real_name = self.var_manager.get_variable(option_value, False) if real_name is None: return option_value return real_name.short_name class DiagnosticVariableListOption(DiagnosticOption): + + def __init__(self, var_manager, name, default_value=None): + super(DiagnosticVariableListOption, self).__init__(name, default_value) + self.var_manager = var_manager + def parse(self, option_value): option_value = self.check_default(option_value) var_names = [] for value in option_value.split('-'): - real_name = VariableManager().get_variable(value, False) + real_name = self.var_manager.get_variable(value, False) if real_name is None: var_names.append(value) else: diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index e8dae22a..4eb31b5f 100755 --- 
a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -228,7 +228,7 @@ class EarthDiags(object): return True def _get_variable_report(self, startdate, member): - var_manager = VariableManager() + var_manager = self.config.var_manager results = list() for var in var_manager.get_all_variables(): if var.domain is None: diff --git a/earthdiagnostics/general/attribute.py b/earthdiagnostics/general/attribute.py index 3fcda66c..1c618578 100644 --- a/earthdiagnostics/general/attribute.py +++ b/earthdiagnostics/general/attribute.py @@ -66,7 +66,7 @@ class Attribute(Diagnostic): """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticOption('name'), DiagnosticComplexStrOption('value'), DiagnosticOption('grid', '')) diff --git a/earthdiagnostics/general/dailymean.py b/earthdiagnostics/general/dailymean.py index b91a451b..7fb4736e 100644 --- a/earthdiagnostics/general/dailymean.py +++ b/earthdiagnostics/general/dailymean.py @@ -71,7 +71,7 @@ class DailyMean(Diagnostic): """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFrequencyOption(), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/general/module.py b/earthdiagnostics/general/module.py index 3e6e48ba..f72aa5f5 100644 --- a/earthdiagnostics/general/module.py +++ b/earthdiagnostics/general/module.py @@ -66,9 +66,9 @@ class Module(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption('componentu'), - DiagnosticVariableOption('componentv'), - DiagnosticVariableOption('module'), + DiagnosticVariableOption(diags.data_manager.config.var_manager, 'componentu'), + DiagnosticVariableOption(diags.data_manager.config.var_manager, 'componentv'), + 
DiagnosticVariableOption(diags.data_manager.config.var_manager, 'module'), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/general/monthlymean.py b/earthdiagnostics/general/monthlymean.py index 6753266d..dca5e730 100644 --- a/earthdiagnostics/general/monthlymean.py +++ b/earthdiagnostics/general/monthlymean.py @@ -69,7 +69,7 @@ class MonthlyMean(Diagnostic): """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFrequencyOption('frequency', Frequencies.daily), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/general/relink.py b/earthdiagnostics/general/relink.py index 689aba87..60c69f4c 100644 --- a/earthdiagnostics/general/relink.py +++ b/earthdiagnostics/general/relink.py @@ -41,6 +41,7 @@ class Relink(Diagnostic): self.domain = domain self.move_old = move_old self.grid = grid + self.var_manager = data_manager.config.var_manager def __str__(self): return 'Relink output Startdate: {0.startdate} Member: {0.member} Chunk: {0.chunk} Move old: {0.move_old} ' \ @@ -63,7 +64,7 @@ class Relink(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticBoolOption('move_old', True), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) @@ -83,7 +84,7 @@ class Relink(Diagnostic): """ Runs the diagnostic """ - self.data_manager.link_file(self.domain, self.variable, VariableManager().get_variable(self.variable), + self.data_manager.link_file(self.domain, self.variable, self.var_manager.get_variable(self.variable), self.startdate, self.member, self.chunk, move_old=self.move_old, grid=self.grid) diff --git a/earthdiagnostics/general/rewrite.py 
b/earthdiagnostics/general/rewrite.py index 6b881716..2aa937ba 100644 --- a/earthdiagnostics/general/rewrite.py +++ b/earthdiagnostics/general/rewrite.py @@ -59,7 +59,7 @@ class Rewrite(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index 25306047..116a978d 100644 --- a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -71,7 +71,7 @@ class Scale(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFloatOption('value'), DiagnosticFloatOption('offset'), DiagnosticOption('grid', ''), diff --git a/earthdiagnostics/general/select_levels.py b/earthdiagnostics/general/select_levels.py index 39a01ca6..1d2fb9ca 100644 --- a/earthdiagnostics/general/select_levels.py +++ b/earthdiagnostics/general/select_levels.py @@ -66,7 +66,7 @@ class SelectLevels(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableListOption('variables'), + DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticIntOption('first_level'), DiagnosticIntOption('last_level'), DiagnosticOption('grid', '')) diff --git a/earthdiagnostics/general/simplify_dimensions.py b/earthdiagnostics/general/simplify_dimensions.py index 9b1d83b3..579a5473 100644 --- a/earthdiagnostics/general/simplify_dimensions.py +++ b/earthdiagnostics/general/simplify_dimensions.py @@ -62,7 +62,7 @@ class SimplifyDimensions(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableListOption('variables'), + 
DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) job_list = list() diff --git a/earthdiagnostics/general/verticalmeanmetersiris.py b/earthdiagnostics/general/verticalmeanmetersiris.py index b609318d..92de0bef 100644 --- a/earthdiagnostics/general/verticalmeanmetersiris.py +++ b/earthdiagnostics/general/verticalmeanmetersiris.py @@ -66,7 +66,7 @@ class VerticalMeanMetersIris(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption(), + options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFloatOption('min_depth', -1), DiagnosticFloatOption('max_depth', -1), DiagnosticDomainOption(default_value=ModelingRealms.ocean)) diff --git a/earthdiagnostics/general/yearlymean.py b/earthdiagnostics/general/yearlymean.py index 517e844d..148f0ca2 100644 --- a/earthdiagnostics/general/yearlymean.py +++ b/earthdiagnostics/general/yearlymean.py @@ -71,7 +71,7 @@ class YearlyMean(Diagnostic): """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFrequencyOption(default_value=diags.config.frequency), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/averagesection.py b/earthdiagnostics/ocean/averagesection.py index 18fafbb8..8ca8abb0 100644 --- a/earthdiagnostics/ocean/averagesection.py +++ b/earthdiagnostics/ocean/averagesection.py @@ -66,7 +66,7 @@ class AverageSection(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticIntOption('min_lon'), DiagnosticIntOption('max_lon'), DiagnosticIntOption('min_lat'), diff --git a/earthdiagnostics/ocean/cutsection.py 
b/earthdiagnostics/ocean/cutsection.py index e8c25a90..d4e5f89c 100644 --- a/earthdiagnostics/ocean/cutsection.py +++ b/earthdiagnostics/ocean/cutsection.py @@ -79,7 +79,7 @@ class CutSection(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption(), + options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticBoolOption('zonal'), DiagnosticIntOption('value'), DiagnosticDomainOption(default_value=ModelingRealms.ocean)) diff --git a/earthdiagnostics/ocean/interpolate.py b/earthdiagnostics/ocean/interpolate.py index e58d89e7..ed64f4be 100644 --- a/earthdiagnostics/ocean/interpolate.py +++ b/earthdiagnostics/ocean/interpolate.py @@ -84,7 +84,7 @@ class Interpolate(Diagnostic): :return: """ options_available = (DiagnosticOption('target_grid'), - DiagnosticVariableListOption('variable'), + DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variable'), DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticBoolOption('invert_lat', False), DiagnosticOption('original_grid', '')) diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index b8eaac55..eefa4dc7 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -82,7 +82,7 @@ class InterpolateCDO(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(default_value=ModelingRealms.ocean), - DiagnosticVariableListOption('variables'), + DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticOption('target_grid', diags.config.experiment.atmos_grid.lower()), DiagnosticChoiceOption('method', InterpolateCDO.METHODS, InterpolateCDO.BILINEAR), DiagnosticBoolOption('mask_oceans', True), diff --git a/earthdiagnostics/ocean/mask_land.py b/earthdiagnostics/ocean/mask_land.py index 71c54007..a7af9aaa 100644 --- a/earthdiagnostics/ocean/mask_land.py +++ 
b/earthdiagnostics/ocean/mask_land.py @@ -56,7 +56,7 @@ class MaskLand(Diagnostic): :return: """ options_available = (DiagnosticDomainOption('domain'), - DiagnosticVariableListOption('variables'), + DiagnosticVariableListOption(diags.data_manager.config.var_manager, 'variables'), DiagnosticChoiceOption('cell', ('t', 'u', 'v', 'f', 'w'), 't'), DiagnosticOption('grid', '')) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index f4a0c02d..038f9f98 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -74,7 +74,7 @@ class RegionMean(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticOption('grid_point', 'T'), DiagnosticBasinOption('basin', Basins().Global), DiagnosticIntOption('min_depth', 0), diff --git a/earthdiagnostics/ocean/rotation.py b/earthdiagnostics/ocean/rotation.py index 54b794ce..bdd6132c 100644 --- a/earthdiagnostics/ocean/rotation.py +++ b/earthdiagnostics/ocean/rotation.py @@ -63,8 +63,8 @@ class Rotation(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption('variableu'), - DiagnosticVariableOption('variablev'), + options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager, 'variableu'), + DiagnosticVariableOption(diags.data_manager.config.var_manager, 'variablev'), DiagnosticDomainOption(default_value=ModelingRealms.ocean), DiagnosticOption('executable', '/home/Earth/jvegas/pyCharm/cfutools/interpolation/rotateUVorca')) diff --git a/earthdiagnostics/ocean/verticalgradient.py b/earthdiagnostics/ocean/verticalgradient.py index 1753e7f2..9cd9ab61 100644 --- a/earthdiagnostics/ocean/verticalgradient.py +++ b/earthdiagnostics/ocean/verticalgradient.py @@ -63,7 +63,7 @@ class VerticalGradient(Diagnostic): :type options: 
list[str] :return: """ - options_available = (DiagnosticVariableOption(), + options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticIntOption('upper_level', 1), DiagnosticIntOption('low_level', 2)) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/verticalmean.py b/earthdiagnostics/ocean/verticalmean.py index c4ebd7fd..21bd4a88 100644 --- a/earthdiagnostics/ocean/verticalmean.py +++ b/earthdiagnostics/ocean/verticalmean.py @@ -64,7 +64,7 @@ class VerticalMean(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption(), + options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticIntOption('min_depth', -1), DiagnosticIntOption('max_depth', -1)) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index d3141af5..8951f493 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -64,7 +64,7 @@ class VerticalMeanMeters(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticVariableOption(), + options_available = (DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticFloatOption('min_depth', -1), DiagnosticFloatOption('max_depth', -1), DiagnosticDomainOption(default_value=ModelingRealms.ocean), diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 85760a0e..bc953248 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -66,7 +66,7 @@ class ClimatologicalPercentile(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), 
DiagnosticIntOption('start_year'), DiagnosticIntOption('end_year'), DiagnosticListIntOption('forecast_month'), diff --git a/earthdiagnostics/statistics/discretize.py b/earthdiagnostics/statistics/discretize.py index 8d07af23..0f1dbd55 100644 --- a/earthdiagnostics/statistics/discretize.py +++ b/earthdiagnostics/statistics/discretize.py @@ -101,7 +101,7 @@ class Discretize(Diagnostic): :return: """ options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(), + DiagnosticVariableOption(diags.data_manager.config.var_manager), DiagnosticIntOption('bins', 2000), DiagnosticFloatOption('min_value', float('nan')), DiagnosticFloatOption('max_value', float('nan')), diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index 4d34edf1..9a623469 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -18,8 +18,6 @@ class VariableJsonException(Exception): class VariableManager(object): - __metaclass__ = SingletonType - def __init__(self): self._cmor_tables_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cmor_tables') self._aliases_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'variable_alias') diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 55cea8db..4c0eb11b 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -54,8 +54,8 @@ class WorkManager(object): self.threads = self.config.max_cores Log.info('Using {0} threads', self.threads) - self.downloader = ThreadPoolExecutor(1) - self.uploader = ThreadPoolExecutor(1) + self.downloader = ThreadPoolExecutor(self.config.parallel_downloads) + self.uploader = ThreadPoolExecutor(self.config.parallel_uploads) self.executor = ThreadPoolExecutor(self.threads) for job in self.jobs: diff --git a/test/unit/general/test_attribute.py b/test/unit/general/test_attribute.py index b857ec9c..da6d4146 100644 --- a/test/unit/general/test_attribute.py +++ 
b/test/unit/general/test_attribute.py @@ -3,7 +3,6 @@ from unittest import TestCase from earthdiagnostics.diagnostic import DiagnosticVariableOption from earthdiagnostics.box import Box -from earthdiagnostics.frequency import Frequencies from earthdiagnostics.general.attribute import Attribute from mock import Mock, patch diff --git a/test/unit/test_config.py b/test/unit/test_config.py new file mode 100644 index 00000000..0e24a876 --- /dev/null +++ b/test/unit/test_config.py @@ -0,0 +1,117 @@ +# coding=utf-8 +from unittest import TestCase + +from earthdiagnostics.config import CMORConfig +from earthdiagnostics.modelingrealm import ModelingRealms + + +class VariableMock(object): + def __init__(self): + self.domain = ModelingRealms.ocean + self.short_name = 'tos' + + def __eq__(self, other): + return self.domain == other.domain and self.short_name == other.short_name + + +class VariableManagerMock(object): + def get_variable(self, alias, silent=False): + if alias == 'bad': + return None + var = VariableMock() + var.short_name = alias + return var + + +class ParserMock(object): + + def __init__(self): + self._values = {} + + def add_value(self, section, var, value): + self._values[self.get_var_string(section, var)] = value + + def get_var_string(self, section, var): + return '{0}:{1}'.format(section, var) + + def get_value(self, section, var, default): + try: + return self._values[self.get_var_string(section, var)] + except KeyError: + return default + + def get_bool_option(self, section, var, default): + return self.get_value(section, var, default) + + def get_int_option(self, section, var, default): + return self.get_value(section, var, default) + + def get_int_list_option(self, section, var, default=list(), separator=' '): + try: + return [int(val) for val in self._values[self.get_var_string(section, var)].split(separator)] + except KeyError: + return default + + def get_option(self, section, var, default): + return self.get_value(section, var, default) + + +class 
TestCMORConfig(TestCase): + + def setUp(self): + self.mock_parser = ParserMock() + self.var_manager = VariableManagerMock() + + def test_basic_config(self): + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.ocean, True) + self.assertEquals(config.atmosphere, True) + self.assertEquals(config.force, False) + self.assertEquals(config.force_untar, False) + self.assertEquals(config.use_grib, True) + self.assertEquals(config.activity, 'CMIP') + self.assertEquals(config.associated_experiment, 'to be filled') + self.assertEquals(config.associated_model, 'to be filled') + self.assertEquals(config.initialization_description, 'to be filled') + self.assertEquals(config.initialization_method, '1') + self.assertEquals(config.initialization_number, 1) + self.assertEquals(config.source, 'to be filled') + self.assertEquals(config.version, '') + self.assertEquals(config.physics_version, '1') + self.assertEquals(config.physics_description, 'to be filled') + self.assertEquals(config.filter_files, '') + self.assertEquals(config.default_atmos_grid, 'gr') + self.assertEquals(config.default_ocean_grid, 'gn') + + def test_cmorize(self): + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.cmorize(VariableMock)) + + def test_not_cmorize(self): + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'ocean:tos') + + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.cmorize(VariableMock())) + + tas_mock = VariableMock() + tas_mock.domain = ModelingRealms.atmos + tas_mock.short_name = 'tas' + self.assertFalse(config.cmorize(tas_mock)) + + thetao_mock = VariableMock() + thetao_mock.domain = ModelingRealms.ocean + thetao_mock.short_name = 'thetao' + self.assertFalse(config.cmorize(thetao_mock)) + + def test_cmorization_chunk(self): + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.chunk_cmorization_requested(1)) + + def test_cmorize_only_some_chunks(self): + 
self.mock_parser.add_value('CMOR', 'CHUNKS', '3 5') + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.chunk_cmorization_requested(3)) + self.assertTrue(config.chunk_cmorization_requested(5)) + self.assertFalse(config.chunk_cmorization_requested(1)) + self.assertFalse(config.chunk_cmorization_requested(4)) + self.assertFalse(config.chunk_cmorization_requested(6)) diff --git a/test/unit/test_diagnostic.py b/test/unit/test_diagnostic.py index 93cbe9b1..ae9921fa 100644 --- a/test/unit/test_diagnostic.py +++ b/test/unit/test_diagnostic.py @@ -250,46 +250,39 @@ class TestDiagnosticVariableOption(TestCase): mock.short_name = name return mock - @patch('earthdiagnostics.variable.VariableManager.get_variable') - def test_parse(self, get_variable_mock): - get_variable_mock.return_value = self.get_var_mock('var1') + def test_parse(self): + var_manager_mock = Mock() + var_manager_mock.get_variable.return_value = self.get_var_mock('var1') - diag = DiagnosticVariableOption() + diag = DiagnosticVariableOption(var_manager_mock) self.assertEqual('var1', diag.parse('var1')) - @patch('earthdiagnostics.variable.VariableManager.get_variable') - def test_parse(self, get_variable_mock): - get_variable_mock.return_value = self.get_var_mock('var1') + def test_not_recognized(self): + var_manager_mock = Mock() + var_manager_mock.get_variable.return_value = None - diag = DiagnosticVariableOption() - self.assertEqual('var1', diag.parse('var1')) - - @patch('earthdiagnostics.variable.VariableManager.get_variable') - def test_not_recognized(self, get_variable_mock): - get_variable_mock.return_value = None - - diag = DiagnosticVariableOption() + diag = DiagnosticVariableOption(var_manager_mock) self.assertEqual('var1', diag.parse('var1')) class TestDiagnosticVariableListOption(TestCase): - @patch('earthdiagnostics.variable.VariableManager.get_variable') - def test_parse_multiple(self, get_variable_mock): - get_variable_mock.side_effect = (self.get_var_mock('var1'), 
self.get_var_mock('var2')) - diag = DiagnosticVariableListOption('variables') + def test_parse_multiple(self): + var_manager_mock = Mock() + var_manager_mock.get_variable.side_effect = (self.get_var_mock('var1'), self.get_var_mock('var2')) + diag = DiagnosticVariableListOption(var_manager_mock, 'variables') self.assertEqual(['var1', 'var2'], diag.parse('var1-var2')) - @patch('earthdiagnostics.variable.VariableManager.get_variable') - def test_parse_one(self, get_variable_mock): - get_variable_mock.return_value = self.get_var_mock('var1') - diag = DiagnosticVariableListOption('variables') + def test_parse_one(self): + var_manager_mock = Mock() + var_manager_mock.get_variable.return_value = self.get_var_mock('var1') + diag = DiagnosticVariableListOption(var_manager_mock, 'variables') self.assertEqual(['var1'], diag.parse('var1')) - @patch('earthdiagnostics.variable.VariableManager.get_variable') - def test_not_recognized(self, get_variable_mock): - get_variable_mock.return_value = None - diag = DiagnosticVariableListOption('variables') + def test_not_recognized(self): + var_manager_mock = Mock() + var_manager_mock.get_variable.return_value = None + diag = DiagnosticVariableListOption(var_manager_mock, 'variables') self.assertEqual(['var1'], diag.parse('var1')) def get_var_mock(self, name): @@ -300,8 +293,7 @@ class TestDiagnosticVariableListOption(TestCase): class TestDiagnostic(TestCase): - @classmethod - def setUpClass(cls): + def setUp(self): class MockDiag(Diagnostic): @classmethod def generate_jobs(cls, diags, options): @@ -316,7 +308,7 @@ class TestDiagnostic(TestCase): def compute(self): pass - TestDiagnostic.MockDiag = MockDiag + self.MockDiag = MockDiag def test_str(self): self.assertEqual(str(Diagnostic(None)), 'Developer must override base class __str__ method') @@ -350,16 +342,16 @@ class TestDiagnostic(TestCase): Diagnostic.register(TestDiagnostic) with self.assertRaises(ValueError): - Diagnostic.register(TestDiagnostic.MockDiag) + 
Diagnostic.register(self.MockDiag) - TestDiagnostic.MockDiag.alias = 'mock' - Diagnostic.register(TestDiagnostic.MockDiag) + self.MockDiag.alias = 'mock' + Diagnostic.register(self.MockDiag) def test_get_diagnostic(self): self.assertIsNone(Diagnostic.get_diagnostic('none')) - TestDiagnostic.MockDiag.alias = 'mock' - Diagnostic.register(TestDiagnostic.MockDiag) - self.assertIs(TestDiagnostic.MockDiag, Diagnostic.get_diagnostic('mock')) + self.MockDiag.alias = 'mock' + Diagnostic.register(self.MockDiag) + self.assertIs(self.MockDiag, Diagnostic.get_diagnostic('mock')) def test_generate_jobs(self): with self.assertRaises(NotImplementedError): -- GitLab From ef91fddc5f1f1b4898898ee5513f35ac5a9013a6 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 20 Sep 2017 17:05:31 +0200 Subject: [PATCH 75/82] Improved days over logging --- diags.conf | 3 +- earthdiagnostics/config.py | 17 +- .../statistics/daysoverpercentile.py | 34 ++-- test/unit/test_config.py | 157 +++++++++++++++++- 4 files changed, 187 insertions(+), 24 deletions(-) diff --git a/diags.conf b/diags.conf index 31d3bd91..0c550965 100644 --- a/diags.conf +++ b/diags.conf @@ -16,8 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = discretize,atmos,sfcWind,2000,0,40 -# climpercent,atmos,sfcWind,1991,1992,11 +DIAGS = climpercent,atmos,sfcWind,1981,2012,11 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 2e4d54f9..a6a77f4d 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -2,15 +2,19 @@ import os import six -from bscearth.utils.log import Log -from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, date2str from bscearth.utils.config_parser import ConfigParser +from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, date2str +from bscearth.utils.log import Log from earthdiagnostics.frequency import Frequency, Frequencies from earthdiagnostics.variable import VariableManager from modelingrealm import ModelingRealm +class ConfigException(Exception): + pass + + class Config(object): """ Class to read and manage the configuration @@ -103,7 +107,7 @@ class Config(object): self.scratch_dir = os.path.join(self.scratch_dir, 'diags', self.experiment.expid) - self.cmor = CMORConfig(parser) + self.cmor = CMORConfig(parser, self.var_manager) self.thredds = THREDDSConfig(parser) self.report = ReportConfig(parser) @@ -149,12 +153,15 @@ class CMORConfig(object): splitted = domain_var.split(':') cmor_var = self.var_manager.get_variable(splitted[1], silent=True) if not cmor_var: + Log.warning('Variable {0} not recognized. 
It will not be cmorized', domain_var) continue if ModelingRealm(splitted[0]) != cmor_var.domain: Log.warning('Domain {0} for variable {1} is not correct: is {2}', splitted[0], cmor_var.short_name, cmor_var.domain) - return + continue self._variable_list.append('{0.domain}:{0.short_name}'.format(cmor_var)) + if len(self._variable_list) == 0: + raise ConfigException('Variable list value is specified, but no variables were found') else: self._variable_list = None @@ -223,7 +230,7 @@ class CMORConfig(object): return self._var_daily elif frequency == Frequencies.monthly: return self._var_monthly - raise Exception('Frequency not recognized: {0}'.format(frequency)) + raise ValueError('Frequency not recognized: {0}'.format(frequency)) def get_levels(self, frequency, variable): return self.get_variables(frequency)[variable] diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index aaa0065d..96c7acba 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -1,17 +1,17 @@ # coding=utf-8 -from bscearth.utils.date import parse_date, add_months - -from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalPercentile -from earthdiagnostics.diagnostic import * -from earthdiagnostics.frequency import Frequencies import iris -import iris.exceptions -import iris.coord_categorisation -from iris.time import PartialDateTime import iris.analysis +import iris.coord_categorisation import iris.coords +import iris.exceptions import numpy as np +from bscearth.utils.date import parse_date, add_months +from bscearth.utils.log import Log +from iris.time import PartialDateTime +from earthdiagnostics.diagnostic import * +from earthdiagnostics.frequency import Frequencies +from earthdiagnostics.statistics.climatologicalpercentile import ClimatologicalPercentile from earthdiagnostics.utils import Utils, TempFile @@ -28,15 +28,14 @@ class 
DaysOverPercentile(Diagnostic): alias = 'daysover' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, domain, variable, start_year, end_year, year_to_compute, forecast_month): + def __init__(self, data_manager, domain, variable, start_year, end_year, startdate, forecast_month): Diagnostic.__init__(self, data_manager) self.variable = variable self.domain = domain self.start_year = start_year self.end_year = end_year - self.year_to_compute = year_to_compute self.forecast_month = forecast_month - self.startdate = '{0}{1:02}01'.format(self.year_to_compute, self.forecast_month) + self.startdate = startdate def __eq__(self, other): return self.startdate == other.startdate and self.domain == other.domain and \ @@ -66,13 +65,11 @@ class DaysOverPercentile(Diagnostic): options = cls.process_options(options, options_available) job_list = list() - year = options['start_year'] - while year <= options['end_year']: + for startdate in diags.config.experiment.startdates: for forecast_month in options['forecast_month']: job_list.append(DaysOverPercentile(diags.data_manager, options['domain'], options['variable'], options['start_year'], options['end_year'], - year, forecast_month)) - year += 1 + startdate, forecast_month)) return job_list def request_data(self): @@ -150,6 +147,7 @@ class DaysOverPercentile(Diagnostic): 'climatology'.format(self) for leadtime in leadtimes.keys(): + Log.debug('Computing startdate {0} leadtime {1}', self.startdate, leadtime) leadtime_slice = var.extract(iris.Constraint(leadtime=leadtime)) if len(percentiles.coords('leadtime')) > 0: percentiles_leadtime = percentiles.extract(iris.Constraint(leadtime=leadtime)) @@ -175,6 +173,7 @@ class DaysOverPercentile(Diagnostic): time_coord, var_days_below, long_name_days_below) results_below[percentile].append(result) + Log.debug('Saving percentiles startdate {0}', self.startdate) for perc in ClimatologicalPercentile.Percentiles: iris.FUTURE.netcdf_no_unlimited = True temp = 
TempFile.get() @@ -189,6 +188,11 @@ class DaysOverPercentile(Diagnostic): must_exist=False, rename_dimension=True) self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') + del self.days_over_file + del self.days_below_file + del self.lat_coord + del self.lon_coord + def create_results_cube(self, days_over, percentile, realization_coord, time_coord, var_name, long_name): result = iris.cube.Cube(days_over.astype(np.float32), var_name=var_name, long_name=long_name, units=1.0) diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 0e24a876..64ed67b2 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -1,7 +1,8 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.config import CMORConfig +from earthdiagnostics.config import CMORConfig, ConfigException, THREDDSConfig, ReportConfig +from earthdiagnostics.frequency import Frequencies from earthdiagnostics.modelingrealm import ModelingRealms @@ -43,6 +44,9 @@ class ParserMock(object): def get_bool_option(self, section, var, default): return self.get_value(section, var, default) + def get_path_option(self, section, var, default): + return self.get_value(section, var, default) + def get_int_option(self, section, var, default): return self.get_value(section, var, default) @@ -85,7 +89,32 @@ class TestCMORConfig(TestCase): def test_cmorize(self): config = CMORConfig(self.mock_parser, self.var_manager) - self.assertTrue(config.cmorize(VariableMock)) + self.assertTrue(config.cmorize(VariableMock())) + self.assertTrue(config.cmorize(None)) + + def test_cmorize_list(self): + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'ocean:thetao ocean:tos') + + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.cmorize(VariableMock())) + + thetao_mock = VariableMock() + thetao_mock.domain = ModelingRealms.ocean + thetao_mock.short_name = 'thetao' + self.assertTrue(config.cmorize(thetao_mock)) + + def test_bad_list(self): + 
self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', '#ocean:tos') + with self.assertRaises(ConfigException): + CMORConfig(self.mock_parser, self.var_manager) + + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'atmos:tos') + with self.assertRaises(ConfigException): + CMORConfig(self.mock_parser, self.var_manager) + + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'ocean:bad') + with self.assertRaises(ConfigException): + CMORConfig(self.mock_parser, self.var_manager) def test_not_cmorize(self): self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'ocean:tos') @@ -93,6 +122,8 @@ class TestCMORConfig(TestCase): config = CMORConfig(self.mock_parser, self.var_manager) self.assertTrue(config.cmorize(VariableMock())) + self.assertFalse(config.cmorize(None)) + tas_mock = VariableMock() tas_mock.domain = ModelingRealms.atmos tas_mock.short_name = 'tas' @@ -103,6 +134,21 @@ class TestCMORConfig(TestCase): thetao_mock.short_name = 'thetao' self.assertFalse(config.cmorize(thetao_mock)) + def test_comment(self): + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'ocean:tos #ocean:thetao ') + + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.cmorize(VariableMock())) + + thetao_mock = VariableMock() + thetao_mock.domain = ModelingRealms.ocean + thetao_mock.short_name = 'thetao' + self.assertFalse(config.cmorize(thetao_mock)) + + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', '#ocean:tos ocean:thetao ') + with self.assertRaises(ConfigException): + CMORConfig(self.mock_parser, self.var_manager) + def test_cmorization_chunk(self): config = CMORConfig(self.mock_parser, self.var_manager) self.assertTrue(config.chunk_cmorization_requested(1)) @@ -115,3 +161,110 @@ class TestCMORConfig(TestCase): self.assertFalse(config.chunk_cmorization_requested(1)) self.assertFalse(config.chunk_cmorization_requested(4)) self.assertFalse(config.chunk_cmorization_requested(6)) + + def test_any_required(self): + config = CMORConfig(self.mock_parser, 
self.var_manager) + self.assertTrue(config.any_required(['tos'])) + + self.mock_parser.add_value('CMOR', 'VARIABLE_LIST', 'ocean:tos ocean:thetao') + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertTrue(config.any_required(['tos', 'thetao', 'tas'])) + self.assertTrue(config.any_required(['tos', 'tas'])) + self.assertTrue(config.any_required(['thetao'])) + + self.assertFalse(config.any_required(['tas'])) + + def test_hourly_vars(self): + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.get_variables(Frequencies.six_hourly), {}) + + self.mock_parser.add_value('CMOR', 'ATMOS_HOURLY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.get_variables(Frequencies.six_hourly), {128: None, + 129: '1', + 130: '1,2', + 131: '1,2,3,4,5,6,7,8,9', + 132: '0,5'}) + + self.assertEquals(config.get_levels(Frequencies.six_hourly, 128), None) + self.assertEquals(config.get_levels(Frequencies.six_hourly, 129), '1') + self.assertEquals(config.get_levels(Frequencies.six_hourly, 130), '1,2') + self.assertEquals(config.get_levels(Frequencies.six_hourly, 131), '1,2,3,4,5,6,7,8,9',) + self.assertEquals(config.get_levels(Frequencies.six_hourly, 132), '0,5') + + def test_daily_vars(self): + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.get_variables(Frequencies.daily), {}) + + self.mock_parser.add_value('CMOR', 'ATMOS_DAILY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.get_variables(Frequencies.daily), {128: None, + 129: '1', + 130: '1,2', + 131: '1,2,3,4,5,6,7,8,9', + 132: '0,5'}) + + self.assertEquals(config.get_levels(Frequencies.daily, 128), None) + self.assertEquals(config.get_levels(Frequencies.daily, 129), '1') + self.assertEquals(config.get_levels(Frequencies.daily, 130), '1,2') + 
self.assertEquals(config.get_levels(Frequencies.daily, 131), '1,2,3,4,5,6,7,8,9',) + self.assertEquals(config.get_levels(Frequencies.daily, 132), '0,5') + + def test_monthly_vars(self): + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.get_variables(Frequencies.monthly), {}) + + self.mock_parser.add_value('CMOR', 'ATMOS_MONTHLY_VARS', '128,129:1,130:1-2,131:1:10,132:0:10:5') + config = CMORConfig(self.mock_parser, self.var_manager) + self.assertEquals(config.get_variables(Frequencies.monthly), {128: None, + 129: '1', + 130: '1,2', + 131: '1,2,3,4,5,6,7,8,9', + 132: '0,5'}) + + self.assertEquals(config.get_levels(Frequencies.monthly, 128), None) + self.assertEquals(config.get_levels(Frequencies.monthly, 129), '1') + self.assertEquals(config.get_levels(Frequencies.monthly, 130), '1,2') + self.assertEquals(config.get_levels(Frequencies.monthly, 131), '1,2,3,4,5,6,7,8,9',) + self.assertEquals(config.get_levels(Frequencies.monthly, 132), '0,5') + + def test_bad_frequency_vars(self): + config = CMORConfig(self.mock_parser, self.var_manager) + with self.assertRaises(ValueError): + self.assertEquals(config.get_variables(Frequencies.climatology), {}) + + +class TestTHREDDSConfig(TestCase): + + def setUp(self): + self.mock_parser = ParserMock() + + def test_basic_config(self): + config = THREDDSConfig(self.mock_parser) + self.assertEquals(config.server_url, '') + + def test_url(self): + self.mock_parser.add_value('THREDDS', 'SERVER_URL', 'test_url') + config = THREDDSConfig(self.mock_parser) + self.assertEquals(config.server_url, 'test_url') + + +class TestReportConfig(TestCase): + + def setUp(self): + self.mock_parser = ParserMock() + + def test_basic_config(self): + config = ReportConfig(self.mock_parser) + self.assertEquals(config.path, '') + self.assertEquals(config.maximum_priority, 10) + + def test_path(self): + self.mock_parser.add_value('REPORT', 'PATH', 'new_path') + config = ReportConfig(self.mock_parser) + 
self.assertEquals(config.path, 'new_path') + + def test_priority(self): + self.mock_parser.add_value('REPORT', 'MAXIMUM_PRIORITY', 3) + config = ReportConfig(self.mock_parser) + self.assertEquals(config.maximum_priority, 3) -- GitLab From f3782d5d8a8a94b7d8a48af631536ffe053ad2ef Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 21 Sep 2017 18:35:16 +0200 Subject: [PATCH 76/82] Added custom downloader to prioritize better the downloads --- diags.conf | 4 +- earthdiagnostics/datafile.py | 19 ++++- earthdiagnostics/diagnostic.py | 7 +- earthdiagnostics/earthdiags.py | 20 +++-- .../statistics/daysoverpercentile.py | 5 +- earthdiagnostics/work_manager.py | 76 +++++++++++++++++-- 6 files changed, 100 insertions(+), 31 deletions(-) diff --git a/diags.conf b/diags.conf index 0c550965..36683e33 100644 --- a/diags.conf +++ b/diags.conf @@ -16,7 +16,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = climpercent,atmos,sfcWind,1981,2012,11 +DIAGS = daysover,atmos,sfcWind,1981,2012,11 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. @@ -87,7 +87,7 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. 
You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 20001101 20011101 20021101 20031101 20041101 20051101 20061101 20071101 20081101 20091101 20101101 20111101 20121101 20131101 20141101 20151101 20161101 +STARTDATES = 20131101 20141101 20151101 # STARTDATES = 19840101 19850101 MEMBERS = 0 MEMBER_DIGITS = 1 diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index d37877ef..da44f5f8 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -1,15 +1,15 @@ # coding: utf-8 import csv +import os import shutil from datetime import datetime import numpy as np -import os from bscearth.utils.log import Log +from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils, TempFile from publisher import Publisher -from earthdiagnostics.modelingrealm import ModelingRealms from variable_type import VariableType @@ -59,6 +59,12 @@ class DataFile(Publisher): super(DataFile, self).unsubscribe(who) self._clean_local() + @property + def size(self): + if self.local_status == LocalStatus.READY: + os.path.getsize(self.local_file) + return None + def _clean_local(self): if self.local_status != LocalStatus.READY or len(self.suscribers) > 0 or self.upload_required(): return @@ -72,7 +78,6 @@ class DataFile(Publisher): return self.local_status == LocalStatus.READY and self.storage_status == StorageStatus.PENDING def download_required(self): - if not self.local_status == LocalStatus.PENDING: return False @@ -459,4 +464,12 @@ class NetCDFFile(DataFile): except Exception as ex: Log.error('Can not create link to {1}: {0}'.format(ex, self.remote_file)) + @property + def size(self): + if self.local_status == LocalStatus.READY: + return os.path.getsize(self.local_file) + if self.storage_status == StorageStatus.READY: + return os.path.getsize(self.remote_file) + return None + diff --git a/earthdiagnostics/diagnostic.py b/earthdiagnostics/diagnostic.py index 
5b839839..4e90373d 100644 --- a/earthdiagnostics/diagnostic.py +++ b/earthdiagnostics/diagnostic.py @@ -4,9 +4,8 @@ import datetime from datafile import StorageStatus, LocalStatus from earthdiagnostics.constants import Basins, Basin from earthdiagnostics.frequency import Frequency -from earthdiagnostics.variable_type import VariableType from earthdiagnostics.modelingrealm import ModelingRealms -from earthdiagnostics.variable import VariableManager +from earthdiagnostics.variable_type import VariableType from publisher import Publisher @@ -238,8 +237,10 @@ class Diagnostic(Publisher): request.unsubscribe(self) def all_requests_in_storage(self): - return not any(request.storage_status != StorageStatus.READY for request in self._requests) + return self.pending_requests() == 0 + def pending_requests(self): + return len([request.storage_status != StorageStatus.READY for request in self._requests]) class DiagnosticOption(object): diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 4eb31b5f..4b7f1384 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -1,26 +1,24 @@ #!/usr/bin/env python # coding=utf-8 import argparse +import os import shutil -import pkg_resources +import tempfile +from distutils.spawn import find_executable +import bscearth.utils.path import netCDF4 -import os +import pkg_resources from bscearth.utils.date import * -import bscearth.utils.path -import tempfile -from earthdiagnostics.constants import Basins -from earthdiagnostics.config import Config +from earthdiagnostics import cdftools from earthdiagnostics.cmormanager import CMORManager -from earthdiagnostics.threddsmanager import THREDDSManager +from earthdiagnostics.config import Config +from earthdiagnostics.constants import Basins from earthdiagnostics.obsreconmanager import ObsReconManager -from earthdiagnostics import cdftools +from earthdiagnostics.threddsmanager import THREDDSManager from earthdiagnostics.utils import TempFile, 
Utils - -from earthdiagnostics.variable import VariableManager from work_manager import WorkManager -from distutils.spawn import find_executable class EarthDiags(object): diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 96c7acba..ea7e36d7 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -99,6 +99,7 @@ class DaysOverPercentile(Diagnostic): """ Runs the diagnostic """ + raise Exception('Pues me enfado y no respiro!!!') iris.FUTURE.netcdf_promote = True percentiles = iris.load_cube(self.percentiles_file.local_file) @@ -206,7 +207,3 @@ class DaysOverPercentile(Diagnostic): result.add_aux_coord(iris.coords.AuxCoord(percentile, long_name='percentile')) result.add_aux_coord(time_coord) return result - - - - diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 4c0eb11b..ef3a62ea 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -1,19 +1,19 @@ # coding=utf-8 import datetime - import operator +import threading +import time + from bscearth.utils.log import Log # noinspection PyCompatibility from concurrent.futures import ThreadPoolExecutor from earthdiagnostics.datafile import StorageStatus, LocalStatus from earthdiagnostics.diagnostic import DiagnosticStatus, Diagnostic, DiagnosticOptionError -from earthdiagnostics.utils import Utils, TempFile -import threading - -from earthdiagnostics.ocean import * from earthdiagnostics.general import * +from earthdiagnostics.ocean import * from earthdiagnostics.statistics import * +from earthdiagnostics.utils import Utils, TempFile class WorkManager(object): @@ -54,7 +54,7 @@ class WorkManager(object): self.threads = self.config.max_cores Log.info('Using {0} threads', self.threads) - self.downloader = ThreadPoolExecutor(self.config.parallel_downloads) + self.downloader = Downloader() self.uploader = 
ThreadPoolExecutor(self.config.parallel_uploads) self.executor = ThreadPoolExecutor(self.threads) @@ -69,8 +69,9 @@ class WorkManager(object): for file_object in self.data_manager.requested_files.values(): file_object.subscribe(self, self._file_object_status_changed) if file_object.download_required(): - self.downloader.submit(file_object.download) + self.downloader.submit(file_object) + self.downloader.start() self.lock = threading.Lock() self.lock.acquire() @@ -96,7 +97,7 @@ class WorkManager(object): def _file_object_status_changed(self, file_object): if file_object.download_required(): - self.downloader.submit(file_object.download) + self.downloader.submit(file_object) return if file_object.upload_required(): self.uploader.submit(file_object.upload) @@ -223,3 +224,62 @@ class WorkManager(object): Diagnostic.register(VerticalGradient) +class Downloader(object): + def __init__(self): + self._downloads = [] + self._lock = threading.Lock() + self._wait = threading.Semaphore() + self.stop = False + + def start(self): + self._thread = threading.Thread(target=self.downloader) + self._thread.start() + + def submit(self, datafile): + self._lock.acquire() + self._downloads.append(datafile) + self._lock.release() + + def downloader(self): + try: + def suscribers_waiting(datafile): + waiting = 0 + for diag in datafile.suscribers: + if not isinstance(diag, Diagnostic): + continue + if diag.pending_requests() == 1: + waiting += 1 + return waiting + + def prioritize(datafile1, datafile2): + waiting = suscribers_waiting(datafile1) - suscribers_waiting(datafile2) + if waiting: + return -waiting + + suscribers = len(datafile1.suscribers) - len(datafile2.suscribers) + if suscribers: + return -suscribers + + size = datafile1.size - datafile2.size + if size: + return -size + return 0 + + while True: + with self._lock: + if len(self._downloads) == 0: + if self.stop: + return + time.sleep(0.01) + break + self._downloads.sort(prioritize) + datafile = self._downloads[0] + 
self._downloads.remove(datafile) + datafile.download() + except Exception as ex: + Log.critical('Unhandled error at downloader: {0}', ex) + + def shutdown(self): + self.stop = True + self._thread.join() + -- GitLab From 67fa241629ffd086c3d3835da04d0dbae2502b15 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 27 Sep 2017 10:52:56 +0200 Subject: [PATCH 77/82] Reworked days_over chain to adapt again to datasets without members --- diags.conf | 14 ++-- earthdiagnostics/ocean/interpolatecdo.py | 15 ++-- .../statistics/climatologicalpercentile.py | 40 +++++------ .../statistics/daysoverpercentile.py | 70 ++++++++++++++----- earthdiagnostics/statistics/discretize.py | 60 +++++++++------- 5 files changed, 125 insertions(+), 74 deletions(-) diff --git a/diags.conf b/diags.conf index 36683e33..be870c8b 100644 --- a/diags.conf +++ b/diags.conf @@ -6,7 +6,7 @@ SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive # Specify if your data is from an experiment (exp), observation (obs) or reconstructions (recon) -DATA_TYPE = exp +DATA_TYPE = recon # CMORization type to use. Important also for THREDDS as it affects variable name conventions. # Options: SPECS (default), PRIMAVERA, CMIP6 DATA_CONVENTION = SPECS @@ -16,8 +16,10 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty -DIAGS = daysover,atmos,sfcWind,1981,2012,11 -# DIAGS = OHC +#DIAGS = discretize,atmos,sfcWind,,0,40 +#DIAGS = climpercent,atmos,sfcWind,2010,2012,11 +DIAGS = daysover,atmos,sfcWind,2010,2012,11 +#DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. 
monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. FREQUENCY = 6hr @@ -26,7 +28,7 @@ CDFTOOLS_PATH = ~jvegas/CDFTOOLS/bin # If true, copies the mesh files regardless of presence in scratch dir RESTORE_MESHES = False # Limits the maximum amount of threads used. Default: 0 (no limitation, one per virtual core available)z -#MAX_CORES = 1 +MAX_CORES = 2 [CMOR] # If true, recreates CMOR files regardless of presence. Default = False @@ -70,7 +72,7 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard INSTITUTE = ecmwf -MODEL = system4_m1 +MODEL = erainterim # Model version: Available versions MODEL_VERSION = Ec3.2_O1L75 # Atmospheric output timestep in hours @@ -87,7 +89,7 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment EXPID = testing_erainterim -STARTDATES = 20131101 20141101 20151101 +STARTDATES = 20101101 20111101 20121101 # STARTDATES = 19840101 19850101 MEMBERS = 0 MEMBER_DIGITS = 1 diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index eefa4dc7..0bc7709b 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -1,10 +1,11 @@ # coding=utf-8 -from earthdiagnostics.diagnostic import * -from earthdiagnostics.utils import Utils, TempFile +import os -from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms import numpy as np -import os + +from earthdiagnostics.diagnostic import * +from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class InterpolateCDO(Diagnostic): @@ -201,6 +202,7 @@ class InterpolateCDO(Diagnostic): must_exist=False, rename_dimension=True) handler = Utils.openCdf(variable_file) var = handler.variables[self.variable] + units = 
var.units coordinates = list() for dim in var.dimensions: if dim == 'i': @@ -225,6 +227,11 @@ class InterpolateCDO(Diagnostic): temp = TempFile.get() Utils.cdo.remap(','.join((self.grid.split('_')[0], self.weights)), input=variable_file, output=temp) + + handler = Utils.openCdf(temp) + handler.variables[self.variable].units = units + handler.close() + self.regridded.set_local_file(temp) diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index bc953248..229ad020 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -1,4 +1,9 @@ # coding=utf-8 +import iris +import iris.coord_categorisation +import iris.coords +import iris.exceptions +import numpy as np import six from bscearth.utils.log import Log @@ -7,11 +12,6 @@ from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, Di from earthdiagnostics.frequency import Frequencies from earthdiagnostics.utils import TempFile from earthdiagnostics.variable_type import VariableType -import numpy as np -import iris -import iris.coord_categorisation -import iris.exceptions -import iris.coords class ClimatologicalPercentile(Diagnostic): @@ -107,24 +107,12 @@ class ClimatologicalPercentile(Diagnostic): def _save_results(self, percentile_values): temp = TempFile.get() - percentile_coord = iris.coords.DimCoord(ClimatologicalPercentile.Percentiles, long_name='percentile') - results = iris.cube.CubeList() - for leadtime, data in percentile_values.items(): - result = iris.cube.Cube(data, var_name='percent', - units=self.distribution.coord('bin').units) - result.add_dim_coord(percentile_coord, 0) - result.add_dim_coord(self.distribution.coord('latitude'), 1) - result.add_dim_coord(self.distribution.coord('longitude'), 2) - result.add_aux_coord(iris.coords.AuxCoord(np.int8(leadtime), long_name='leadtime')) - results.append(result) iris.FUTURE.netcdf_no_unlimited = 
True - iris.save(results.merge_cube(), temp, zlib=True) - + iris.save(percentile_values.merge_cube(), temp, zlib=True) self.percentiles_file.set_local_file(temp, rename_var='percent') def _calculate_percentiles(self): Log.debug('Calculating percentiles') - percentiles = {} bins = self.distribution.coord('bin').points @@ -135,10 +123,18 @@ class ClimatologicalPercentile(Diagnostic): index = np.searchsorted(cs, percentile_values) return [bins[i] for i in index] + results = iris.cube.CubeList() + percentile_coord = iris.coords.DimCoord(ClimatologicalPercentile.Percentiles, long_name='percentile') + print(self.distribution) for leadtime_slice in self.distribution.slices_over('leadtime'): - leadtime = leadtime_slice.coord('leadtime').points[0] - percentiles[leadtime] = np.apply_along_axis(calculate, 0, leadtime_slice.data) - return percentiles + result = iris.cube.Cube(np.apply_along_axis(calculate, 0, leadtime_slice.data), var_name='percent', + units=self.distribution.coord('bin').units) + result.add_dim_coord(percentile_coord, 0) + result.add_dim_coord(leadtime_slice.coord('latitude'), 1) + result.add_dim_coord(leadtime_slice.coord('longitude'), 2) + result.add_aux_coord(leadtime_slice.coord('leadtime')) + results.append(result) + return results def _get_distribution(self): for startdate, startdate_file in six.iteritems(self.leadtime_files): @@ -148,3 +144,5 @@ class ClimatologicalPercentile(Diagnostic): self.distribution = data_cube else: self.distribution += data_cube + if len(self.distribution.coords('leadtime')) == 0: + self.distribution.add_aux_coord(iris.coords.AuxCoord(1, var_name='leadtime', units='months')) diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index ea7e36d7..74fb3c5e 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -1,4 +1,6 @@ # coding=utf-8 +import os + import iris import iris.analysis import 
iris.coord_categorisation @@ -80,17 +82,17 @@ class DaysOverPercentile(Diagnostic): self.variable_file = self.request_chunk(self.domain, self.variable, self.startdate, None, None) def declare_data_generated(self): - var_over = self.variable + '_daysover_q{0}' - var_below = self.variable + '_daysbelow_q{0}' + var_over = self.variable + '_daysover_q{0}_{1.start_year}-{1.end_year}' + var_below = self.variable + '_daysbelow_q{0}_{1.start_year}-{1.end_year}' self.days_over_file = {} self.days_below_file = {} for perc in ClimatologicalPercentile.Percentiles: - self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100)), + self.days_over_file[perc] = self.declare_chunk(self.domain, var_over.format(int(perc * 100), self), self.startdate, None, None, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) - self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100)), + self.days_below_file[perc] = self.declare_chunk(self.domain, var_below.format(int(perc * 100), self), self.startdate, None, None, frequency=Frequencies.monthly, vartype=VariableType.STATISTIC) @@ -99,7 +101,6 @@ class DaysOverPercentile(Diagnostic): """ Runs the diagnostic """ - raise Exception('Pues me enfado y no respiro!!!') iris.FUTURE.netcdf_promote = True percentiles = iris.load_cube(self.percentiles_file.local_file) @@ -140,8 +141,8 @@ class DaysOverPercentile(Diagnostic): results_over = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} results_below = {perc: iris.cube.CubeList() for perc in ClimatologicalPercentile.Percentiles} - var_daysover = 'daysover' - var_days_below = 'daysbelow' + var_daysover = 'days_over' + var_days_below = 'days_below' long_name_days_over = 'Proportion of days over a given percentile for {0.start_year}-{0.end_year} ' \ 'climatology'.format(self) long_name_days_below = 'Proportion of days below a given percentile for {0.start_year}-{0.end_year} ' \ @@ -177,23 +178,56 @@ 
class DaysOverPercentile(Diagnostic): Log.debug('Saving percentiles startdate {0}', self.startdate) for perc in ClimatologicalPercentile.Percentiles: iris.FUTURE.netcdf_no_unlimited = True - temp = TempFile.get() - iris.save(results_over[perc].merge_cube(), temp, zlib=True, unlimited_dimensions=['time']) - Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, - must_exist=False, rename_dimension=True) - self.days_over_file[perc].set_local_file(temp, rename_var='daysover') - - temp = TempFile.get() - iris.save(results_below[perc].merge_cube(), temp, zlib=True, unlimited_dimensions=['time']) - Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, - must_exist=False, rename_dimension=True) - self.days_below_file[perc].set_local_file(temp, rename_var='daysbelow') + self.days_over_file[perc].set_local_file(self.save_to_file(perc, results_over, var_daysover), + rename_var=var_daysover) + self.days_below_file[perc].set_local_file(self.save_to_file(perc, results_below, var_days_below), + rename_var=var_days_below) del self.days_over_file del self.days_below_file del self.lat_coord del self.lon_coord + def save_to_file(self, perc, results_over, var_daysover): + temp = TempFile.get() + iris.save(results_over[perc].merge_cube(), temp, zlib=True, unlimited_dimensions=['time']) + Utils.rename_variables(temp, {'dim2': 'ensemble', 'dim1': 'ensemble'}, + must_exist=False, rename_dimension=True) + handler = Utils.openCdf(temp) + if 'time' not in handler.dimensions: + new_file = TempFile.get() + new_handler = Utils.openCdf(new_file, 'w') + + new_handler.createDimension('time', 1) + for dimension in handler.dimensions: + Utils.copy_dimension(handler, new_handler, dimension) + + for variable in handler.variables.keys(): + if variable in (var_daysover, 'time', 'time_bnds'): + continue + Utils.copy_variable(handler, new_handler, variable) + old_var = handler.variables[var_daysover] + new_var = new_handler.createVariable(var_daysover, old_var.dtype, 
('time',) + old_var.dimensions, + zlib=True, fill_value=1.0e20) + Utils.copy_attributes(new_var, old_var) + new_var[0, :] = old_var[:] + + old_var = handler.variables['time'] + new_var = new_handler.createVariable('time', old_var.dtype, ('time',)) + Utils.copy_attributes(new_var, old_var) + new_var[0] = old_var[0] + + old_var = handler.variables['time_bnds'] + new_var = new_handler.createVariable('time_bnds', old_var.dtype, ('time',) + old_var.dimensions) + Utils.copy_attributes(new_var, old_var) + new_var[0, :] = old_var[:] + + new_handler.close() + os.remove(temp) + temp = new_file + handler.close() + return temp + def create_results_cube(self, days_over, percentile, realization_coord, time_coord, var_name, long_name): result = iris.cube.Cube(days_over.astype(np.float32), var_name=var_name, long_name=long_name, units=1.0) diff --git a/earthdiagnostics/statistics/discretize.py b/earthdiagnostics/statistics/discretize.py index 0f1dbd55..81ed8232 100644 --- a/earthdiagnostics/statistics/discretize.py +++ b/earthdiagnostics/statistics/discretize.py @@ -1,21 +1,23 @@ # coding=utf-8 -from bscearth.utils.date import parse_date, add_months -from bscearth.utils.log import Log +import math -from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \ - DiagnosticIntOption, DiagnosticFloatOption -from earthdiagnostics.utils import Utils, TempFile -from earthdiagnostics.variable_type import VariableType -import numpy as np import iris -from iris.cube import Cube import iris.coord_categorisation -from iris.time import PartialDateTime -import iris.exceptions import iris.coords -import math +import iris.exceptions +import iris.unit +import numpy as np import psutil import six +from bscearth.utils.date import parse_date, add_months, add_days +from bscearth.utils.log import Log +from iris.cube import Cube +from iris.time import PartialDateTime + +from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, 
DiagnosticDomainOption, \ + DiagnosticIntOption, DiagnosticFloatOption +from earthdiagnostics.utils import Utils, TempFile +from earthdiagnostics.variable_type import VariableType class Discretize(Diagnostic): @@ -184,14 +186,25 @@ class Discretize(Diagnostic): cubes = iris.cube.CubeList() + date = parse_date(self.startdate) + date = add_days(date, 14, self.data_manager.config.experiment.calendar) + for leadtime, distribution in six.iteritems(self.distribution): leadtime_cube = Cube(distribution.astype(np.uint32), var_name=self.data_cube.var_name, standard_name=self.data_cube.standard_name, units='1') leadtime_cube.add_dim_coord(bins_coord, 0) leadtime_cube.add_dim_coord(self.data_cube.coord('latitude'), 1) leadtime_cube.add_dim_coord(self.data_cube.coord('longitude'), 2) - leadtime_cube.add_aux_coord(iris.coords.AuxCoord(np.array((leadtime,), np.int8), var_name='leadtime', + leadtime_cube.add_aux_coord(iris.coords.AuxCoord(leadtime, + var_name='leadtime', units='months')) + lead_date = add_months(date, leadtime - 1, self.data_manager.config.experiment.calendar) + leadtime_cube.add_aux_coord(iris.coords.AuxCoord(iris.unit.date2num(lead_date, + unit='days since 1950-01-01', + calendar="standard"), + var_name='time', + units='days since 1950-01-01')) + cubes.append(leadtime_cube) temp = TempFile.get() iris.FUTURE.netcdf_no_unlimited = True @@ -201,23 +214,20 @@ class Discretize(Diagnostic): def _get_distribution(self): self.distribution = {} Log.debug('Discretizing...') - for leadtime in set(self.data_cube.coord('leadtime').points): Log.debug('Discretizing leadtime {0}', leadtime) leadtime_cube = self.data_cube.extract(iris.Constraint(leadtime=leadtime)) - for realization in self.data_cube.coord('realization').points: - Log.debug('Discretizing realization {0}', realization) + if 'realization' in leadtime_cube.coords(): + for realization_cube in self.data_cube.slices_over('realization'): + Log.debug('Discretizing realization {0}', 
realization_cube.coord('realization').points[0]) + self.print_memory_used() + if leadtime not in self.distribution: + self.distribution[leadtime] = self._calculate_distribution(realization_cube) + else: + self.distribution[leadtime] += self._calculate_distribution(realization_cube) + else: self.print_memory_used() - try: - realization_cube = leadtime_cube.extract(iris.Constraint(realization=realization)) - except iris.exceptions.CoordinateNotFoundError: - realization_cube = leadtime_cube - if realization_cube is None and realization == 0: - realization_cube = leadtime_cube - if leadtime not in self.distribution: - self.distribution[leadtime] = self._calculate_distribution(realization_cube) - else: - self.distribution[leadtime] += self._calculate_distribution(realization_cube) + self.distribution[leadtime] = self._calculate_distribution(leadtime_cube) # noinspection PyTypeChecker def _get_value_interval(self): -- GitLab From afaadd63be269bd9f45ab6d8a612d7e3b164562f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 27 Sep 2017 11:10:32 +0200 Subject: [PATCH 78/82] Fixed bug that was preventing to add the f6h to the recon and exp variables --- diags.conf | 3 +-- earthdiagnostics/datamanager.py | 11 +++++------ earthdiagnostics/obsreconmanager.py | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/diags.conf b/diags.conf index be870c8b..7c063c69 100644 --- a/diags.conf +++ b/diags.conf @@ -17,8 +17,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # an alias defined in the ALIAS section (see more below). If you are using the diagnostics just to CMORize, leave it # empty #DIAGS = discretize,atmos,sfcWind,,0,40 -#DIAGS = climpercent,atmos,sfcWind,2010,2012,11 -DIAGS = daysover,atmos,sfcWind,2010,2012,11 +DIAGS = climpercent,atmos,sfcWind,2010,2012,11 daysover,atmos,sfcWind,2010,2012,11 #DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. 
monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index fe26f90b..e3799d31 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -1,15 +1,14 @@ # coding: utf-8 import csv -import shutil -import threading import os import re +import shutil +import threading +from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus +from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils -from earthdiagnostics.variable import VariableManager from earthdiagnostics.variable_type import VariableType -from earthdiagnostics.modelingrealm import ModelingRealms -from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus class DataManager(object): @@ -63,7 +62,7 @@ class DataManager(object): if grid: var = '{0}-{1}'.format(var, grid) - if domain in [ModelingRealms.ocean, ModelingRealms.seaIce]: + if domain in [ModelingRealms.ocean, ModelingRealms.seaIce, ModelingRealms.ocnBgchem]: return '{0}_f{1}h'.format(var, self.experiment.ocean_timestep) else: return '{0}_f{1}h'.format(var, self.experiment.atmos_timestep) diff --git a/earthdiagnostics/obsreconmanager.py b/earthdiagnostics/obsreconmanager.py index ee732dc0..661986c9 100644 --- a/earthdiagnostics/obsreconmanager.py +++ b/earthdiagnostics/obsreconmanager.py @@ -98,7 +98,7 @@ class ObsReconManager(DataManager): def _get_folder_path(self, frequency, domain, variable, grid, vartype): - if self.config.data_type == 'exp' and not frequency.frequency.endswith('hr'): + if not frequency.frequency.endswith('hr'): var_folder = self.get_varfolder(domain, variable, grid) else: var_folder = variable -- GitLab From 7118b8f280b55542060fe256dcc38263a1482f64 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 27 Sep 2017 12:04:34 +0200 Subject: [PATCH 79/82] Added regex 
support for startdate definition --- earthdiagnostics/config.py | 9 +++- setup.py | 6 ++- .../statistics/test_daysoverpercentile.py | 5 +- test/unit/test_config.py | 49 +++++++++++++++++-- 4 files changed, 61 insertions(+), 8 deletions(-) diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index a6a77f4d..4b25e94c 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -274,7 +274,14 @@ class ExperimentConfig(object): members.append(int(mem)) self.members = members - self.startdates = parser.get_option('EXPERIMENT', 'STARTDATES').split() + startdates = parser.get_list_option('EXPERIMENT', 'STARTDATES') + + import exrex + self.startdates = [] + for startdate_pattern in startdates: + for startdate in exrex.generate(startdate_pattern): + self.startdates.append(startdate) + self.chunk_size = parser.get_int_option('EXPERIMENT', 'CHUNK_SIZE') self.num_chunks = parser.get_int_option('EXPERIMENT', 'CHUNKS') self.chunk_list = parser.get_int_list_option('EXPERIMENT', 'CHUNK_LIST', []) diff --git a/setup.py b/setup.py index 0a88cf58..0e68a089 100644 --- a/setup.py +++ b/setup.py @@ -5,8 +5,9 @@ Installation script for EarthDiagnostics package """ from os import path -from setuptools import setup + from setuptools import find_packages +from setuptools import setup here = path.abspath(path.dirname(__file__)) @@ -26,7 +27,8 @@ setup( keywords=['climate', 'weather', 'diagnostic'], setup_requires=['pyproj'], install_requires=['numpy', 'netCDF4', 'bscearth.utils', 'cdo>=1.3.4', 'nco>=0.0.3', 'iris>=1.12.0', 'coverage', - 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash', 'six', 'psutil'], + 'pygrib', 'openpyxl', 'mock', 'futures', 'cf_units', 'cfunits', 'xxhash', 'six', 'psutil', + 'exrex'], packages=find_packages(), include_package_data=True, scripts=['bin/earthdiags'] diff --git a/test/unit/statistics/test_daysoverpercentile.py b/test/unit/statistics/test_daysoverpercentile.py index afa37276..cc225cd6 100644 --- 
a/test/unit/statistics/test_daysoverpercentile.py +++ b/test/unit/statistics/test_daysoverpercentile.py @@ -1,10 +1,10 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.statistics.daysoverpercentile import DaysOverPercentile from mock import Mock from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.statistics.daysoverpercentile import DaysOverPercentile class TestDaysOverPercentile(TestCase): @@ -12,6 +12,7 @@ class TestDaysOverPercentile(TestCase): def setUp(self): self.data_manager = Mock() self.diags = Mock() + self.diags.config.experiment.get_chunk_list.return_value = (('20011101', 0, 0), ('20011101', 0, 1)) def test_generate_jobs(self): jobs = DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '2000', '2001', '11']) @@ -27,6 +28,6 @@ class TestDaysOverPercentile(TestCase): DaysOverPercentile.generate_jobs(self.diags, ['monpercent', 'ocean', 'var', '2000', '2001', '11', 'extra']) def test_str(self): - diagnostic = DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, 2000, 11) + diagnostic = DaysOverPercentile(self.data_manager, ModelingRealms.ocean, 'var', 2000, 2001, '20001101', 11) self.assertEquals(str(diagnostic), 'Days over percentile Startdate: 20001101 Variable: ocean:var ' 'Climatology: 2000-2001') diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 64ed67b2..ea9e9e73 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -1,7 +1,7 @@ # coding=utf-8 from unittest import TestCase -from earthdiagnostics.config import CMORConfig, ConfigException, THREDDSConfig, ReportConfig +from earthdiagnostics.config import CMORConfig, ConfigException, THREDDSConfig, ReportConfig, ExperimentConfig from earthdiagnostics.frequency import Frequencies from earthdiagnostics.modelingrealm import ModelingRealms @@ -47,16 +47,23 @@ class ParserMock(object): def get_path_option(self, section, var, default): return self.get_value(section, var, 
default) - def get_int_option(self, section, var, default): + def get_int_option(self, section, var, default=0): return self.get_value(section, var, default) + def get_int_list_option(self, section, var, default=list(), separator=' '): try: return [int(val) for val in self._values[self.get_var_string(section, var)].split(separator)] except KeyError: return default - def get_option(self, section, var, default): + def get_list_option(self, section, var, default=list(), separator=' '): + try: + return [val for val in self._values[self.get_var_string(section, var)].split(separator)] + except KeyError: + return default + + def get_option(self, section, var, default=None): return self.get_value(section, var, default) @@ -268,3 +275,39 @@ class TestReportConfig(TestCase): self.mock_parser.add_value('REPORT', 'MAXIMUM_PRIORITY', 3) config = ReportConfig(self.mock_parser) self.assertEquals(config.maximum_priority, 3) + + +class TestExperimentConfig(TestCase): + + def setUp(self): + self.mock_parser = ParserMock() + + def test_basic_config(self): + config = ExperimentConfig(self.mock_parser) + + self.assertEquals(config.startdates, []) + self.assertEquals(config.members, []) + self.assertEquals(config.chunk_size, 0) + self.assertEquals(config.num_chunks, 0) + + self.assertEquals(config.atmos_grid, '') + self.assertEquals(config.atmos_timestep, 6) + self.assertEquals(config.ocean_timestep, 6) + + def test_startdates(self): + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '20001101 20011101') + config = ExperimentConfig(self.mock_parser) + self.assertEquals(config.startdates, ['20001101', '20011101']) + + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '200(0|1)1101') + config = ExperimentConfig(self.mock_parser) + self.assertEquals(config.startdates, ['20001101', '20011101']) + + self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '200[0-2](02|05|08|11)01') + config = ExperimentConfig(self.mock_parser) + print(config.startdates) + 
self.assertEquals(config.startdates, [u'20000201', u'20000501', u'20000801', u'20001101', u'20010201', + u'20010501', u'20010801', u'20011101', u'20020201', u'20020501', + u'20020801', u'20021101']) + + -- GitLab From 57cf4053162b607e7e5954b40194610132ac62e6 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 28 Sep 2017 11:52:33 +0200 Subject: [PATCH 80/82] Fixed climpercent variable name in daysover --- earthdiagnostics/statistics/daysoverpercentile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/earthdiagnostics/statistics/daysoverpercentile.py b/earthdiagnostics/statistics/daysoverpercentile.py index 74fb3c5e..aad012a4 100644 --- a/earthdiagnostics/statistics/daysoverpercentile.py +++ b/earthdiagnostics/statistics/daysoverpercentile.py @@ -75,7 +75,7 @@ class DaysOverPercentile(Diagnostic): return job_list def request_data(self): - var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month}'.format(self) + var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:02d}'.format(self) self.percentiles_file = self.request_chunk(self.domain, var_name, None, None, None, frequency=Frequencies.climatology) -- GitLab From 19b3cff95a14afe55e80d03e437c8a18550e8c71 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 6 Oct 2017 11:09:46 +0200 Subject: [PATCH 81/82] Fixed atmos cmorization --- earthdiagnostics/cmorizer.py | 11 +++++------ earthdiagnostics/cmormanager.py | 19 ++++++++++++++----- earthdiagnostics/config.py | 1 + earthdiagnostics/earthdiags.py | 1 + test/unit/test_config.py | 1 + 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 7f97802d..7cf8c5c3 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -1,19 +1,18 @@ # coding=utf-8 import glob +import os +import pygrib import shutil import uuid - -import os from datetime import datetime -import pygrib 
-from bscearth.utils.log import Log from bscearth.utils.date import parse_date, chunk_end_date, previous_day, date2str, add_months +from bscearth.utils.log import Log +from earthdiagnostics.datafile import NetCDFFile from earthdiagnostics.frequency import Frequency, Frequencies from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils -from earthdiagnostics.datafile import NetCDFFile class Cmorizer(object): @@ -449,7 +448,7 @@ class Cmorizer(object): def get_date_str(self, file_path): file_parts = os.path.basename(file_path).split('_') - if file_parts[0] in (self.experiment.expid, 'MMA', 'MMASH', 'MMAGG', 'MMO') or file_parts[0].startswith('ORCA'): + if file_parts[0] in (self.experiment.expid, 't00o', 'MMA', 'MMASH', 'MMAGG', 'MMO') or file_parts[0].startswith('ORCA'): # Model output if file_parts[-1].endswith('.tar'): file_parts = file_parts[-1][0:-4].split('-') diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index e116190b..58ce7e23 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -1,10 +1,10 @@ # coding=utf-8 import glob +import os from datetime import datetime -import os -from bscearth.utils.log import Log from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, previous_day +from bscearth.utils.log import Log from datafile import StorageStatus from diagnostic import Diagnostic @@ -391,6 +391,7 @@ class CMORManager(DataManager): startdate_path = self._get_startdate_path(startdate) if not os.path.isdir(startdate_path): return False + count = 0 if self.config.data_convention == 'specs': for freq in os.listdir(startdate_path): domain_path = os.path.join(startdate_path, freq, @@ -401,21 +402,29 @@ class CMORManager(DataManager): var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, Frequency(freq)) if os.path.isfile(var_path): - return True + count += 1 + if count >= self.config.cmor.min_cmorized_vars: 
+ return True + else: + continue else: member_path = os.path.join(startdate_path, self._get_member_str(member)) if not os.path.isdir(member_path): return False freq = Frequencies.monthly table = domain.get_table(freq, self.config.data_convention) - table_dir = os.path.join(member_path, table) + table_dir = os.path.join(member_path, table.name) if not os.path.isdir(table_dir): return False for var in os.listdir(table_dir): cmor_var = self.variable_list.get_variable(var, True) var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency=freq) if os.path.isfile(var_path): - return True + count += 1 + if count >= self.config.cmor.min_cmorized_vars: + return True + else: + continue return False def _cmorize_member(self, startdate, member): diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 4b25e94c..133bd6da 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -142,6 +142,7 @@ class CMORConfig(object): self.default_ocean_grid = parser.get_option('CMOR', 'DEFAULT_OCEAN_GRID', 'gn') self.default_atmos_grid = parser.get_option('CMOR', 'DEFAULT_ATMOS_GRID', 'gr') self.activity = parser.get_option('CMOR', 'ACTIVITY', 'CMIP') + self.min_cmorized_vars = parser.get_int_option('CMOR', 'MIN_CMORIZED_VARS', 10) vars_string = parser.get_option('CMOR', 'VARIABLE_LIST', '') self.var_manager = var_manager diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 4b7f1384..ed285b11 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -306,6 +306,7 @@ class EarthDiags(object): Utils.give_group_write_permissions(self.config.scratch_masks) mesh_mask_scratch_path = os.path.join(self.config.scratch_masks, mesh_mask) + if self._copy_file(mesh_mask_path, mesh_mask_scratch_path, restore_meshes): Utils.give_group_write_permissions(mesh_mask_scratch_path) diff --git a/test/unit/test_config.py b/test/unit/test_config.py index ea9e9e73..a1c8d25b 100644 --- 
a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -93,6 +93,7 @@ class TestCMORConfig(TestCase): self.assertEquals(config.filter_files, '') self.assertEquals(config.default_atmos_grid, 'gr') self.assertEquals(config.default_ocean_grid, 'gn') + self.assertEquals(config.min_cmorized_vars, 10) def test_cmorize(self): config = CMORConfig(self.mock_parser, self.var_manager) -- GitLab From bf55806de87f7500666242f6fd3c98059d8fa8bd Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 6 Oct 2017 16:57:17 +0200 Subject: [PATCH 82/82] Fixed bug in regionmean --- earthdiagnostics/datafile.py | 4 ++-- earthdiagnostics/earthdiags.py | 6 ++++-- earthdiagnostics/ocean/regionmean.py | 4 ++-- earthdiagnostics/utils.py | 26 ++++++++++++++------------ test/unit/test_config.py | 7 ++++++- 5 files changed, 28 insertions(+), 19 deletions(-) diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py index da44f5f8..ddaf1b10 100644 --- a/earthdiagnostics/datafile.py +++ b/earthdiagnostics/datafile.py @@ -268,12 +268,12 @@ class DataFile(Publisher): else: self._update_var_with_region_data() self._correct_metadata() - Utils.nco.ncks(input=self.local_file, output=self.local_file, options='-O --fix_rec_dmn region') + Utils.nco.ncks(input=self.local_file, output=self.local_file, options=['--fix_rec_dmn region']) def _update_var_with_region_data(self): temp = TempFile.get() shutil.copyfile(self.remote_file, temp) - Utils.nco.ncks(input=temp, output=temp, options='-O --mk_rec_dmn region') + Utils.nco.ncks(input=temp, output=temp, options=['--mk_rec_dmn region']) handler = Utils.openCdf(temp) handler_send = Utils.openCdf(self.local_file) value = handler_send.variables[self.final_name][:] diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index ed285b11..b9ef5aa2 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -350,12 +350,14 @@ class EarthDiags(object): return False if not force and 
os.path.exists(destiny): - if os.stat(source).st_size == os.stat(destiny).st_size: + # Small size differences can be due to the renaming of variables + delta_size = abs(os.stat(source).st_size - os.stat(destiny).st_size) + if delta_size < 512: Log.info('File {0} already exists', destiny) return True Log.info('Copying file {0}', destiny) - shutil.copy(source, destiny) + shutil.copyfile(source, destiny) Log.info('File {0} ready', destiny) Utils.rename_variables(destiny, self.dic_variables, False, True) return True diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index 038f9f98..1d6cebb8 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -6,8 +6,8 @@ from earthdiagnostics.box import Box from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticIntOption, DiagnosticDomainOption, \ DiagnosticBoolOption, DiagnosticBasinOption, DiagnosticVariableOption -from earthdiagnostics.utils import Utils, TempFile from earthdiagnostics.modelingrealm import ModelingRealms +from earthdiagnostics.utils import Utils, TempFile class RegionMean(Diagnostic): @@ -158,7 +158,7 @@ class RegionMean(Diagnostic): levels = '' temp2 = TempFile.get() - Utils.nco.ncks(input=mean_file, output=temp2, options=('-O -v {0},lat,lon{1}'.format(original_name, levels),)) + Utils.nco.ncks(input=mean_file, output=temp2, options=('-v {0},lat,lon{1}'.format(original_name, levels),)) self.declared[final_name].set_local_file(temp2, rename_var=original_name) def declare_var(self, var, threed, box_save): diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index 79d5cc6c..42304aa0 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -1,26 +1,27 @@ # coding=utf-8 +import datetime +import os +import re import shutil +import stat import subprocess +import sys import tarfile -import datetime +import tempfile +from contextlib import 
contextmanager +import iris +import iris.exceptions import netCDF4 import numpy as np -import os -import stat -import re -import tempfile - import six +import xxhash from bscearth.utils.log import Log -from cdo import Cdo, CDOException +from cdo import Cdo from cfunits import Units from nco import Nco from earthdiagnostics.constants import Basins -from contextlib import contextmanager -import sys -import xxhash @contextmanager @@ -168,8 +169,9 @@ class Utils(object): def check_netcdf_file(filepath): with suppress_stdout(): try: - Utils.cdo.showvar(input=filepath) - except CDOException: + iris.FUTURE.netcdf_promote = True + iris.load(filepath) + except iris.exceptions.IrisError: return False return True diff --git a/test/unit/test_config.py b/test/unit/test_config.py index a1c8d25b..419be475 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -273,7 +273,6 @@ class TestReportConfig(TestCase): self.assertEquals(config.path, 'new_path') def test_priority(self): - self.mock_parser.add_value('REPORT', 'MAXIMUM_PRIORITY', 3) config = ReportConfig(self.mock_parser) self.assertEquals(config.maximum_priority, 3) @@ -295,6 +294,12 @@ class TestExperimentConfig(TestCase): self.assertEquals(config.atmos_timestep, 6) self.assertEquals(config.ocean_timestep, 6) + def test_cmor_version_required(self): + self.mock_parser.add_value('CMOR', 'VERSION', '20001101') + self.mock_parser.add_value('EXPERIMENT', 'DATA_CONVENTION', 'Primavera') + config = ExperimentConfig(self.mock_parser) + self.assertEquals(config.path, 'new_path') + def test_startdates(self): self.mock_parser.add_value('EXPERIMENT', 'STARTDATES', '20001101 20011101') config = ExperimentConfig(self.mock_parser) -- GitLab