# datamanager.py
"""Base data manager for Earth diagnostics"""
import csv
import os
from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus
from earthdiagnostics.modelingrealm import ModelingRealms
from earthdiagnostics.variable_type import VariableType


class DataManager(object):
    """
    Class to manage the data repositories

    Keeps a registry (``requested_files``) of the file objects requested from
    or declared to the storage, and defines the interface that derived
    classes must implement (``request_chunk``, ``declare_chunk``, ...).

    Parameters
    ----------
    config: Config

    """

    def __init__(self, config):
        self.config = config
        self.experiment = config.experiment
        self._checked_vars = list()
        self.variable_list = config.var_manager
        # Unit conversions are loaded every time a manager is created
        UnitConversion.load_conversions()
        # Maps a storage path to the file object created for it
        self.requested_files = {}

    def _get_file_from_storage(self, filepath):
        """
        Get the file object for a path in the storage, creating it if needed

        Parameters
        ----------
        filepath: str

        Returns
        -------
        The file object registered for ``filepath``

        """
        if filepath not in self.requested_files:
            self.requested_files[filepath] = NCfile.from_storage(filepath, self.config.data_convention)
        file_object = self.requested_files[filepath]
        # NOTE(review): 'local_satatus' looks like a misspelling of
        # 'local_status'. Kept as is because correcting it would start
        # resetting the status of files that were already requested; confirm
        # the intended behaviour against the DataFile class before renaming.
        file_object.local_satatus = LocalStatus.PENDING
        return file_object

    def _declare_generated_file(self, remote_file, domain, final_var, cmor_var, data_convention,
                                region, diagnostic, grid, var_type, original_var):
        """
        Register a file that will be generated and sent to the storage

        The file object is created only the first time ``remote_file`` is
        declared; its metadata is overwritten on every call and its storage
        status is set to pending.

        Returns
        -------
        The file object registered for ``remote_file``

        """
        if remote_file not in self.requested_files:
            self.requested_files[remote_file] = NCfile.to_storage(remote_file, data_convention)
        file_object = self.requested_files[remote_file]
        file_object.diagnostic = diagnostic
        file_object.var_type = var_type
        file_object.grid = grid
        file_object.data_manager = self
        file_object.domain = domain
        file_object.var = original_var
        file_object.final_name = final_var
        file_object.cmor_var = cmor_var
        file_object.region = region
        file_object.storage_status = StorageStatus.PENDING
        return file_object

    @staticmethod
    def _get_final_var_name(box, var):
        """Append the box longitude, latitude and depth strings to the variable name, if a box is given"""
        if box:
            var += box.get_lon_str() + box.get_lat_str() + box.get_depth_str()
        return var

    def get_varfolder(self, domain, var, grid=None, frequency=None):
        """
        Get variable folder name for <frequency>_<var_type> folder

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        grid: str or None, optional
            If given, it is appended to the variable name as ``<var>-<grid>``
        frequency: Frequency or None, optional

        Returns
        -------
        str

        """
        if grid:
            var = '{0}-{1}'.format(var, grid)

        # Ocean-like realms use the ocean timestep; every other realm uses
        # the atmospheric one
        if domain in [ModelingRealms.ocean, ModelingRealms.seaIce, ModelingRealms.ocnBgchem]:
            return DataManager._apply_fxh(var, self.experiment.ocean_timestep, frequency)
        else:
            return DataManager._apply_fxh(var, self.experiment.atmos_timestep, frequency)

    @staticmethod
    def _apply_fxh(folder_name, timestep, frequency=None):
        """
        Add the ``_f<timestep>h`` suffix to a folder name when it applies

        The suffix is skipped when the frequency name ends with 'hr' or when
        the timestep is not greater than zero.
        """
        is_base_frequency = frequency is not None and frequency.frequency.endswith('hr')
        if not is_base_frequency and timestep > 0:
            return '{0}_f{1}h'.format(folder_name, timestep)
        return folder_name

    def create_link(self, domain, filepath, frequency, var, grid, move_old, vartype):
        """
        Create file link

        Must be implemented by the derived classes. If not, this method will have no effect

        Parameters
        ----------
        domain: ModelingRealm
        filepath: str
        frequency: Frequency
        var: str
        grid: str
        move_old: bool
        vartype: VariableType

        """
        pass

    def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None,
                  frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN):
        """
        Create the link of a given file from the CMOR repository.

        This base implementation does nothing and returns None; derived
        classes are expected to override it.

        Parameters
        ----------
        domain: ModelingRealm
            CMOR domain
        var: str
            Variable name
        cmor_var:
        startdate: str
            File's startdate
        member: int
            File's member
        chunk: int or None, optional
            File's chunk
        grid: str or None, optional
            File's grid (only needed if it is not the original)
        frequency: str or None, optional
            File's frequency (only needed if it is different from the default)
        year: int or None, optional
            If frequency is yearly, this parameter is used to give the corresponding year
        date_str: optional
        move_old: bool, optional
        vartype: VariableType, optional
            Variable type (mean, statistic)

        """
        pass

    def prepare(self):
        """Prepare the data to be used by Earth Diagnostics"""
        pass

    def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None):
        """
        Request a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        chunk: int
        grid: str or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional
        vartype: VariableType or None, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override request_chunk method')

    def request_year(self, diagnostic, domain, var, startdate, member, year, grid=None, box=None, frequency=None):
        """
        Request a given year for a variable from a CMOR repository

        Parameters
        ----------
        diagnostic: Diagnostic
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        year: int
        grid: str or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override request_year method')

    def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None,
                      vartype=VariableType.MEAN, diagnostic=None):
        """
        Declare a variable chunk to be generated by a diagnostic

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        chunk: int
        grid: str or None, optional
        region: Basin or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional
        vartype: VariableType, optional
        diagnostic: Diagnostic, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override declare_chunk method')

    def declare_year(self, domain, var, startdate, member, year, grid=None, box=None,
                     vartype=VariableType.MEAN, diagnostic=None):
        """
        Declare a variable year to be generated by a diagnostic

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        year: int
        grid: str or None, optional
        box: Box or None, optional
        vartype: VariableType, optional
        diagnostic: Diagnostic, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override declare_year method')

    def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None,
                    vartype=VariableType.MEAN, possible_versions=None):
        """
        Check if a file exists in the storage

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        chunk: int
        grid: str or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional
        vartype: VariableType, optional
        possible_versions: iterable of str or None, optional

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        Returns
        -------
        bool

        """
        raise NotImplementedError('Class must override file_exists method')

class UnitConversion(object):
    """
    Class to manage unit conversions

    An instance describes one linear conversion, applied as
    ``converted = original * factor + offset``. The known conversions are
    kept in a class-level dictionary indexed by ``(source, destiny)``.

    Parameters
    ----------
    source: str
    destiny: str
    factor: float
    offset: float

    """

    # Filled by load_conversions(); None until the conversions are loaded
    _dict_conversions = None

    def __init__(self, source, destiny, factor, offset):
        self.source = source
        self.destiny = destiny
        # Values read from the CSV file arrive as strings
        self.factor = float(factor)
        self.offset = float(offset)

    @classmethod
    def load_conversions(cls):
        """
        Load conversions from the configuration file

        Reads 'conversions.csv' from the folder containing this module,
        discarding any conversion loaded before. Rows are expected to be
        ``source, destiny, factor, offset``; the header row (first cell
        'original') and blank rows are skipped.
        """
        cls._dict_conversions = dict()
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'conversions.csv'), 'r') as csvfile:
            reader = csv.reader(csvfile, dialect='excel')
            for line in reader:
                # csv.reader yields an empty list for blank lines
                if not line or line[0] == 'original':
                    continue
                cls.add_conversion(UnitConversion(line[0], line[1], line[2], line[3]))

    @classmethod
    def add_conversion(cls, conversion):
        """
        Adds a conversion to the dictionary

        An existing conversion for the same (source, destiny) pair is replaced.

        :param conversion: conversion to add
        :type conversion: UnitConversion
        """
        cls._dict_conversions[(conversion.source, conversion.destiny)] = conversion

    @classmethod
    def get_conversion_factor_offset(cls, input_units, output_units):
        """
        Get the conversion factor and offset for two units.

        The conversion has to be done in the following way:
        converted = original * factor + offset

        Units may carry a leading scale separated by whitespace, written
        either as a number ('1000 m') or as a power ('10^3 m').

        :param input_units: original units
        :type input_units: str
        :param output_units: destiny units
        :type output_units: str
        :return: factor and offset, or (None, None) if the conversion is unknown
        :rtype: [float, float]
        """
        scale_unit, unit = cls._split_scale(input_units)
        scale_new_unit, new_unit = cls._split_scale(output_units)

        factor, offset = cls._get_factor(new_unit, unit)
        if factor is None:
            return None, None
        # original * scale_unit is the value in plain input units; the result
        # in plain output units is then divided by the output scale
        factor = factor * scale_unit / float(scale_new_unit)
        offset /= float(scale_new_unit)

        return factor, offset

    @staticmethod
    def _split_scale(units_str):
        """
        Split a units string into its scale and its unit name

        A single token is a unit with scale 1. Otherwise the first token is
        the scale and the second one the unit name; further tokens are
        ignored.
        """
        tokens = units_str.split()
        if len(tokens) == 1:
            return 1, tokens[0]
        if '^' in tokens[0]:
            values = tokens[0].split('^')
            scale = pow(int(values[0]), int(values[1]))
        else:
            scale = float(tokens[0])
        return scale, tokens[1]

    @classmethod
    def _get_factor(cls, new_unit, unit):
        """
        Get factor and offset to convert from unit to new_unit, ignoring scales

        Returns (None, None) if neither the conversion nor its inverse is known.
        """
        # Add  only the conversions with a factor greater than 1
        if unit == new_unit:
            return 1, 0
        elif (unit, new_unit) in cls._dict_conversions:
            conversion = cls._dict_conversions[(unit, new_unit)]
            return conversion.factor, conversion.offset
        elif (new_unit, unit) in cls._dict_conversions:
            conversion = cls._dict_conversions[(new_unit, unit)]
            # Inverse of y = x * f + o is x = y / f - o / f, so the offset has
            # to be divided by the factor as well as negated
            return 1 / conversion.factor, -conversion.offset / conversion.factor
        else:
            return None, None