# datamanager.py
"""Base data manager for Earth diagnostics"""
from earthdiagnostics.datafile import NetCDFFile as NCfile, StorageStatus, LocalStatus, UnitConversion
from earthdiagnostics.variable import VariableType


class DataManager(object):
    """
    Class to manage the data repositories

    Parameters
    ----------
    config: Config

    """

    def __init__(self, config):
        self.config = config
        self.experiment = config.experiment
        self.variable_list = config.var_manager
        UnitConversion.load_conversions()
        # Registry of every file object handed out so far, keyed by its path,
        # so repeated requests/declarations for one path share one object.
        self.requested_files = {}

    def _get_file_from_storage(self, filepath):
        """
        Get the file object registered for a path, creating it on first use

        The returned object always has its local status set to
        LocalStatus.PENDING, even if it was already registered.

        Parameters
        ----------
        filepath: str

        Returns
        -------
        DataFile

        """
        if filepath not in self.requested_files:
            self.requested_files[filepath] = NCfile.from_storage(filepath, self.config.data_convention)
        file_object = self.requested_files[filepath]
        file_object.local_status = LocalStatus.PENDING
        return file_object

    def _declare_generated_file(self, remote_file, domain, final_var, cmor_var, data_convention,
                                region, diagnostic, grid, var_type, original_var):
        """
        Register a file that a diagnostic will generate and fill in its metadata

        The file object is created on first use of `remote_file` and reused
        afterwards; its attributes are overwritten on every call and its
        storage status is set to StorageStatus.PENDING.

        Parameters
        ----------
        remote_file: str
        domain: ModelingRealm
        final_var: str
            Stored as the file's `final_name`
        cmor_var
        data_convention
            Passed to NCfile.to_storage when the file object is created
        region
        diagnostic
        grid
        var_type
        original_var: str
            Stored as the file's `var`

        Returns
        -------
        DataFile

        """
        if remote_file not in self.requested_files:
            self.requested_files[remote_file] = NCfile.to_storage(remote_file, data_convention)
        file_object = self.requested_files[remote_file]
        file_object.diagnostic = diagnostic
        file_object.var_type = var_type
        file_object.grid = grid
        file_object.data_manager = self
        file_object.domain = domain
        file_object.var = original_var
        file_object.final_name = final_var
        file_object.cmor_var = cmor_var
        file_object.region = region
        file_object.storage_status = StorageStatus.PENDING
        return file_object

    @staticmethod
    def _get_final_var_name(box, var):
        """
        Build the variable name for a given box

        Parameters
        ----------
        box: Box or None
        var: str

        Returns
        -------
        str
            `var` followed by the box longitude, latitude and depth strings,
            or `var` unchanged when `box` is falsy

        """
        if box:
            var += box.get_lon_str() + box.get_lat_str() + box.get_depth_str()
        return var

    def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None,
                  frequency=None, year=None, date_str=None, move_old=False, vartype=VariableType.MEAN):
        """
        Create the link of a given file from the CMOR repository.

        This base implementation has no body: it does nothing and returns None.

        Parameters
        ----------
        domain: ModelingRealm
            CMOR domain
        var: str
            Variable name
        cmor_var
            Not described in the original documentation
        startdate: str
            File's startdate
        member: int
            File's member
        chunk: int, optional
            File's chunk
        grid: str, optional
            File's grid (only needed if it is not the original)
        frequency: str, optional
            File's frequency (only needed if it is different from the default)
        year: int, optional
            If frequency is yearly, this parameter is used to give the corresponding year
        date_str: optional
            Not described in the original documentation
        move_old: bool, optional
            Not described in the original documentation
        vartype: VariableType, optional
            Variable type (mean, statistic)

        Returns
        -------
        str
            Path to the copy created on the scratch folder, as documented for
            implementations; this base method itself returns None

        """

    def prepare(self):
        """Prepare the data to be used by Earth Diagnostics"""

    def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None):
        """
        Request a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        chunk: int
        grid: str or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional
        vartype: VariableType or None, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override request_chunk method')

    def request_year(self, diagnostic, domain, var, startdate, member, year, grid=None, box=None, frequency=None):
        """
        Request a given year for a variable from a CMOR repository

        Parameters
        ----------
        diagnostic: Diagnostic
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        year: int
        grid: str or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override request_year method')

    def declare_chunk(self, domain, var, startdate, member, chunk, grid=None, region=None, box=None, frequency=None,
                      vartype=VariableType.MEAN, diagnostic=None):
        """
        Declare a variable chunk to be generated by a diagnostic

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        chunk: int
        grid: str or None, optional
        region: Basin or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional
        vartype: VariableType, optional
        diagnostic: Diagnostic, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override declare_chunk method')

    def declare_year(self, domain, var, startdate, member, year, grid=None, box=None,
                     vartype=VariableType.MEAN, diagnostic=None):
        """
        Declare a variable year to be generated by a diagnostic

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        year: int
        grid: str or None, optional
        box: Box or None, optional
        vartype: VariableType, optional
        diagnostic: Diagnostic, optional

        Returns
        -------
        DataFile

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        """
        raise NotImplementedError('Class must override declare_year method')

    def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None,
                    vartype=VariableType.MEAN, possible_versions=None):
        """
        Check if a file exists in the storage

        Parameters
        ----------
        domain: ModelingRealm
        var: str
        startdate: str
        member: int
        chunk: int
        grid: str or None, optional
        box: Box or None, optional
        frequency: Frequency or None, optional
        vartype: VariableType, optional
        possible_versions: iterable of str or None, optional

        Raises
        ------
        NotImplementedError
            If not implemented by derived classes

        Returns
        -------
        bool

        """
        raise NotImplementedError('Class must override file_exists method')