data_convention.py 11.6 KB
Newer Older
import os

from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, previous_day
from bscearth.utils.log import Log

from earthdiagnostics.frequency import Frequency, Frequencies
from earthdiagnostics.modelingrealm import ModelingRealms
from earthdiagnostics.variable import VariableType


class DataConvention(object):
    """
    Base class for the conventions that define file names and folder layout

    Subclasses must implement `get_file_name`, `get_cmor_folder_path`, `get_member_str` and `create_links`.

    Parameters
    ----------
    name: str
    config: object
        Configuration object. The methods of this class read its `data_dir`, `experiment` and `cmor` attributes
    """

    def __init__(self, name, config):
        self.config = config
        self.name = name
        # Latitude and longitude names for this convention
        self.lat_name = 'lat'
        self.lon_name = 'lon'
        # Separator placed between the start and the end of a chunk in the time component
        self.time_separator = '-'

    def get_file_name(self, startdate, member, domain, var, cmor_var, frequency, chunk, year, date_str, grid, ):
        """
        Get the file name for the given parameters. Must be implemented by subclasses

        Raises
        ------
        NotImplementedError
            Always, in this base class
        """
        raise NotImplementedError()

    def get_cmor_folder_path(self, startdate, member, domain, var, frequency, grid, cmor_var):
        """
        Get the folder for the given parameters. Must be implemented by subclasses

        Raises
        ------
        NotImplementedError
            Always, in this base class
        """
        raise NotImplementedError()

    def get_startdate_path(self, startdate):
        """
        Return the path to the startdate's CMOR folder

        The path is <data_dir>/<expid>/cmorfiles/<activity>/<institute>/<model>/<experiment name>

        Parameters
        ----------
        startdate: str

        Returns
        -------
        str
        """
        experiment = self.config.experiment
        return os.path.join(self.config.data_dir, experiment.expid, 'cmorfiles', self.config.cmor.activity,
                            experiment.institute, experiment.model, self.experiment_name(startdate))

    def experiment_name(self, startdate):
        """
        Get experiment name, appending startdate if needed

        The startdate is appended (prefixed by 'S') only if `config.cmor.append_startdate` is set

        Parameters
        ----------
        startdate: str

        Returns
        -------
        str

        """
        name = self.config.experiment.experiment_name
        if self.config.cmor.append_startdate:
            return '{}S{}'.format(name, startdate)
        return name

    def get_member_str(self, member):
        """
        Get the string that identifies a member. Must be implemented by subclasses

        Raises
        ------
        NotImplementedError
            Always, in this base class
        """
        raise NotImplementedError()

    def create_links(self, cmor_manager, member_str, path):
        """
        Create the links for the files found in path. Must be implemented by subclasses

        Raises
        ------
        NotImplementedError
            Always, in this base class
        """
        raise NotImplementedError()

    def _get_time_component(self, chunk, date_str, frequency, startdate, year):
        """
        Get the time component of a file name

        The chunk is used if it is not None. Otherwise the year is used if given, which is only valid for the
        yearly frequency. If neither is given, date_str is returned unchanged.

        Parameters
        ----------
        chunk: int or None
        date_str: str or None
        frequency: Frequency
        startdate: str
        year: int or None

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If year is used and frequency is not yearly
        """
        if chunk is not None:
            return self._get_chunk_time_bounds(startdate, chunk)
        if year:
            if frequency != Frequencies.yearly:
                raise ValueError('Year may be provided instead of chunk only if frequency is "yr"')
            return str(year)
        return date_str

    def _get_chunk_time_bounds(self, startdate, chunk):
        """
        Get the time bounds of a chunk as <start year><start month><separator><end year><end month>

        Parameters
        ----------
        startdate: str
        chunk: int

        Returns
        -------
        str
        """
        # The experiment settings live in the configuration: instances have no `experiment` attribute
        experiment = self.config.experiment
        start = parse_date(startdate)
        chunk_start = chunk_start_date(start, chunk, experiment.chunk_size, 'month', experiment.calendar)
        chunk_end = chunk_end_date(chunk_start, experiment.chunk_size, 'month', experiment.calendar)
        # Step back one day from the computed end date before reading its year and month
        chunk_end = previous_day(chunk_end, experiment.calendar)
        return "{0:04}{1:02}{4}{2:04}{3:02}".format(chunk_start.year, chunk_start.month, chunk_end.year,
                                                    chunk_end.month, self.time_separator)


class Cmor2Convention(DataConvention):
    """
    Convention with the file names and folder layout shared by the SPECS and PREFACE conventions

    Files are stored at <startdate path>/<frequency>/<domain>/<var>[/<grid>]/<member>[/<version>]
    """

    def get_scratch_masks(self, scratch_masks):
        """
        Get the scratch masks folder for this convention: the given path is returned unchanged

        Parameters
        ----------
        scratch_masks: str

        Returns
        -------
        str
        """
        return scratch_masks

    def get_file_name(self, startdate, member, domain, var, cmor_var, frequency, chunk, year, date_str, grid, ):
        """
        Get the file name as <var>_<table>_<model>_<experiment name>_S<startdate>_<member>_<time>.nc

        The table is taken from cmor_var if it is given and from the domain otherwise.
        The grid parameter is not used by this convention.

        Returns
        -------
        str
        """
        table_source = domain if cmor_var is None else cmor_var
        cmor_table = table_source.get_table(frequency, self.config.data_convention)

        time_bound = self._get_time_component(chunk, date_str, frequency, startdate, year)
        time_bound = '_{0}.nc'.format(time_bound)

        # The model name lives in config.experiment and the member string comes from the public get_member_str
        return '{0}_{1}_{2}_{3}_S{4}_{5}{6}'.format(var, cmor_table.name, self.config.experiment.model,
                                                    self.experiment_name(startdate), startdate,
                                                    self.get_member_str(member), time_bound)

    def get_cmor_folder_path(self, startdate, member, domain, var, frequency, grid, cmor_var):
        """
        Get the folder where the files for the given variable are stored

        The grid and the CMOR version are added to the path only if they are set

        Returns
        -------
        str
        """
        folder_path = os.path.join(self.get_startdate_path(startdate), str(frequency), domain.name, var)
        if grid:
            folder_path = os.path.join(folder_path, grid)
        folder_path = os.path.join(folder_path, self.get_member_str(member))
        if self.config.cmor.version:
            folder_path = os.path.join(folder_path, self.config.cmor.version)
        return folder_path

    def get_member_str(self, member):
        """
        Get the member string as r<member number>i<initialization number>p1

        The member number is member + 1 - config.experiment.member_count_start

        Parameters
        ----------
        member: int

        Returns
        -------
        str
        """
        template = 'r{0}i{1}p1'
        return template.format(member + 1 - self.config.experiment.member_count_start,
                               self.config.cmor.initialization_number)

    def create_links(self, cmor_manager, member_str, path):
        """
        Create links for all the files found under path

        The folders are walked as <path>/<frequency>/<domain>/<var>/<member>. Each entry in a member folder is
        linked directly if it is a file; otherwise it is treated as a folder and the entries it contains are linked.

        Parameters
        ----------
        cmor_manager: object
            Object providing the `create_link` method
        member_str: str or None
            If not None, only the member folders with this name are processed
        path: str
        """
        for freq in os.listdir(path):
            Log.debug('Creating links for frequency {0}', freq)
            frequency = Frequency.parse(freq)
            freq_path = os.path.join(path, freq)
            for domain in os.listdir(freq_path):
                Log.debug('Creating links for domain {0}', domain)
                domain_path = os.path.join(freq_path, domain)
                for var in os.listdir(domain_path):
                    var_path = os.path.join(domain_path, var)
                    for member in os.listdir(var_path):
                        if member_str is not None and member_str != member:
                            continue
                        member_path = os.path.join(var_path, member)
                        for name in os.listdir(member_path):
                            filepath = os.path.join(member_path, name)
                            if os.path.isfile(filepath):
                                cmor_manager.create_link(domain, filepath, frequency, var, "", False,
                                                         vartype=VariableType.MEAN)
                            else:
                                for filename in os.listdir(filepath):
                                    cmor_manager.create_link(domain, os.path.join(filepath, filename), frequency, var,
                                                             "", False, vartype=VariableType.MEAN)

class SPECSConvention(Cmor2Convention):
    """
    Cmor2Convention variant that only changes the startdate folder
    """

    def get_startdate_path(self, startdate):
        """
        Return the path to the startdate's CMOR folder

        The path is <data_dir>/<expid>/cmorfiles/<institute>/<model>/<experiment name>/S<startdate>

        Parameters
        ----------
        startdate: str

        Returns
        -------
        str
        """
        # The experiment settings live in the configuration: instances have no `experiment` attribute
        experiment = self.config.experiment
        return os.path.join(self.config.data_dir, experiment.expid, 'cmorfiles', experiment.institute,
                            experiment.model, self.experiment_name(startdate), 'S' + startdate)

class PrefaceConvention(Cmor2Convention):
    """
    Cmor2Convention variant that uses '_' as time separator and its own startdate folder

    Parameters
    ----------
    name: str
    config: object
    """

    def __init__(self, name, config):
        super(PrefaceConvention, self).__init__(name, config)
        self.time_separator = '_'

    def get_startdate_path(self, startdate):
        """
        Return the path to the startdate's CMOR folder

        The path is <data_dir>/<expid>/cmorfiles/<institute>/<experiment name>/S<startdate>

        Parameters
        ----------
        startdate: str

        Returns
        -------
        str
        """
        # The experiment settings live in the configuration: instances have no `experiment` attribute
        experiment = self.config.experiment
        return os.path.join(self.config.data_dir, experiment.expid, 'cmorfiles', experiment.institute,
                            self.experiment_name(startdate), 'S' + startdate)

class Cmor3Convention(DataConvention):
    """
    Base class for the CMIP6 and PRIMAVERA conventions

    Files are stored at <startdate path>/<member>/<table>/<var>/<grid>/<version>

    Parameters
    ----------
    name: str
    config: object
    """

    def __init__(self, name, config):
        super(Cmor3Convention, self).__init__(name, config)
        self.lat_name = 'latitude'
        self.lon_name = 'longitude'

    def get_scratch_masks(self, scratch_masks):
        """
        Get the scratch masks folder for this convention: a subfolder named after the convention

        Parameters
        ----------
        scratch_masks: str

        Returns
        -------
        str
        """
        return os.path.join(scratch_masks, self.name)

    def get_file_name(self, startdate, member, domain, var, cmor_var, frequency, chunk, year, date_str, grid, ):
        """
        Get the file name as <var>_<table>_<model>_<experiment name>_<member>_<grid>_<time>.nc

        The table is taken from cmor_var if it is given and from the domain otherwise.
        If no grid is given, the default one configured for the domain is used.

        Returns
        -------
        str
        """
        cmor_table = self._get_cmor_table(domain, cmor_var, frequency)

        time_bound = self._get_time_component(chunk, date_str, frequency, startdate, year)
        time_bound = '_{0}.nc'.format(time_bound)

        grid = self._resolve_grid(domain, grid)
        # The model name lives in config.experiment and the member string comes from the public get_member_str
        return '{0}_{1}_{2}_{3}_{4}_{5}{6}'.format(var, cmor_table.name, self.config.experiment.model,
                                                   self.experiment_name(startdate),
                                                   self.get_member_str(member),
                                                   grid, time_bound)

    def get_cmor_folder_path(self, startdate, member, domain, var, frequency, grid, cmor_var):
        """
        Get the folder where the files for the given variable are stored

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If the CMOR version is not configured
        """
        if not self.config.cmor.version:
            raise ValueError('CMOR version is mandatory for PRIMAVERA and CMIP6')
        grid = self._resolve_grid(domain, grid)
        table_name = self._get_cmor_table(domain, cmor_var, frequency).name
        return os.path.join(self.get_startdate_path(startdate), self.get_member_str(member),
                            table_name, var,
                            grid, self.config.cmor.version)

    def _get_cmor_table(self, domain, cmor_var, frequency):
        """Get the table from cmor_var if it is given and from the domain otherwise"""
        table_source = domain if cmor_var is None else cmor_var
        return table_source.get_table(frequency, self.config.data_convention)

    def _resolve_grid(self, domain, grid):
        """Return grid if it is set; otherwise the configured default ocean or atmosphere grid for the domain"""
        if grid:
            return grid
        if domain in [ModelingRealms.ocnBgchem, ModelingRealms.seaIce, ModelingRealms.ocean]:
            return self.config.cmor.default_ocean_grid
        return self.config.cmor.default_atmos_grid

    def create_links(self, cmor_manager, member_str, path):
        """
        Create links for all the files found under path

        The folders are walked as <path>/<member>/<table>/<var>/<grid>. Each entry in a grid folder is linked
        directly if it is a file; otherwise it is treated as a folder and the entries it contains are linked.
        The links are created with no domain (None) and with the frequency of the table.

        Parameters
        ----------
        cmor_manager: object
            Object providing the `create_link` method
        member_str: str or None
            If not None, only the member folders with this name are processed
        path: str
        """
        for member in os.listdir(path):
            # The filter depends only on the member, so skip the folder before walking it
            if member_str is not None and member_str != member:
                continue
            member_path = os.path.join(path, member)
            for table in os.listdir(member_path):
                frequency = self.config.var_manager.tables[table].frequency
                domain = None
                Log.debug('Creating links for table {0}', table)
                table_path = os.path.join(member_path, table)
                for var in os.listdir(table_path):
                    var_path = os.path.join(table_path, var)
                    for grid in os.listdir(var_path):
                        grid_path = os.path.join(var_path, grid)
                        for name in os.listdir(grid_path):
                            filepath = os.path.join(grid_path, name)
                            # Links are created by the manager: the convention itself has no create_link method
                            if os.path.isfile(filepath):
                                cmor_manager.create_link(domain, filepath, frequency, var, "", False,
                                                         vartype=VariableType.MEAN)
                            else:
                                for filename in os.listdir(filepath):
                                    cmorfile = os.path.join(filepath, filename)
                                    cmor_manager.create_link(domain, cmorfile, frequency, var, "",
                                                             False, vartype=VariableType.MEAN)

    def get_member_str(self, member):
        """
        Get the member string as r<member number>i<initialization number>p1f1

        The member number is member + 1 - config.experiment.member_count_start

        Parameters
        ----------
        member: int

        Returns
        -------
        str
        """
        template = 'r{0}i{1}p1f1'
        return template.format(member + 1 - self.config.experiment.member_count_start,
                               self.config.cmor.initialization_number)

class CMIP6Convention(Cmor3Convention):
    """CMIP6 convention: inherits everything from Cmor3Convention without changes"""

class PrimaveraConvention(Cmor3Convention):
    """PRIMAVERA convention: inherits everything from Cmor3Convention without changes"""

class MeteoFranceConvention(DataConvention):
    """
    Convention with file names of the form <var>_<frequency>_<YYYYMM>_<member>.nc

    `create_links` is not overridden here, so the base class implementation applies.
    """

    def get_file_name(self, startdate, member, domain, var, cmor_var, frequency, chunk, year, date_str, grid,):
        """
        Get the file name as <var>_<frequency>_<chunk start year and month>_<member>.nc

        Only startdate, member, var, frequency and chunk are used by this convention

        Returns
        -------
        str
        """
        time_bound = self._get_chunk_time_bounds(startdate, chunk)
        # The member string comes from the public get_member_str: there is no _get_member_str method
        return '{0}_{1}_{2}_{3}.nc'.format(var, frequency, time_bound, self.get_member_str(member))

    def get_cmor_folder_path(self, startdate, member, domain, var, frequency, grid, cmor_var):
        """
        Get the folder as <data_dir>/<experiment name>/H<letter>/<first four characters of startdate>

        Only startdate is used by this convention

        Returns
        -------
        str
        """
        # Characters 4-5 of the startdate are read as a number and mapped to a letter: 1 -> 'A', 2 -> 'B', ...
        # NOTE(review): presumably the month of a YYYYMMDD startdate - confirm
        letter = chr(64 + int(startdate[4:6]))
        return os.path.join(self.config.data_dir, self.experiment_name(startdate),
                            'H{0}'.format(letter),
                            startdate[0:4])

    def get_member_str(self, member):
        """
        Get the member string: the member number zero-padded to two digits

        Parameters
        ----------
        member: int

        Returns
        -------
        str
        """
        return '{0:02d}'.format(member)

    def _get_chunk_time_bounds(self, startdate, chunk):
        """
        Get the time component for a chunk: year and month of the chunk start as <YYYY><MM>

        Parameters
        ----------
        startdate: str
        chunk: int

        Returns
        -------
        str
        """
        # The experiment settings live in the configuration: instances have no `experiment` attribute
        experiment = self.config.experiment
        start = parse_date(startdate)
        chunk_start = chunk_start_date(start, chunk, experiment.chunk_size, 'month', experiment.calendar)
        return "{0:04}{1:02}".format(chunk_start.year, chunk_start.month)