diff --git a/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd b/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd
index 256a6b05f01553700688ba425987c5e52884b6d9..7b2fd04640aa2992d698f7e91096c94c676a4c83 100644
--- a/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd
+++ b/Jupyter_notebooks/Jupyter_bash_nord3v2.cmd
@@ -30,8 +30,6 @@ module load cfunits/1.8-foss-2019b-Python-3.7.4
 module load xarray/0.19.0-foss-2019b-Python-3.7.4
 
 export PYTHONPATH=/gpfs/scratch/bsc32/bsc32538/NES_tests/NES:${PYTHONPATH}
-export PYTHONPATH=/gpfs/scratch/bsc32/bsc32538/SNES_tests/NES/nes:${PYTHONPATH}
-
 
 # DON'T USE ADDRESS BELOW.
 # DO USE TOKEN BELOW
diff --git a/README.md b/README.md
index c49354d3c212bcb364b78046449d1ad191a2aa2f..95ad6c17febf3a5bd178894d65f3ee51bcaac208 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
 # NES
 
-NetCDF for Earth Science
\ No newline at end of file
+NetCDF for Earth Science
+
+test local
\ No newline at end of file
diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index 6e4163aecadd76160f063e4db6c1cca685eb3b09..da61023e4f50cfddb2c0f0ff938c9c073e993a1e 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -81,7 +81,7 @@ class Nes(object):
         Tuple with the name of the dimensions of the Longitude values
     """
     def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y',
-                 avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None):
+                 avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create=False):
         """
         Initialize the Nes class
 
@@ -105,6 +105,7 @@ class Nes(object):
             Indicates the parallelization method that you want. Default over Y axis
             accepted values: ['Y', 'T']
         """
+
         # MPI Initialization
         if comm is None:
             self.comm = MPI.COMM_WORLD
@@ -153,7 +154,7 @@ class Nes(object):
 
         # Axis limits
         self.parallel_method = parallel_method
-        
+
         self.read_axis_limits = self.set_read_axis_limits()
 
         # Dimensions screening
@@ -162,7 +163,7 @@ class Nes(object):
         self.lat = self._get_coordinate_values(self._lat, 'Y')
         self.lon = self._get_coordinate_values(self._lon, 'X')
         self.time_bnds = self._time_bnds
-        
+
         self.write_axis_limits = self.set_write_axis_limits()
 
         # NetCDF attributes
@@ -286,8 +287,9 @@ class Nes(object):
                 self._time_bnds = deepcopy(time_bnds)
                 self.time_bnds = deepcopy(time_bnds)
             else:
-                msg = 'WARNING!!! '
-                msg += 'The given time bounds list has a different length than the time array. Time bounds will not be set.'
+                msg = "WARNING!!! "
+                msg += "The given time bounds list has a different length than the time array. "
+                msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self._time), len(time_bnds))
                 warnings.warn(msg)
         else:
             msg = 'WARNING!!! '
@@ -296,29 +298,189 @@ class Nes(object):
             warnings.warn(msg)
         return None
 
-    def get_time_id(self, hours, first=True):
+    def free_vars(self, var_list):
         """
-        Get the index of the corresponding time value.
+        Erase the selected variables from the variables information.
 
         Parameters
         ----------
-        hours : int
-            Number of hours to avoid
-        first : bool
-            Indicates if youy want to avoid from the first hours (True) or from the last (False)
-            Default: True
+        var_list : list, str
+            List (or single string) of the variables to be removed
+        """
+        if isinstance(var_list, str):
+            var_list = [var_list]
 
-        Returns
-        -------
-        int
-            Possition of the time array
+        if self.is_xarray:
+            self.dataset = self.dataset.drop_vars(var_list)
+            self.variables = self.__get_lazy_variables()
+        else:
+            for var_name in var_list:
+                del self.variables[var_name]
+        return None
+
+    def keep_vars(self, var_list):
         """
-        from datetime import timedelta
-        if first:
-            idx = self._time.index(self._time[0] + timedelta(hours=hours))
+        Keep the selected variables and erase the rest.
+
+        Parameters
+        ----------
+        var_list : list, str
+            List (or single string) of the variables to be kept
+        """
+        if isinstance(var_list, str):
+            var_list = [var_list]
+
+        to_remove = list(set(self.variables.keys()).difference(set(var_list)))
+
+        self.free_vars(to_remove)
+        return None
+
+    def get_time_interval(self):
+        """Return the spacing, in whole hours, between the first two time steps."""
+        time_interval = self._time[1] - self._time[0]
+        time_interval = int(time_interval.total_seconds() // 3600)  # .seconds would drop whole days
+        return time_interval
+
+    # ==================================================================================================================
+    # Statistics
+    # ==================================================================================================================
+
+    def last_time_step(self):
+        """Keep only the last time step of every variable (loading the data if needed)."""
+        if self.parallel_method == 'T':
+            raise NotImplementedError("Statistics are not implemented on time axis paralelitation method.")
+        aux_time = self._time[0].replace(hour=0, minute=0, second=0, microsecond=0)
+        self._time = [aux_time]
+        self.time = [aux_time]
+
+        for var_name, var_info in self.variables.items():
+            if var_info['data'] is None:
+                self.load(var_name)
+            aux_data = var_info['data'][-1, :]
+            if len(aux_data.shape) == 3:
+                aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2]))
+            self.variables[var_name]['data'] = aux_data
+        self.hours_start = 0
+        self.hours_end = 0
+        return None
+
+    def daily_statistic(self, op, type_op='calendar'):
+        """
+        Reduce every variable over the time axis with the given statistic.
+
+        Parameters
+        ----------
+        op : str
+            Statistic to apply. Accepted values: ['mean', 'max', 'min']
+        type_op : str
+            'calendar' (one value per day), 'alltsteps' (every time step) or
+            'withoutt0' (every time step but the first). Default: 'calendar'
+        """
+        if self.parallel_method == 'T':
+            raise NotImplementedError("Statistics are not implemented on time axis paralelitation method.")
+        time_interval = self.get_time_interval()
+        if type_op == 'calendar':
+            aux_time_bounds = []
+            aux_time = []
+            day_list = [date_aux.day for date_aux in self.time]  # NOTE(review): day-of-month only
+            for var_name, var_info in self.variables.items():
+                if var_info['data'] is None:
+                    self.load(var_name)
+                stat_data, aux_time, aux_time_bounds = None, [], []  # rebuilt per variable to avoid duplicates
+                for day in np.unique(day_list):
+                    idx_first = next(i for i, val in enumerate(day_list, 0) if val == day)
+                    idx_last = len(day_list) - next(i for i, val in enumerate(reversed(day_list), 1) if val == day)
+                    if idx_first != idx_last:  # To avoid single time step statistic
+                        if idx_last != len(day_list):  # NOTE(review): idx_last is an index, so always true
+                            if op == 'mean':
+                                data_aux = var_info['data'][idx_first:idx_last + 1, :, :, :].mean(axis=0)
+                            elif op == 'max':
+                                data_aux = var_info['data'][idx_first:idx_last + 1, :, :, :].max(axis=0)
+                            elif op == 'min':
+                                data_aux = var_info['data'][idx_first:idx_last + 1, :, :, :].min(axis=0)
+                            else:
+                                raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op))
+                            aux_time_bounds.append([self.time[idx_first], self.time[idx_last]])
+                        else:
+                            if op == 'mean':
+                                data_aux = var_info['data'][idx_first:, :, :, :].mean(axis=0)
+                            elif op == 'max':
+                                data_aux = var_info['data'][idx_first:, :, :, :].max(axis=0)
+                            elif op == 'min':
+                                data_aux = var_info['data'][idx_first:, :, :, :].min(axis=0)
+                            else:
+                                raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op))
+                            aux_time_bounds.append([self.time[idx_first], self.time[-1]])
+
+                        data_aux = data_aux.reshape((1, data_aux.shape[0], data_aux.shape[1], data_aux.shape[2]))
+                        aux_time.append(self.time[idx_first].replace(hour=0, minute=0, second=0, microsecond=0))
+                        # Append over time dimension
+                        if stat_data is None:
+                            stat_data = data_aux.copy()
+                        else:
+                            stat_data = np.vstack([stat_data, data_aux])
+                self.variables[var_name]['data'] = stat_data
+                self.variables[var_name]['cell_methods'] = "time: {0} (interval: {1}hr)".format(op, time_interval)
+            self.time = aux_time
+            self._time = self.time
+
+            self.set_time_bnds(aux_time_bounds)
+
+        elif type_op == 'alltsteps':
+            for var_name, var_info in self.variables.items():
+                if var_info['data'] is None:
+                    self.load(var_name)
+                if op == 'mean':
+                    aux_data = var_info['data'].mean(axis=0)
+                elif op == 'max':
+                    aux_data = var_info['data'].max(axis=0)
+                elif op == 'min':
+                    aux_data = var_info['data'].min(axis=0)
+                else:
+                    raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op))
+                if len(aux_data.shape) == 3:
+                    aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2]))
+                self.variables[var_name]['data'] = aux_data
+                self.variables[var_name]['cell_methods'] = "time: {0} (interval: {1}hr)".format(op, time_interval)
+
+            aux_time = self.time[0].replace(hour=0, minute=0, second=0, microsecond=0)
+            aux_time_bounds = [[self.time[0], self.time[-1]]]
+            self.time = [aux_time]
+            self._time = self.time
+
+            self.set_time_bnds(aux_time_bounds)
+
+        elif type_op == 'withoutt0':
+            for var_name, var_info in self.variables.items():
+                if var_info['data'] is None:
+                    self.load(var_name)
+                if op == 'mean':
+                    aux_data = var_info['data'][1:, :].mean(axis=0)
+                elif op == 'max':
+                    aux_data = var_info['data'][1:, :].max(axis=0)
+                elif op == 'min':
+                    aux_data = var_info['data'][1:, :].min(axis=0)
+                else:
+                    raise NotImplementedError("Statistic operation '{0}' is not implemented.".format(op))
+                if len(aux_data.shape) == 3:
+                    aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2]))
+                self.variables[var_name]['data'] = aux_data
+                self.variables[var_name]['cell_methods'] = "time: {0} (interval: {1}hr)".format(op, time_interval)
+            aux_time = self._time[1].replace(hour=0, minute=0, second=0, microsecond=0)
+            aux_time_bounds = [[self._time[1], self._time[-1]]]
+            self.time = [aux_time]
+            self._time = self.time
+
+            self.set_time_bnds(aux_time_bounds)
         else:
-            idx = self._time.index(self._time[-1] - timedelta(hours=hours)) + 1
-        return idx
+            raise NotImplementedError("Statistic operation type '{0}' is not implemented.".format(type_op))
+        self.hours_start = 0
+        self.hours_end = 0
+        return None
+
+    # ==================================================================================================================
+    # Reading
+    # ==================================================================================================================
 
     def set_read_axis_limits(self):
         """
@@ -334,7 +496,7 @@ class Nes(object):
                        'y_min': None, 'y_max': None,
                        'z_min': None, 'z_max': None,
                        't_min': None, 't_max': None}
-        
+
         if self.parallel_method == 'Y':
             y_len = self._lat['data'].shape[0]
             axis_limits['y_min'] = (y_len // self.size) * self.rank
@@ -374,41 +536,29 @@ class Nes(object):
             axis_limits['z_max'] = self.last_level
         return axis_limits
 
-    def set_write_axis_limits(self):
+    def get_time_id(self, hours, first=True):
         """
-        Calculate the 4D writing axis limits
+        Get the index of the corresponding time value.
+
+        Parameters
+        ----------
+        hours : int
+            Number of hours to avoid
+        first : bool
+            Indicates if you want to avoid from the first hours (True) or from the last (False)
+            Default: True
 
         Returns
         -------
-        dict
-            Dictionary with the 4D limits of the rank data to write.
-            t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max
+        int
+            Position of the time array
         """
-        axis_limits = {'x_min': None, 'x_max': None,
-                       'y_min': None, 'y_max': None,
-                       'z_min': None, 'z_max': None,
-                       't_min': None, 't_max': None}
-
-        if self.parallel_method == 'Y':
-            y_len = self._lat['data'].shape[0]
-            axis_limits['y_min'] = (y_len // self.size) * self.rank
-            if self.rank + 1 < self.size:
-                axis_limits['y_max'] = (y_len // self.size) * (self.rank + 1)
-        elif self.parallel_method == 'X':
-            x_len = self._lon['data'].shape[-1]
-            axis_limits['x_min'] = (x_len // self.size) * self.rank
-            if self.rank + 1 < self.size:
-                axis_limits['x_max'] = (x_len // self.size) * (self.rank + 1)
-        elif self.parallel_method == 'T':
-            t_len = len(self._time)
-            axis_limits['t_min'] = ((t_len // self.size) * self.rank)
-            if self.rank + 1 < self.size:
-                axis_limits['t_max'] = (t_len // self.size) * (self.rank + 1)
-
+        from datetime import timedelta
+        if first:
+            idx = self._time.index(self._time[0] + timedelta(hours=hours))
         else:
-            raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format(
-                meth=self.parallel_method, accept=['X', 'Y', 'T']))
-        return axis_limits
+            idx = self._time.index(self._time[-1] - timedelta(hours=hours)) + 1
+        return idx
 
     def open(self):
         """
@@ -747,42 +897,45 @@ class Nes(object):
             gl_attrs[attrname] = getattr(self.netcdf, attrname)
         return gl_attrs
 
-    def free_vars(self, var_list):
-        """
-        Erase the selected variables from the variables information.
-
-        Parameters
-        ----------
-        var_list : list, str
-            List (or single string) of the variables to be loaded
-        """
-        if isinstance(var_list, str):
-            var_list = [var_list]
-
-        if self.is_xarray:
-            self.dataset = self.dataset.drop_vars(var_list)
-            self.variables = self.__get_lazy_variables()
-        else:
-            for var_name in var_list:
-                del self.variables[var_name]
-        return None
+    # ==================================================================================================================
+    # Writing
+    # ==================================================================================================================
 
-    def keep_vars(self, var_list):
+    def set_write_axis_limits(self):
         """
-        Keep the selected variables and erases the rest.
+        Calculate the 4D writing axis limits
 
-        Parameters
-        ----------
-        var_list : list, str
-            List (or single string) of the variables to be loaded
+        Returns
+        -------
+        dict
+            Dictionary with the 4D limits of the rank data to write.
+            t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max
         """
-        if isinstance(var_list, str):
-            var_list = [var_list]
+        axis_limits = {'x_min': None, 'x_max': None,
+                       'y_min': None, 'y_max': None,
+                       'z_min': None, 'z_max': None,
+                       't_min': None, 't_max': None}
 
-        to_remove = list(set(self.variables.keys()).difference(set(var_list)))
+        if self.parallel_method == 'Y':
+            y_len = self._lat['data'].shape[0]
+            axis_limits['y_min'] = (y_len // self.size) * self.rank
+            if self.rank + 1 < self.size:
+                axis_limits['y_max'] = (y_len // self.size) * (self.rank + 1)
+        elif self.parallel_method == 'X':
+            x_len = self._lon['data'].shape[-1]
+            axis_limits['x_min'] = (x_len // self.size) * self.rank
+            if self.rank + 1 < self.size:
+                axis_limits['x_max'] = (x_len // self.size) * (self.rank + 1)
+        elif self.parallel_method == 'T':
+            t_len = len(self._time)
+            axis_limits['t_min'] = ((t_len // self.size) * self.rank)
+            if self.rank + 1 < self.size:
+                axis_limits['t_max'] = (t_len // self.size) * (self.rank + 1)
 
-        self.free_vars(to_remove)
-        return None
+        else:
+            raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format(
+                meth=self.parallel_method, accept=['X', 'Y', 'T']))
+        return axis_limits
 
     def _create_dimensions(self, netcdf):
         """