diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c3ddb0642f6e154c9619b355473d46d05b27bc13..467e1e9eaafa8f69a9ebe3c0d6ebe6672a19e8a7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,16 @@ CHANGELOG .. start-here +1.1.10 +============ + +* Release date: 2025/07/03 +* Changes and new features: + + * Expand & Contract + * Bugfix on Coordinates metadata conventions. + * New entry point to check NaN and Inf values: `nes_check` + 1.1.9 ============ diff --git a/Makefile b/Makefile index 0578d4bafa75b11b101807d64b324628bdc82309..6c8788b3237f5f519e8f901cd95b8d817d93263b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -version=1.1.9 +version=1.1.10 ### PATHS: Change them if needed # Paths to the NES software diff --git a/environment.yml b/environment.yml index c49e204f5e011c000b00e02e817abe547bbd72d0..e2e658aa526698599359a006bdc13c43f1f204e9 100755 --- a/environment.yml +++ b/environment.yml @@ -4,9 +4,9 @@ channels: dependencies: - python=3.10 - - libnetcdf=*=mpi_mpich* - - netCDF4=*=mpi_mpich* - - h5py=*=mpi_mpich* + - libnetcdf=*=mpi_* + - netCDF4=*=mpi_* + - h5py=*=mpi_* - pytest-cov - pycodestyle>=2.10.0 - geopandas>=0.10.2 @@ -15,8 +15,7 @@ dependencies: - pyproj~=3.2.1 - setuptools>=66.1.1 - pytest>=7.2.1 - - shapely<2.0 - - pygeos>=0.10.0 + - shapely - mpi4py ~= 3.1.4 - eccodes - python-eccodes diff --git a/nes/__init__.py b/nes/__init__.py index a8e71e3b8a6ff9522fd99a9f6a94be939984fc34..8bc6b4333dbc3e3b19c6cf5303c1de1e23274514 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -1,5 +1,5 @@ -__date__ = "2025-04-22" -__version__ = "1.1.9" +__date__ = "2025-07-03" +__version__ = "1.1.10" __all__ = [ 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py index 717a3b0c2e013bc039a5e833e92daf040ec5ec2b..af256cc17d695105ed0b0bb8492f7207c84bac03 100644 --- a/nes/methods/horizontal_interpolation.py +++ b/nes/methods/horizontal_interpolation.py @@ -21,7 +21,8 @@ CONSERVATIVE_OPTS = ["Conservative", "Area_Conservative", "cons", "conservative" def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, - info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False, keep_nan=False): + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False, keep_nan=False, + fix_border=False): """ Horizontal methods from one grid to another one. @@ -48,7 +49,10 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="Neares flux : bool Indicates if you want to calculate the weight matrix for flux variables. keep_nan : bool - Indicates if you want to keep nan values after the interpolation + Indicates if you want to keep nan values after the interpolation + fix_border : bool + Indicates if you want to fix the borders on the NN interpolation by expanding it 1 cell and contracting + it later. """ if info and self.master: print("Creating Weight Matrix") @@ -56,11 +60,11 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="Neares # Obtain weight matrix if self.parallel_method == "T": weights, idx = __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, - only_create_wm, wm, flux) + only_create_wm, wm, flux, fix_border) elif self.parallel_method in ["Y", "X"]: weights, idx = __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, - only_create_wm, wm, flux) + only_create_wm, wm, flux, fix_border) else: raise NotImplementedError("Parallel method {0} is not implemented yet for horizontal interpolations.".format( self.parallel_method) + "Use 'T'") @@ -189,6 +193,7 @@ def __get_src_data(comm, var_data, idx, parallel_method): if parallel_method == "T": var_data = var_data.flatten() else: + print(f"[Rank {comm.Get_rank()}] var data {var_data}") var_data = comm.gather(var_data, root=0) if comm.Get_rank() == 0: if parallel_method == "Y": @@ -200,7 +205,7 @@ def __get_src_data(comm, var_data, idx, parallel_method): var_data = concatenate(var_data, axis=axis) var_data = var_data.flatten() - var_data = comm.bcast(var_data) + var_data = comm.bcast(var_data, root=0) var_data = pad(var_data, [1, 1], "constant", constant_values=nan).take(idx + 1, mode="clip") @@ -208,7 +213,7 @@ def __get_src_data(comm, var_data, idx, parallel_method): # noinspection DuplicatedCode -def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): +def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux, fix_border): """ To obtain the weights and source data index through the T axis. @@ -230,7 +235,9 @@ def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbo Weight matrix Nes File. flux : bool Indicates if you want to calculate the weight matrix for flux variables. - + fix_border : bool + Indicates if you want to fix the borders on the NN interpolation by expanding it 1 cell and contracting + it later. Returns ------- tuple @@ -254,7 +261,8 @@ def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbo warn("The selected weight matrix does not have the same number of nearest neighbours." + "Re-calculating again but not saving it.") sys.stderr.flush() - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + fix_border=fix_border) else: weight_matrix = True @@ -262,7 +270,7 @@ def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbo if self.master: if kind in NEAREST_OPTS: weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, - wm_path=weight_matrix_path) + wm_path=weight_matrix_path, fix_border=fix_border) elif kind in CONSERVATIVE_OPTS: weight_matrix = __create_area_conservative_weight_matrix( self, dst_grid, wm_path=weight_matrix_path, flux=flux) @@ -276,7 +284,8 @@ def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbo else: if self.master: if kind in NEAREST_OPTS: - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + fix_border=fix_border) elif kind in CONSERVATIVE_OPTS: weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) else: @@ -307,7 +316,7 @@ def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbo # noinspection DuplicatedCode -def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): +def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux, fix_border): """ To obtain the weights and source data index through the X or Y axis. @@ -329,6 +338,9 @@ def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighb Weight matrix Nes File. flux : bool Indicates if you want to calculate the weight matrix for flux variables. + fix_border : bool + Indicates if you want to fix the borders on the NN interpolation by expanding it 1 cell and contracting + it later. Returns ------- @@ -359,14 +371,15 @@ def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighb warn("The selected weight matrix does not have the same number of nearest neighbours." + "Re-calculating again but not saving it.") sys.stderr.flush() - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + fix_border=fix_border) else: weight_matrix = True else: if kind in NEAREST_OPTS: if self.master: weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, - wm_path=weight_matrix_path) + wm_path=weight_matrix_path, fix_border=fix_border) else: weight_matrix = True elif kind in CONSERVATIVE_OPTS: @@ -380,7 +393,8 @@ def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighb else: if kind in NEAREST_OPTS: if self.master: - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + fix_border=fix_border) else: weight_matrix = True elif kind in CONSERVATIVE_OPTS: @@ -404,8 +418,8 @@ def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighb weights = None idx = None - weights = self.comm.bcast(weights, root=0) - idx = self.comm.bcast(idx, root=0) + weights = dst_grid.comm.bcast(weights, root=0) + idx = dst_grid.comm.bcast(idx, root=0) # if isinstance(dst_grid, nes.PointsNes): # print("weights 1 ->", weights.shape) @@ -456,7 +470,7 @@ def __read_weight_matrix(weight_matrix_path, comm=None, parallel_method="T"): # noinspection DuplicatedCode,PyProtectedMember -def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info=False): +def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info=False, fix_border=False): """ To create the weight matrix with the nearest neighbours method. @@ -472,12 +486,17 @@ def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info Path where write the weight matrix. info: bool Indicates if you want to print extra info during the methods process. + fix_border : bool + Indicates if you want to fix the borders on the NN interpolation by expanding it 1 cell and contracting + it later. Returns ------- nes.Nes Weight matrix. """ + if fix_border: + dst_grid.expand(n_cells=1) # Only master is here. if info and self.master: print("\tCreating Nearest Neighbour Weight Matrix with {0} neighbours".format(n_neighbours)) @@ -525,7 +544,7 @@ def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info # self.nearest_neighbour_inds = \ # column_stack(unravel_index(idx, lon.shape)) - weight_matrix = dst_grid.copy() + weight_matrix = dst_grid.copy(new_comm=MPI.COMM_SELF) weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] weight_matrix._full_time_bnds = None @@ -535,7 +554,6 @@ def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info weight_matrix.hours_start = 0 weight_matrix.hours_end = 0 - weight_matrix.set_communicator(MPI.COMM_SELF) # take the reciprocals of the nearest neighbours distances dists[dists < 1] = 1 inverse_dists = reciprocal(dists) @@ -546,8 +564,12 @@ def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info weight_matrix.variables["idx"] = {"data": idx_transf, "units": ""} weight_matrix.lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} weight_matrix._full_lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} + + if fix_border: + weight_matrix.contract(n_cells=1) + if wm_path is not None: - weight_matrix.to_netcdf(wm_path) + weight_matrix.to_netcdf(wm_path, serial=True) return weight_matrix @@ -666,7 +688,8 @@ def __create_area_conservative_weight_matrix(self, dst_nes, wm_path=None, flux=F intersection_df = intersection_df.set_index( ["FID_dst", intersection_df.groupby("FID_dst").cumcount()]).rename_axis(("FID", "level")).sort_index() intersection_df.rename(columns={"FID_src": "idx"}, inplace=True) - weight_matrix = dst_nes.copy() + + weight_matrix = dst_nes.copy(new_comm=MPI.COMM_SELF) weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] weight_matrix._full_time_bnds = None @@ -675,9 +698,7 @@ def __create_area_conservative_weight_matrix(self, dst_nes, wm_path=None, flux=F weight_matrix.first_level = 0 weight_matrix.hours_start = 0 weight_matrix.hours_end = 0 - - weight_matrix.set_communicator(MPI.COMM_SELF) - + weight_matrix.set_levels({"data": arange(intersection_df.index.get_level_values("level").max() + 1), "dimensions": ("lev",), "units": "", diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 59b27fb81f911d22f134792d7c9af7198bec3a46..66b126025c8b51d9236a717a1b9bd974e2b08c40 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -1,13 +1,12 @@ #!/usr/bin/env python -import os import sys from gc import collect from warnings import warn from math import isclose from numpy import (array, ndarray, abs, mean, diff, dstack, append, tile, empty, unique, stack, vstack, full, isnan, flipud, nan, float32, float64, ma, generic, character, issubdtype, arange, newaxis, concatenate, - split, cumsum, zeros, column_stack, argsort, take) + split, cumsum, zeros, column_stack, hstack, argsort, take) from pandas import Index, concat from geopandas import GeoDataFrame from datetime import timedelta, datetime @@ -291,6 +290,7 @@ class Nes(object): # Set NetCDF attributes self.global_attrs = self.__get_global_attributes(create_nes) + self.loaded = True else: if dataset is not None: @@ -338,6 +338,7 @@ class Nes(object): # Projection data self.projection_data = self._get_projection_data(create_nes, **kwargs) self.projection = self._get_pyproj_projection() + self.loaded = False # Writing options self.zip_lvl = 0 @@ -360,19 +361,6 @@ class Nes(object): self.last_level = None self.first_level = None - def __test_mpi__(self, num_test=None): - print(f"{self.rank} Barrier {num_test}") - sys.stdout.flush() - self.comm.Barrier() - if self.master: - data = 1 - else: - data = 0 - data = self.comm.bcast(data, root=0) - print(f"{self.rank} data {data}") - sys.stdout.flush() - return None - @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, @@ -567,7 +555,55 @@ class Nes(object): """ return self.variables[key]["data"] - def copy(self, copy_vars: bool = False): + def copy(self, copy_vars: bool = False, new_comm=None): + """ + Copy the Nes object. + The copy avoids duplicating dataset and MPI communicator (unless specified). + + Parameters + ---------- + copy_vars : bool + If True, includes variables and cell_measures in the copy. + new_comm : MPI.Comm, optional + If provided, sets a new MPI communicator. + + Returns + ------- + Nes + A copy of the Nes object. + """ + # Temporarily remove non-copyable elements + original_comm = self.comm + original_dataset = self.dataset + + self.comm = None + self.dataset = None + + nessy = deepcopy(self) + + # Restore to original + self.comm = original_comm + self.dataset = original_dataset + + # Reattach communicator and dataset + if new_comm is not None: + nessy.set_communicator(new_comm) + else: + nessy.comm = self.comm + + # Reattach dataset manually if needed + nessy.dataset = None + + if copy_vars: + nessy.variables = deepcopy(self.variables) + nessy.cell_measures = deepcopy(self.cell_measures) + else: + nessy.variables = {} + nessy.cell_measures = {} + + return nessy + + def copy2(self, copy_vars: bool = False, new_comm=None): """ Copy the Nes object. The copy will avoid to copy the communicator, dataset and variables by default. @@ -576,6 +612,8 @@ class Nes(object): ---------- copy_vars: bool Indicates if you want to copy the variables (in lazy mode). + new_comm : Comm + New MPI communicator Returns ------- @@ -585,15 +623,17 @@ class Nes(object): nessy = deepcopy(self) nessy.dataset = None + if copy_vars: - nessy.set_communicator(self.comm) nessy.variables = deepcopy(self.variables) nessy.cell_measures = deepcopy(self.cell_measures) else: nessy.variables = {} nessy.cell_measures = {} + if new_comm is not None: + nessy.set_communicator(new_comm) - return nessy + return nessy def get_full_times(self) -> List[datetime]: """ @@ -604,6 +644,9 @@ class Nes(object): List[datetime] The complete list of original time step values from the netCDF data. """ + if self.size == 1: + return self._full_time + if self.master: data = self._full_time else: @@ -623,7 +666,9 @@ class Nes(object): List[datetime] The complete list of original time step boundary values from the netCDF data. """ - data = self.comm.bcast(self._full_time_bnds) + if self.size == 1: + return self._full_time_bnds + data = self.comm.bcast(self._full_time_bnds, root=0) return data def get_full_levels(self) -> Dict[str, Any]: @@ -641,7 +686,9 @@ class Nes(object): ... } """ - data = self.comm.bcast(self._full_lev) + if self.size == 1: + return self._full_lev + data = self.comm.bcast(self._full_lev, root=0) return data def get_full_latitudes(self) -> Dict[str, Any]: @@ -659,7 +706,9 @@ class Nes(object): ... } """ - data = self.comm.bcast(self._full_lat) + if self.size == 1: + return self._full_lat + data = self.comm.bcast(self._full_lat, root=0) return data @@ -678,7 +727,9 @@ class Nes(object): ... } """ - data = self.comm.bcast(self._full_lon) + if self.size == 1: + return self._full_lon + data = self.comm.bcast(self._full_lon, root=0) return data def get_full_latitudes_boundaries(self) -> Dict[str, Any]: @@ -696,7 +747,9 @@ class Nes(object): ... } """ - data = self.comm.bcast(self._full_lat_bnds) + if self.size == 1: + return self._full_lat_bnds + data = self.comm.bcast(self._full_lat_bnds, root=0) return data def get_full_longitudes_boundaries(self) -> Dict[str, Any]: @@ -714,7 +767,9 @@ class Nes(object): ... } """ - data = self.comm.bcast(self._full_lon_bnds) + if self.size == 1: + return self._full_lon_bnds + data = self.comm.bcast(self._full_lon_bnds, root=0) return data def set_full_times(self, data: List[datetime]) -> None: @@ -850,23 +905,30 @@ class Nes(object): Returns ------- - array + fids: Array 2D array with the FID data. """ if self.master: fids = arange(self._full_lat["data"].shape[0] * self._full_lon["data"].shape[-1]) fids = fids.reshape((self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1])) + if self.size == 1: + return fids else: fids = None - fids = self.comm.bcast(fids) + + fids = self.comm.bcast(fids, root=0) if use_read: fids = fids[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] else: - fids = fids[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] - + try: + fids = fids[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] + except TypeError as e: + print(self.rank, fids, self.write_axis_limits) + sys.stdout.flush() + raise e return fids def get_full_shape(self): @@ -880,9 +942,11 @@ class Nes(object): """ if self.master: shape = (self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1]) + if self.size == 1: + return shape else: shape = None - shape = self.comm.bcast(shape) + shape = self.comm.bcast(shape, root=0) return shape @@ -956,9 +1020,9 @@ class Nes(object): """ self.comm = comm - self.rank = self.comm.Get_rank() - self.master = self.rank == 0 - self.size = self.comm.Get_size() + self.rank = comm.Get_rank() + self.master = comm.Get_rank() == 0 + self.size = comm.Get_size() self.read_axis_limits = self._get_read_axis_limits() self.write_axis_limits = self._get_write_axis_limits() @@ -1098,7 +1162,7 @@ class Nes(object): Parameters ---------- - coordinates : array + coordinates : Array Coordinates in degrees (latitude or longitude). inc : float Increment between centre values. @@ -1258,7 +1322,7 @@ class Nes(object): else: time_interval = None - return self.comm.bcast(time_interval) + return self.comm.bcast(time_interval, root=0) def sel_time(self, time, inplace=True): """ @@ -1408,6 +1472,888 @@ class Nes(object): return None + def expand(self, n_cells: int = 1) -> None: + """ + Expands the grid by increasing the number of cells in the spatial dimensions. + + Parameters + ---------- + n_cells : int, optional + Number of cells to expand in each direction. Default is 1. + + Returns + ------- + None + """ + is_first = self.rank == 0 + is_last = self.rank == self.size - 1 + + self._expand_coordinates(n_cells, is_first, is_last) + self._expand_variables(n_cells, is_first, is_last) + self._expand_full_coordinates(n_cells) + self._update_axis_limits(expand=True, n_cells=n_cells, is_first=is_first, is_last=is_last) + self._post_expand_processing() + + def contract(self, n_cells: int = 1) -> None: + """ + Contracts the grid by removing `n_cells` from the spatial dimensions. + + Parameters + ---------- + n_cells : int, optional + The number of cells to remove in each direction. Default is 1. + + Returns + ------- + None + """ + is_first = self.rank == 0 + is_last = self.rank == self.size - 1 + + self._contract_coordinates(n_cells, is_first, is_last) + self._contract_variables(n_cells, is_first, is_last) + self._contract_full_coordinates(n_cells) + self._update_axis_limits(expand=False, n_cells=n_cells, is_first=is_first, is_last=is_last) + self._post_expand_processing() + + def _expand_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Expands the latitude and longitude coordinates. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + raise NotImplementedError("It is not possible to expand Default coordinates") + + def _contract_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Contracts latitude and longitude coordinates by removing `n_cells` from the borders. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each border. + is_first : bool + Indicates whether the current rank is the first in parallel processing. + is_last : bool + Indicates whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + raise NotImplementedError("It is not possible to contract Default coordinates") + + def _expand_full_coordinates(self, n_cells: int) -> None: + """ + Expands the full coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + + Returns + ------- + None + """ + raise NotImplementedError("It is not possible to expand Default full coordinates") + + def _contract_full_coordinates(self, n_cells: int) -> None: + """ + Contracts full latitude and longitude coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each side. + + Returns + ------- + None + """ + raise NotImplementedError("It is not possible to contract Default full coordinates") + + def _expand_variables(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Expands the grid variables. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + expand_x = (self.parallel_method in ["X", None]) + expand_y = (self.parallel_method in ["Y", None]) + + for var_name, var_data in self.variables.items(): + if isinstance(self.variables[var_name]['data'], ndarray): + self.variables[var_name]['data'] = self._expand_variables_data( + var_data['data'], n_cells=n_cells, + left=is_first if expand_x else True, + right=is_last if expand_x else True, + top=is_last if expand_y else True, + bottom=is_first if expand_y else True + ) + else: + self.variables[var_name]['data'] = var_data['data'] + return None + + def _contract_variables(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Contracts the grid variables by removing `n_cells` from the spatial dimensions. + + Parameters + ---------- + n_cells : int + The number of cells to remove. + is_first : bool + Indicates whether the current rank is the first in parallel processing. + is_last : bool + Indicates whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + contract_x = (self.parallel_method in ["X", None]) + contract_y = (self.parallel_method in ["Y", None]) + + for var_name, var_data in self.variables.items(): + if isinstance(self.variables[var_name]['data'], ndarray): + self.variables[var_name]['data'] = self._contract_variables_data( + var_data['data'], n_cells=n_cells, + left=is_first if contract_x else True, + right=is_last if contract_x else True, + top=is_last if contract_y else True, + bottom=is_first if contract_y else True + ) + else: + self.variables[var_name]['data'] = var_data['data'] + return None + + @staticmethod + def _expand_1d(coordinate: ndarray, n_cells: int, left: bool = True, right: bool = True) -> ndarray or None: + """ + Expands a 1D or 2D coordinate array by adding `n_cells` values on each side. + + If `coordinate` is 2D (bounds format), each row is expanded by extrapolating the first and last bounds. + + Parameters + ---------- + coordinate : np.ndarray + The original 1D or 2D array of coordinates. + n_cells : int + The number of cells to expand on each side. + left : bool, optional + If True, expand on the left side (default is True). + right : bool, optional + If True, expand on the right side (default is True). + + Returns + ------- + np.ndarray or None + The expanded coordinate array. + + Raises + ------ + ValueError + If `coordinate` is neither 1D nor 2D. + + Notes + ----- + - Assumes uniform spacing between coordinates. + - If `coordinate` is `None`, returns `None`. + - If `n_cells` is 0 or neither `left` nor `right` are True, the array remains unchanged. + - Supports both 1D (center points) and 2D (bounds) coordinates. + + Examples + -------- + Expanding a 1D coordinate array: + + coords = np.array([10, 20, 30, 40]) + _expand_1d(None, coords, 2) + array([ -10, 0, 10, 20, 30, 40, 50, 60]) + + Expanding a 2D coordinate array (bounds): + + bounds = np.array([[10, 11], [20, 21], [30, 31], [40, 41]]) + _expand_1d(None, bounds, 2) + array([[ -10, -9], + [ 0, 1], + [ 10, 11], + [ 20, 21], + [ 30, 31], + [ 40, 41], + [ 50, 51], + [ 60, 61]]) + """ + if coordinate is None: + return None + + if n_cells <= 0 or not (left or right): + return coordinate # No expansion needed + + if coordinate.ndim == 1: + # Expansion for 1D coordinates + step = coordinate[1] - coordinate[0] + + expanded_left = arange(coordinate[0] - n_cells * step, coordinate[0], step) if left else empty((0,)) + expanded_right = arange(coordinate[-1] + step, coordinate[-1] + (n_cells + 1) * step, + step) if right else empty((0,)) + + result = concatenate([expanded_left, coordinate, expanded_right]) + + elif coordinate.ndim == 2: + # Expansion for 2D coordinates (bounds) + step = coordinate[1, 0] - coordinate[0, 0] # Assuming uniform step size + expanded_left = array([[coordinate[0, 0] - i * step, coordinate[0, 1] - i * step] for i in + range(n_cells, 0, -1)]) if left else empty((0, 2)) + expanded_right = array([[coordinate[-1, 0] + i * step, coordinate[-1, 1] + i * step] for i in + range(1, n_cells + 1)]) if right else empty((0, 2)) + + result = concatenate([expanded_left, coordinate, expanded_right]) + + else: + raise ValueError("Input coordinate array must be either 1D or 2D.") + + return result + + @staticmethod + def _contract_1d(coordinate: ndarray, n_cells: int, left: bool = True, + right: bool = True) -> ndarray or None: + """ + Contracts a 1D or 2D coordinate array by removing `n_cells` from each specified side. + + Parameters + ---------- + coordinate : np.ndarray + The input 1D or 2D array of coordinates. + n_cells : int + The number of cells to remove from each border. + left : bool, optional + If True, removes `n_cells` from the left side. Default is True. + right : bool, optional + If True, removes `n_cells` from the right side. Default is True. + + Returns + ------- + np.ndarray or None + The contracted coordinate array. + + Raises + ------ + ValueError + If `coordinate` is neither 1D nor 2D. + IndexError + If `n_cells` is too large and would remove all data. + """ + if coordinate is None: + return None + + if n_cells <= 0 or not (left or right): + return coordinate # No contraction needed + + if coordinate.ndim == 1: + if n_cells * (left + right) >= coordinate.shape[0]: + raise IndexError("n_cells is too large, would remove all elements in the 1D array.") + result = coordinate[n_cells:] if left else coordinate + result = result[:-n_cells] if right else result + + elif coordinate.ndim == 2: + if n_cells * (left + right) >= coordinate.shape[0]: + raise IndexError("n_cells is too large, would remove all rows in the 2D array.") + result = coordinate[n_cells:] if left else coordinate + result = result[:-n_cells] if right else result + + else: + raise ValueError("Input coordinate array must be either 1D or 2D.") + + return result + + @staticmethod + def _expand_2d(coordinate: ndarray, n_cells: int, + left: bool = True, right: bool = True, + top: bool = True, bottom: bool = True) -> ndarray or None: + """ + Expands a 2D coordinate grid by adding `n_cells` on each specified side. + + Parameters + ---------- + coordinate : np.ndarray + A 2D array representing latitude or longitude grid points. + n_cells : int + The number of cells to expand on each side. + left : bool, optional + If True, expand on the left side (default is True). + right : bool, optional + If True, expand on the right side (default is True). + top : bool, optional + If True, expand on the top side (default is True). + bottom : bool, optional + If True, expand on the bottom side (default is True). + + Returns + ------- + np.ndarray or None + The expanded coordinate array with `n_cells` added on each selected side. + """ + if coordinate is None: + return None + + if n_cells <= 0 or not (left or right or top or bottom): + return coordinate # No expansion needed + + ny, nx = coordinate.shape + + # Expand left and right + if left: + left_extension = tile(coordinate[:, [0]], (1, n_cells)) + left_extension -= arange(n_cells, 0, -1).reshape(1, -1) * (coordinate[:, [1]] - coordinate[:, [0]]) + else: + left_extension = empty((ny, 0)) + + if right: + right_extension = tile(coordinate[:, [-1]], (1, n_cells)) + right_extension += arange(1, n_cells + 1).reshape(1, -1) * (coordinate[:, [-1]] - coordinate[:, [-2]]) + else: + right_extension = empty((ny, 0)) + + expanded = hstack([left_extension, coordinate, right_extension]) if left or right else coordinate + + # Expand top and bottom + if top: + top_extension = tile(expanded[[0], :], (n_cells, 1)) + top_extension -= arange(n_cells, 0, -1).reshape(-1, 1) * (expanded[[1], :] - expanded[[0], :]) + else: + top_extension = empty((0, expanded.shape[1])) + + if bottom: + bottom_extension = tile(expanded[[-1], :], (n_cells, 1)) + bottom_extension += arange(1, n_cells + 1).reshape(-1, 1) * (expanded[[-1], :] - expanded[[-2], :]) + else: + bottom_extension = empty((0, expanded.shape[1])) + + expanded = vstack([top_extension, expanded, bottom_extension]) if top or bottom else expanded + + return expanded + + @staticmethod + def _expand_2d_bounds(coordinate: ndarray, n_cells: int, + left: bool = True, right: bool = True, + top: bool = True, bottom: bool = True) -> ndarray or None: + """ + Expands a 3D coordinate grid (bounds format) by adding `n_cells` on each specified side. + + Parameters + ---------- + coordinate : np.ndarray + A 3D array where the last dimension contains 4 values representing bounds. + n_cells : int + The number of cells to expand on each side. + left : bool, optional + If True, expand on the left side (default is True). + right : bool, optional + If True, expand on the right side (default is True). + top : bool, optional + If True, expand on the top side (default is True). + bottom : bool, optional + If True, expand on the bottom side (default is True). + + Returns + ------- + np.ndarray or None + The expanded coordinate array with `n_cells` added on each selected side. + """ + if coordinate is None: + return None + + if coordinate.ndim != 3 or coordinate.shape[2] != 4: + raise ValueError("Input coordinate array must be 3D with the last dimension of size 4 (bounds format).") + + if n_cells <= 0 or not (left or right or top or bottom): + return coordinate # No expansion needed + + ny, nx, nbounds = coordinate.shape + + # Expand left and right + if left: + left_extension = tile(coordinate[:, [0], :], (1, n_cells, 1)) + left_extension -= arange(n_cells, 0, -1).reshape(1, -1, 1) * (coordinate[:, [1], :] - coordinate[:, [0], :]) + else: + left_extension = empty((ny, 0, nbounds)) + + if right: + right_extension = tile(coordinate[:, [-1], :], (1, n_cells, 1)) + right_extension += arange(1, n_cells + 1).reshape(1, -1, 1) * ( + coordinate[:, [-1], :] - coordinate[:, [-2], :]) + else: + right_extension = empty((ny, 0, nbounds)) + + expanded = hstack([left_extension, coordinate, right_extension]) if left or right else coordinate + + # Expand top and bottom + if top: + top_extension = tile(expanded[[0], :, :], (n_cells, 1, 1)) + top_extension -= arange(n_cells, 0, -1).reshape(-1, 1, 1) * (expanded[[1], :, :] - expanded[[0], :, :]) + else: + top_extension = empty((0, expanded.shape[1], nbounds)) + + if bottom: + bottom_extension = tile(expanded[[-1], :, :], (n_cells, 1, 1)) + bottom_extension += arange(1, n_cells + 1).reshape(-1, 1, 1) * (expanded[[-1], :, :] - expanded[[-2], :, :]) + else: + bottom_extension = empty((0, expanded.shape[1], nbounds)) + + expanded = vstack([top_extension, expanded, bottom_extension]) if top or bottom else expanded + + return expanded + + @staticmethod + def _contract_2d(coordinate: ndarray, n_cells: int, + left: bool = True, right: bool = True, + top: bool = True, bottom: bool = True) -> ndarray or None: + """ + Contracts a 2D coordinate grid by removing `n_cells` from each specified side. + + Parameters + ---------- + coordinate : np.ndarray + A 2D array representing latitude or longitude grid points. + n_cells : int + The number of cells to remove from each side. + left : bool, optional + If True, remove `n_cells` from the left side (default is True). + right : bool, optional + If True, remove `n_cells` from the right side (default is True). + top : bool, optional + If True, remove `n_cells` from the top side (default is True). + bottom : bool, optional + If True, remove `n_cells` from the bottom side (default is True). + + Returns + ------- + np.ndarray or None + The contracted coordinate array with `n_cells` removed from each selected side. + """ + if coordinate is None: + return None + + if n_cells <= 0 or not (left or right or top or bottom): + return coordinate # No contraction needed + + ny, nx = coordinate.shape + + if left: + coordinate = coordinate[:, n_cells:] + if right: + coordinate = coordinate[:, :-n_cells] + if top: + coordinate = coordinate[n_cells:, :] + if bottom: + coordinate = coordinate[:-n_cells, :] + + return coordinate + + @staticmethod + def _contract_2d_bounds(coordinate: ndarray, n_cells: int, + left: bool = True, right: bool = True, + top: bool = True, bottom: bool = True) -> ndarray or None: + """ + Contracts a 3D coordinate grid (bounds format) by removing `n_cells` from each specified side. + + Parameters + ---------- + coordinate : np.ndarray + A 3D array where the last dimension contains 4 values representing bounds. + n_cells : int + The number of cells to remove from each side. + left : bool, optional + If True, remove `n_cells` from the left side (default is True). + right : bool, optional + If True, remove `n_cells` from the right side (default is True). + top : bool, optional + If True, remove `n_cells` from the top side (default is True). + bottom : bool, optional + If True, remove `n_cells` from the bottom side (default is True). + + Returns + ------- + np.ndarray or None + The contracted coordinate array with `n_cells` removed from each selected side. + """ + if coordinate is None: + return None + + if coordinate.ndim != 3 or coordinate.shape[2] != 4: + raise ValueError("Input coordinate array must be 3D with the last dimension of size 4 (bounds format).") + + if n_cells <= 0 or not (left or right or top or bottom): + return coordinate # No contraction needed + + ny, nx, nbounds = coordinate.shape + + if left: + coordinate = coordinate[:, n_cells:, :] + if right: + coordinate = coordinate[:, :-n_cells, :] + if top: + coordinate = coordinate[n_cells:, :, :] + if bottom: + coordinate = coordinate[:-n_cells, :, :] + + return coordinate + + @staticmethod + def _expand_variables_data(data: ndarray, n_cells: int, left: bool = True, right: bool = True, top: bool = True, + bottom: bool = True) -> ndarray or None: + """ + Expands a 4D data array by adding zero-filled cells along the spatial dimensions (Y and X). + + Parameters + ---------- + data : np.ndarray + The original 4D array of shape (time, level, Y, X). + n_cells : int + The number of cells to expand along each specified spatial dimension. + left : bool, optional + If True, expand along the left (X) dimension (default is True). + right : bool, optional + If True, expand along the right (X) dimension (default is True). + top : bool, optional + If True, expand along the top (Y) dimension (default is True). + bottom : bool, optional + If True, expand along the bottom (Y) dimension (default is True). + + Returns + ------- + np.ndarray or None + The expanded 4D data array with added zero-filled cells in the selected directions. + + Raises + ------ + ValueError + If `data` is not a 4D array. + + Notes + ----- + - The function assumes the input array has the shape (time, level, Y, X). + - Expands only the last two dimensions (Y and X) by adding zeros. + - If `n_cells` is 0 or all expansion flags are False, the array remains unchanged. + + Examples + -------- + Expanding a 4D array: + + data = np.ones((2, 3, 4, 5)) # Example 4D array + expanded_data = _expand_variables(None, data, 1, left=True, right=True, top=True, bottom=True) + expanded_data.shape + (2, 3, 6, 7) # Expanded by 1 cell in all directions + """ + if data is None: + return None + + if data.ndim != 4: + raise ValueError("Input data must be a 4D array (time, level, Y, X).") + + time_dim, level_dim, y_dim, x_dim = data.shape + + if n_cells <= 0 or not (left or right or top or bottom): + return data # No expansion needed + + zero_pad = lambda shape: zeros(shape, dtype=data.dtype) + + # Expand Y dimension (top and bottom) + if top: + top_padding = zero_pad((time_dim, level_dim, n_cells, x_dim)) + data = concatenate([top_padding, data], axis=2) + if bottom: + bottom_padding = zero_pad((time_dim, level_dim, n_cells, x_dim)) + data = concatenate([data, bottom_padding], axis=2) + + # Expand X dimension (left and right) + if left: + left_padding = zero_pad((time_dim, level_dim, data.shape[2], n_cells)) + data = concatenate([left_padding, data], axis=3) + if right: + right_padding = zero_pad((time_dim, level_dim, data.shape[2], n_cells)) + data = concatenate([data, right_padding], axis=3) + + return data + + @staticmethod + def _contract_variables_data(data: ndarray, n_cells: int, + left: bool = True, right: bool = True, + top: bool = True, bottom: bool = True) -> ndarray or None: + """ + Contracts a 4D data array by removing `n_cells` along the spatial dimensions (Y and X). + + Parameters + ---------- + data : np.ndarray + The original 4D array of shape (time, level, Y, X). + n_cells : int + The number of cells to remove along each specified spatial dimension. + left : bool, optional + If True, removes `n_cells` from the left (X) dimension. Default is True. + right : bool, optional + If True, removes `n_cells` from the right (X) dimension. Default is True. + top : bool, optional + If True, removes `n_cells` from the top (Y) dimension. Default is True. + bottom : bool, optional + If True, removes `n_cells` from the bottom (Y) dimension. Default is True. + + Returns + ------- + np.ndarray or None + The contracted 4D data array with removed cells in the selected directions. + + Raises + ------ + ValueError + If `data` is not a 4D array. + IndexError + If `n_cells` is too large and would remove all data. + + Notes + ----- + - The function assumes the input array has the shape (time, level, Y, X). + - Contracts only the last two dimensions (Y and X), keeping `time` and `level` unchanged. + - If `n_cells` is 0 or all contraction flags are False, the function returns the array unchanged. + """ + + if data is None: + return None + + if data.ndim != 4: + raise ValueError("Input data must be a 4D array (time, level, Y, X).") + + time_dim, level_dim, y_dim, x_dim = data.shape + + if n_cells <= 0 or not (left or right or top or bottom): + return data # No contraction needed + + # Validate that n_cells does not exceed the available dimensions + if n_cells >= y_dim and (top or bottom): + raise IndexError("n_cells is too large, would remove all rows in Y dimension.") + if n_cells >= x_dim and (left or right): + raise IndexError("n_cells is too large, would remove all columns in X dimension.") + + # Contract Y dimension (top and bottom) + if top: + data = data[:, :, n_cells:, :] + if bottom: + data = data[:, :, :-n_cells, :] + + # Contract X dimension (left and right) + if left: + data = data[:, :, :, n_cells:] + if right: + data = data[:, :, :, :-n_cells] + + return data + + def _post_expand_processing(self) -> None: + """ + Performs additional processing after expansion, including shapefile update and grid area calculation. + + Returns + ------- + None + """ + if self.shapefile is not None: + self.create_shapefile(overwrite=True) + if 'cell_area' in self.cell_measures: + self.calculate_grid_area(overwrite=True) + + def _update_axis_limits(self, expand: bool = True, n_cells: int = 1, is_first=True, is_last=True) -> None: + """ + Update the axis limits by expanding or contracting the read and write boundaries. + + Parameters + ---------- + expand : bool, optional + If True, expands the limits; if False, contracts them. Default is True. + n_cells : int, optional + Number of cells to expand or contract, default is 1. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + # self.read_axis_limits = self._adjust_axis_limits(axis_limits=self.read_axis_limits, n_cells=n_cells, expand=expand, is_first=is_first, is_last=is_last) + # self.write_axis_limits = self._adjust_axis_limits(axis_limits=self.write_axis_limits, n_cells=n_cells, expand=expand, is_first=is_first, is_last=is_last) + + self._adjust_read_axis_limits(n_cells=n_cells, expand=expand, is_first=is_first, is_last=is_last) + self._adjust_write_axis_limits(n_cells=n_cells, expand=expand, is_first=is_first, is_last=is_last) + + def _adjust_axis_limits(self, axis_limits, n_cells: int, expand: bool, is_first=True, is_last=True) -> dict: + """ + Adjust read axis limits by expanding or contracting them based on parallelization method. + + Parameters + ---------- + n_cells : int + Number of cells to adjust. + expand : bool + If True, expands the limits; otherwise, contracts them. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + dict + Updated axis limits + """ + sign = 1 if expand else -1 + + if self.parallel_method == "Y": + if is_first: + axis_limits['y_min'] -= sign * n_cells + axis_limits['x_min'] -= sign * n_cells + if is_last: + if axis_limits['y_max'] is not None: + axis_limits['y_max'] += sign * n_cells + if axis_limits['x_max'] is not None: + axis_limits['x_max'] += sign * n_cells + elif self.parallel_method == "X": + if is_first: + axis_limits['x_min'] -= sign * n_cells + axis_limits['y_min'] -= sign * n_cells + if is_last: + if axis_limits['x_max'] is not None: + axis_limits['x_max'] += sign * n_cells + if axis_limits['y_max'] is not None: + axis_limits['y_max'] += sign * n_cells + else: + axis_limits['y_min'] -= sign * n_cells + axis_limits['x_min'] -= sign * n_cells + if axis_limits['y_max'] is not None: + axis_limits['y_max'] += sign * n_cells + if axis_limits['x_max'] is not None: + axis_limits['x_max'] += sign * n_cells + + if axis_limits['x_min'] < 0: + if axis_limits['x_max'] is not None: + axis_limits['x_max'] += abs(axis_limits['x_min']) + axis_limits['x_min'] = 0 + if axis_limits['y_min'] < 0: + if axis_limits['y_max'] is not None: + axis_limits['y_max'] += abs(axis_limits['y_min']) + axis_limits['y_min'] = 0 + return axis_limits + + def _adjust_read_axis_limits(self, n_cells: int, expand: bool, is_first=True, is_last=True) -> None: + """ + Adjust read axis limits by expanding or contracting them based on parallelization method. + + Parameters + ---------- + n_cells : int + Number of cells to adjust. + expand : bool + If True, expands the limits; otherwise, contracts them. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + sign = 1 if expand else -1 + + if self.parallel_method == "Y": + if is_first: + self.read_axis_limits['y_min'] -= sign * n_cells + self.read_axis_limits['x_min'] -= sign * n_cells + if is_last: + self.read_axis_limits['y_max'] += sign * n_cells + self.read_axis_limits['x_max'] += sign * n_cells + elif self.parallel_method == "X": + if is_first: + self.read_axis_limits['x_min'] -= sign * n_cells + self.read_axis_limits['y_min'] -= sign * n_cells + if is_last: + self.read_axis_limits['x_max'] += sign * n_cells + self.read_axis_limits['y_max'] += sign * n_cells + else: + self.read_axis_limits['y_min'] -= sign * n_cells + self.read_axis_limits['x_min'] -= sign * n_cells + self.read_axis_limits['y_max'] += sign * n_cells + self.read_axis_limits['x_max'] += sign * n_cells + return None + + def _adjust_write_axis_limits(self, n_cells: int, expand: bool, is_first=True, is_last=True) -> None: + """ + Adjust write axis limits by expanding or contracting them. + + Parameters + ---------- + n_cells : int + Number of cells to adjust. + expand : bool + If True, expands the limits; otherwise, contracts them. + + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + sign = 1 if expand else -1 + if self.parallel_method == "X": + max_axis = ['x_max'] + min_axis = ['x_min'] + elif self.parallel_method == "Y": + max_axis = ['y_max'] + min_axis = ['y_min'] + else: + max_axis = ['y_max', 'x_max'] + min_axis = ['y_min', 'x_min'] + + for axis in max_axis: + if self.write_axis_limits[axis] is not None: + if is_last: + self.write_axis_limits[axis] += sign * n_cells*2 + else: + self.write_axis_limits[axis] += sign * n_cells + for axis in min_axis: + if self.write_axis_limits[axis] is not None: + if is_first: + pass + else: + self.write_axis_limits[axis] += sign * n_cells + return None + def get_global_limits(self, limits: dict) -> dict: """ Compute the global axis limits across all MPI ranks. @@ -1481,6 +2427,80 @@ class Nes(object): return global_limits + def get_global_limits_b(self, limits: dict) -> dict: + """ + Compute the global axis limits across all MPI ranks. + + This function uses MPI reduce operations to determine the minimum and maximum + values for each axis across all ranks. `None` values are temporarily replaced + with `inf` or `-inf` to avoid issues during reduction and are restored afterward. + + Parameters + ---------- + limits : dict + A dictionary containing local axis limits for the current MPI rank. + Expected keys: + - `x_min`, `x_max` + - `y_min`, `y_max` + - `z_min`, `z_max` + - `t_min`, `t_max` + Values can be numerical or `None` (which will be processed accordingly). + + Returns + ------- + dict + A dictionary with the global minimum and maximum values for each axis, + computed across all MPI ranks. `None` is returned for dimensions that + were originally `None`. + + Notes + ----- + - The function assumes `self.comm` is a valid `MPI.COMM_WORLD` communicator. + - The reduction is performed only for numerical values; `None` values are handled explicitly. + - The final dictionary is only meaningful in rank 0. Other ranks return an undefined result. + + Examples + -------- + Suppose we have four MPI ranks with the following `limits`: + + Rank 0: + {'x_min': -91, 'x_max': 50, 'y_min': 450, 'y_max': 475, 'z_min': None, 'z_max': None, 't_min': 0, 't_max': 1} + + Rank 1: + {'x_min': -91, 'x_max': 50, 'y_min': 475, 'y_max': 500, 'z_min': None, 'z_max': None, 't_min': 0, 't_max': 1} + + Rank 2: + {'x_min': -91, 'x_max': 50, 'y_min': 500, 'y_max': 525, 'z_min': None, 'z_max': None, 't_min': 0, 't_max': 1} + + Rank 3: + {'x_min': -91, 'x_max': 50, 'y_min': 525, 'y_max': 549, 'z_min': None, 'z_max': None, 't_min': 0, 't_max': 1} + + Calling `get_global_limits` on rank 0 will return: + + {'x_min': -91, 'x_max': 50, 'y_min': 450, 'y_max': 549, 'z_min': None, 'z_max': None, 't_min': 0, 't_max': 1} + """ + my_limits = deepcopy(limits) + for key in my_limits: + if my_limits[key] is None: + my_limits[key] = float('inf') if 'min' in key else float('-inf') + + # Perform MPI reduction to find global min and max values + global_limits = {} + for key in my_limits: + if 'min' in key: + global_limits[key] = self.comm.reduce(my_limits[key], op=MPI.MIN, root=0) + elif 'max' in key: + global_limits[key] = self.comm.reduce(my_limits[key], op=MPI.MAX, root=0) + + # Restore None for dimensions that were originally None + if self.rank == 0: + for key in global_limits: + if global_limits[key] in [float('inf'), float('-inf')]: + global_limits[key] = None + + return global_limits + + def _filter_coordinates_selection(self): """ Use the selection limits to filter time, lev, lat, lon, lon_bnds and lat_bnds. @@ -2820,7 +3840,7 @@ class Nes(object): else: raise NotImplementedError("Error with {0}. Only can be read netCDF with 4 dimensions or less".format( var_name)) - + # Unmask array data = self._unmask_array(data) @@ -2874,6 +3894,8 @@ class Nes(object): if close: self.close() + self.loaded = True + return None @staticmethod @@ -3354,8 +4376,8 @@ class Nes(object): zlib=self.zip_lvl > 0, complevel=self.zip_lvl) if self.size > 1: cell_area.set_collective(True) - cell_area[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] = \ + cell_area[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = \ self.cell_measures["cell_area"]["data"] cell_area.long_name = "area of grid cell" @@ -3645,7 +4667,6 @@ class Nes(object): keep_open : bool Indicates if you want to keep open the NetCDH to fill the data by time-step """ - # Open NetCDF if self.info: print("Rank {0:03d}: Creating {1}".format(self.rank, path)) @@ -3711,6 +4732,7 @@ class Nes(object): keep_open : bool Indicates if you want to keep open the NetCDH to fill the data by time-step """ + print("Saving to NetCDF") nc_type = nc_type old_info = self.info self.info = info @@ -3721,18 +4743,21 @@ class Nes(object): if serial and self.size > 1: try: data = self._gather_data(self.variables) + print("Data gathered") except KeyError: data = self.__gather_data_py_object(self.variables) + print("Data gathered object") try: c_measures = self._gather_data(self.cell_measures) except KeyError: c_measures = self.__gather_data_py_object(self.cell_measures) if self.master: - new_nc = self.copy(copy_vars=False) - new_nc.set_communicator(MPI.COMM_SELF) + new_nc = self.copy(copy_vars=False, new_comm=MPI.COMM_SELF) new_nc.variables = data new_nc.cell_measures = c_measures if nc_type in ["NES", "DEFAULT"]: + print("Saving to netcdf") + print(f"{new_nc}") new_nc.__to_netcdf_py(path, keep_open=keep_open) elif nc_type == "CAMS_RA": new_nc.__to_netcdf_cams_ra(path) @@ -3899,8 +4924,7 @@ class Nes(object): except KeyError: c_measures = self.__gather_data_py_object(self.cell_measures) if self.master: - new_nc = self.copy(copy_vars=False) - new_nc.set_communicator(MPI.COMM_SELF) + new_nc = self.copy(copy_vars=False, new_comm=MPI.COMM_SELF) new_nc.variables = data new_nc.cell_measures = c_measures new_nc.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) @@ -3909,7 +4933,7 @@ class Nes(object): return None - def create_shapefile(self): + def create_shapefile(self, overwrite=False): """ Create spatial GeoDataFrame (shapefile). @@ -3919,7 +4943,7 @@ class Nes(object): Shapefile dataframe. """ - if self.shapefile is None: + if self.shapefile is None or overwrite: if self.lat_bnds is None or self.lon_bnds is None: self.create_spatial_bounds() @@ -4269,9 +5293,9 @@ class Nes(object): Parameters ---------- - lon : array + lon : Array Longitude values. - lat : array + lat : Array Latitude values. """ @@ -4344,7 +5368,7 @@ class Nes(object): overwrite=overwrite) def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, - info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False, keep_nan=False): + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False, keep_nan=False, fix_border=False): """ Horizontal methods from the current grid to another one. @@ -4370,11 +5394,15 @@ class Nes(object): Indicates if you want to calculate the weight matrix for flux variables. keep_nan : bool Indicates if you want to keep nan values after the interpolation + fix_border : bool + Indicates if you want to fix the borders on the NN interpolation by expanding it 1 cell and contracting + it later. """ return horizontal_interpolation.interpolate_horizontal( self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, - to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux, keep_nan=keep_nan) + to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux, keep_nan=keep_nan, + fix_border=fix_border) def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): """ diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index d7902f8e9cd29ba8e3d3046b816f472eb49019c8..c9b9785aa2d3201f88b7d8e0cdcc910d3d44f3c0 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -120,6 +120,100 @@ class LatLonNes(Nes): return new + def _expand_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Expands the latitude and longitude coordinates. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['lat', 'lat_bnds', 'lon', 'lon_bnds']: + expand_left = is_first if "lat" in coord else True + expand_right = is_last if "lat" in coord else True + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + return None + + def _contract_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Contracts latitude and longitude coordinates by removing `n_cells` from the borders. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each border. + is_first : bool + Indicates whether the current rank is the first in parallel processing. + is_last : bool + Indicates whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['lat', 'lat_bnds', 'lon', 'lon_bnds']: + contract_left = is_first if "lat" in coord else True + contract_right = is_last if "lat" in coord else True + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + return None + + def _expand_full_coordinates(self, n_cells: int) -> None: + """ + Expands the full coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_lat', '_full_lat_bnds', '_full_lon', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + return None + + def _contract_full_coordinates(self, n_cells: int) -> None: + """ + Contracts full latitude and longitude coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each side. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_lat', '_full_lat_bnds', '_full_lon', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + return None + @staticmethod def _get_pyproj_projection(): """ diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index 2aea9121731efffe7d1f8273130f8e6b3f852497..501f62637afbfdcdbd8a8c98177dc8f7ffdd8cc3 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -158,7 +158,7 @@ class LCCNes(Nes): ... } """ - data = self.comm.bcast(self._full_y) + data = self.comm.bcast(self._full_y, root=0) return data @@ -177,7 +177,7 @@ class LCCNes(Nes): ... } """ - data = self.comm.bcast(self._full_x) + data = self.comm.bcast(self._full_x, root=0) return data def set_full_y(self, data: Dict[str, Any]) -> None: @@ -218,6 +218,140 @@ class LCCNes(Nes): self._full_x = data return None + def _expand_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Expands the latitude and longitude coordinates. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['x', 'y']: + expand_left = is_first if "y" in coord else True + expand_right = is_last if "y" in coord else True + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + for coord in ['lat', 'lon']: + expand_left = is_first if "lat" in coord else True + expand_right = is_last if "lat" in coord else True + self.__dict__[coord]['data'] = self._expand_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + for coord in ['lat_bnds', 'lon_bnds']: + expand_left = is_first if "lat" in coord else True + expand_right = is_last if "lat" in coord else True + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + return None + + def _contract_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Contracts latitude and longitude coordinates by removing `n_cells` from the borders. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each border. + is_first : bool + Indicates whether the current rank is the first in parallel processing. + is_last : bool + Indicates whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['x', 'y']: + contract_left = is_first if "y" in coord else True + contract_right = is_last if "y" in coord else True + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + for coord in ['lat', 'lon']: + contract_left = is_first if "lat" in coord else True + contract_right = is_last if "lat" in coord else True + self.__dict__[coord]['data'] = self._contract_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + for coord in ['lat_bnds', 'lon_bnds']: + contract_left = is_first if "lat" in coord else True + contract_right = is_last if "lat" in coord else True + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + return None + + def _expand_full_coordinates(self, n_cells: int) -> None: + """ + Expands the full coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_x', '_full_y']: + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat', '_full_lon', ]: + self.__dict__[coord]['data'] = self._expand_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat_bnds', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + return None + + def _contract_full_coordinates(self, n_cells: int) -> None: + """ + Contracts full latitude and longitude coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each side. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_x', '_full_y']: + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat', '_full_lon']: + self.__dict__[coord]['data'] = self._contract_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat_bnds', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + return None + # noinspection DuplicatedCode def _filter_coordinates_selection(self): """ @@ -520,7 +654,7 @@ class LCCNes(Nes): """ var.grid_mapping = "Lambert_Conformal" - # var.coordinates = "lat lon" + var.coordinates = "lat lon" return None @@ -564,7 +698,7 @@ class LCCNes(Nes): raise NotImplementedError("Grib2 format cannot be written in a Lambert Conformal Conic projection.") # noinspection DuplicatedCode - def create_shapefile(self): + def create_shapefile(self, overwrite=False): """ Create spatial GeoDataFrame (shapefile). @@ -573,8 +707,8 @@ class LCCNes(Nes): shapefile : GeoPandasDataFrame Shapefile dataframe. """ - - if self.shapefile is None: + + if self.shapefile is None or overwrite: # Get latitude and longitude cell boundaries if self.lat_bnds is None or self.lon_bnds is None: diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index ef63cb741b4f56e6dccdaa3ae6f67c763baed4f9..7075f4b770dc62b912662175fa3f6481d3631f28 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -159,7 +159,7 @@ class MercatorNes(Nes): ... } """ - data = self.comm.bcast(self._full_y) + data = self.comm.bcast(self._full_y, root=0) return data @@ -178,7 +178,7 @@ class MercatorNes(Nes): ... } """ - data = self.comm.bcast(self._full_x) + data = self.comm.bcast(self._full_x, root=0) return data def set_full_y(self, data: Dict[str, Any]) -> None: @@ -219,6 +219,140 @@ class MercatorNes(Nes): self._full_x = data return None + def _expand_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Expands the latitude and longitude coordinates. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['x', 'y']: + expand_left = is_first if "y" in coord else True + expand_right = is_last if "y" in coord else True + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + for coord in ['lat', 'lon']: + expand_left = is_first if "lat" in coord else True + expand_right = is_last if "lat" in coord else True + self.__dict__[coord]['data'] = self._expand_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + for coord in ['lat_bnds', 'lon_bnds']: + expand_left = is_first if "lat" in coord else True + expand_right = is_last if "lat" in coord else True + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + return None + + def _contract_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Contracts latitude and longitude coordinates by removing `n_cells` from the borders. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each border. + is_first : bool + Indicates whether the current rank is the first in parallel processing. + is_last : bool + Indicates whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['x', 'y']: + contract_left = is_first if "y" in coord else True + contract_right = is_last if "y" in coord else True + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + for coord in ['lat', 'lon']: + contract_left = is_first if "lat" in coord else True + contract_right = is_last if "lat" in coord else True + self.__dict__[coord]['data'] = self._contract_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + for coord in ['lat_bnds', 'lon_bnds']: + contract_left = is_first if "lat" in coord else True + contract_right = is_last if "lat" in coord else True + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + return None + + def _expand_full_coordinates(self, n_cells: int) -> None: + """ + Expands the full coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_x', '_full_y']: + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat', '_full_lon', ]: + self.__dict__[coord]['data'] = self._expand_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat_bnds', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + return None + + def _contract_full_coordinates(self, n_cells: int) -> None: + """ + Contracts full latitude and longitude coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each side. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_x', '_full_y']: + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat', '_full_lon']: + self.__dict__[coord]['data'] = self._contract_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat_bnds', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + return None + # noinspection DuplicatedCode def _filter_coordinates_selection(self): """ @@ -501,7 +635,7 @@ class MercatorNes(Nes): """ var.grid_mapping = "mercator" - # var.coordinates = "lat lon" + var.coordinates = "lat lon" return None @@ -544,7 +678,7 @@ class MercatorNes(Nes): raise NotImplementedError("Grib2 format cannot be written in a Mercator projection.") # noinspection DuplicatedCode - def create_shapefile(self): + def create_shapefile(self, overwrite=False): """ Create spatial GeoDataFrame (shapefile). @@ -554,8 +688,8 @@ class MercatorNes(Nes): Shapefile dataframe. """ - if self.shapefile is None: - + if self.shapefile is None or overwrite: + # Get latitude and longitude cell boundaries if self.lat_bnds is None or self.lon_bnds is None: self.create_spatial_bounds() diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index c63e9d5e99ba3e405d4760a26737b48d7b5bb4ea..f3b0768f798e838befe636d95df64a86900c4ef3 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -136,6 +136,12 @@ class PointsNes(Nes): return new + def expand(self, n_cells=1): + raise RuntimeError("It is not possible to expand a Point Dataset") + + def contract(self, n_cells=1): + raise RuntimeError("It is not possible to contract a Point Dataset") + @staticmethod def _get_pyproj_projection(): """ @@ -661,7 +667,7 @@ class PointsNes(Nes): raise NotImplementedError("Grib2 format cannot be written with point data.") - def create_shapefile(self): + def create_shapefile(self, overwrite=False): """ Create spatial GeoDataFrame (shapefile). @@ -671,7 +677,7 @@ class PointsNes(Nes): Shapefile dataframe. """ - if self.shapefile is None: + if self.shapefile is None or overwrite: # Create dataframe containing all points gdf = self.get_centroids_from_coordinates() diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index e1647eab7ab31df2b39b8e388692fee07e6c6546..095d516a0f00e01629527bba4a30195a5cf083e3 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import sys from numpy import (float64, linspace, cos, sin, arcsin, arctan2, array, mean, diff, append, flip, repeat, concatenate, vstack) @@ -161,7 +162,7 @@ class RotatedNes(Nes): ... } """ - data = self.comm.bcast(self._full_rlat) + data = self.comm.bcast(self._full_rlat, root=0) return data @@ -180,7 +181,7 @@ class RotatedNes(Nes): ... } """ - data = self.comm.bcast(self._full_rlon) + data = self.comm.bcast(self._full_rlon, root=0) return data def set_full_rlat(self, data: Dict[str, Any]) -> None: @@ -221,6 +222,166 @@ class RotatedNes(Nes): self._full_rlon = data return None + def _expand_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Expands the latitude and longitude coordinates. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + is_first : bool + Whether the current rank is the first in parallel processing. + is_last : bool + Whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['rlat', 'rlon']: + expand_left = is_first if "lat" in coord else True + expand_right = is_last if "lat" in coord else True + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right + ) + + if self.parallel_method == "X": + expand_top = True + expand_bottom = True + expand_left = is_first + expand_right = is_last + elif self.parallel_method == "Y": + expand_top = is_last + expand_bottom = is_first + expand_left = True + expand_right = True + else: + expand_top = True + expand_bottom = True + expand_left = True + expand_right = True + + for coord in ['lat', 'lon']: + self.__dict__[coord]['data'] = self._expand_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right, top=expand_top, bottom=expand_bottom, + ) + for coord in ['lat_bnds', 'lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right, top=expand_top, bottom=expand_bottom, + ) + return None + + def _contract_coordinates(self, n_cells: int, is_first: bool, is_last: bool) -> None: + """ + Contracts latitude and longitude coordinates by removing `n_cells` from the borders. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each border. + is_first : bool + Indicates whether the current rank is the first in parallel processing. + is_last : bool + Indicates whether the current rank is the last in parallel processing. + + Returns + ------- + None + """ + for coord in ['rlat', 'rlon']: + contract_left = is_first if "lat" in coord else True + contract_right = is_last if "lat" in coord else True + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=contract_left, right=contract_right + ) + if self.parallel_method == "X": + expand_top = True + expand_bottom = True + expand_left = is_first + expand_right = is_last + elif self.parallel_method == "Y": + expand_top = is_last + expand_bottom = is_first + expand_left = True + expand_right = True + else: + expand_top = True + expand_bottom = True + expand_left = True + expand_right = True + + for coord in ['lat', 'lon']: + self.__dict__[coord]['data'] = self._contract_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right, top=expand_top, bottom=expand_bottom, + ) + for coord in ['lat_bnds', 'lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=expand_left, right=expand_right, top=expand_top, bottom=expand_bottom, + ) + return None + + def _expand_full_coordinates(self, n_cells: int) -> None: + """ + Expands the full coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + Number of cells to expand. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_rlat', '_full_rlon']: + self.__dict__[coord]['data'] = self._expand_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat', '_full_lon', ]: + self.__dict__[coord]['data'] = self._expand_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True, top=True, bottom=True + ) + for coord in ['_full_lat_bnds', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._expand_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True, bottom=True + ) + + return None + + def _contract_full_coordinates(self, n_cells: int) -> None: + """ + Contracts full latitude and longitude coordinates if `self.master` is True. + + Parameters + ---------- + n_cells : int + The number of cells to remove from each side. + + Returns + ------- + None + """ + if self.master: + for coord in ['_full_rlat', '_full_rlon']: + self.__dict__[coord]['data'] = self._contract_1d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True + ) + for coord in ['_full_lat', '_full_lon']: + self.__dict__[coord]['data'] = self._contract_2d( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True, top=True, bottom=True, + ) + for coord in ['_full_lat_bnds', '_full_lon_bnds']: + if self.__dict__[coord] is not None: + self.__dict__[coord]['data'] = self._contract_2d_bounds( + self.__dict__[coord]['data'], n_cells=n_cells, left=True, right=True, top=True, bottom=True, + ) + return None + # noinspection DuplicatedCode def _filter_coordinates_selection(self): """ @@ -586,7 +747,7 @@ class RotatedNes(Nes): """ var.grid_mapping = "rotated_pole" - # var.coordinates = "lat lon" + var.coordinates = "lat lon" return None @@ -629,7 +790,7 @@ class RotatedNes(Nes): raise NotImplementedError("Grib2 format cannot be written in a Rotated pole projection.") # noinspection DuplicatedCode - def create_shapefile(self): + def create_shapefile(self, overwrite=False): """ Create spatial geodataframe (shapefile). @@ -639,7 +800,7 @@ class RotatedNes(Nes): Shapefile dataframe. """ - if self.shapefile is None: + if self.shapefile is None or overwrite: if self.lat_bnds is None or self.lon_bnds is None: self.create_spatial_bounds() @@ -650,6 +811,8 @@ class RotatedNes(Nes): aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], self.lon_bnds["data"].shape[2])) + sys.stdout.flush() + self.comm.Barrier() # Get polygons from bounds geometry = [] for i in range(aux_b_lons.shape[0]): @@ -658,8 +821,8 @@ class RotatedNes(Nes): (aux_b_lons[i, 2], aux_b_lats[i, 2]), (aux_b_lons[i, 3], aux_b_lats[i, 3]), (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons + + # Create dataframe containing all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") self.shapefile = gdf diff --git a/nes/utilities/__init__.py b/nes/utilities/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..40a3a73abb267d2cc3903eba0f4d3be37d465279 --- /dev/null +++ b/nes/utilities/__init__.py @@ -0,0 +1,5 @@ +from .mpi_tools import sync_check + +__all__ = [ + 'sync_check', +] diff --git a/nes/utilities/checker.py b/nes/utilities/checker.py new file mode 100644 index 0000000000000000000000000000000000000000..673400c7966b40dff5c268badc19553847711ace --- /dev/null +++ b/nes/utilities/checker.py @@ -0,0 +1,49 @@ +from ..load_nes import open_netcdf +import argparse +from numpy import isinf, isnan + +def run_checks(): + parser = argparse.ArgumentParser(description="Check NaN in a NetCDF file.") + + # Define expected arguments + parser.add_argument("--file", "-f", type=str, help="Input NetCDF file path") + parser.add_argument("--nan", "-n", type=bool, default=True, help="Check NaNs") + parser.add_argument("--inf", "-i", type=bool, default=True, help="Check infs") + + + # Parse arguments + args = parser.parse_args() + + # Call your function with parsed arguments + infile = args.file + do_nans = args.nan + do_infs = args.inf + + # Lee solo metadatos + nessy = open_netcdf(infile) + + # nessy.variables = {'var1': {'data': None, units= kg}} + # Lee matrices + nessy.load() + # nessy.variables = {'var1': {'data': ARRAY, units= kg}} + + for var_name, var_info in nessy.variables.items(): + # var_name = 'var_1' + # var_info = {'data: np.array, units: 'kg'} + if do_infs: + has_inf = isinf(var_info['data']).any() + else: + has_inf = False + + if do_nans: + has_nan = isnan(var_info['data']).any() + else: + has_nan = False + + if has_inf or has_nan: + ValueError(f"{var_name} contains NaN or Inf") + else: + pass + return + +# bash_funcion -f -n My_File \ No newline at end of file diff --git a/nes/utilities/mpi_tools.py b/nes/utilities/mpi_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..45f80fded7c4b31ae85697fafe956d1e4a5ae70e --- /dev/null +++ b/nes/utilities/mpi_tools.py @@ -0,0 +1,47 @@ +import numpy as np +from mpi4py.MPI import Comm, COMM_WORLD +from typing import Optional + + +class MyObject(): + def __init__(self): + self.name = "TestObject" + + +def sync_check(comm: Comm=COMM_WORLD, msg: str= "", abort: bool=False) -> None: + """ + Prints a synchronization message with the MPI rank, flushes stdout, + and applies a strong MPI barrier. + + Parameters + ---------- + comm : MPI.Comm + The MPI communicator to synchronize across. + msg : str + The message to print before the barrier. + abort : bool + If True it stops the execution. + """ + comm.Barrier() + print(f"[Rank {comm.Get_rank()}] {msg}", flush=True) + comm.Barrier() + + check_bcast(comm, msg=msg) + + if abort: + print(f"[Rank {comm.Get_rank()}] ABORTING {msg}", flush=True) + comm.Abort(1) + + return None + +def check_bcast(comm: Comm=COMM_WORLD, msg=""): + + if comm.Get_rank() == 0: + a = MyObject() + else: + a = None + a = comm.bcast(a, root=0) + if not isinstance(a, MyObject): + print(f"FAIL {msg}: Received object {a}", flush=True) + comm.Abort(1) + return True diff --git a/setup.py b/setup.py index 0e300345b547a09a2330b8f0997fd04a1003e53a..1252cbe42ea42beab64bcdb6b5ace8de07a2943a 100755 --- a/setup.py +++ b/setup.py @@ -1,8 +1,14 @@ #!/usr/bin/env python -from nes import __version__ from setuptools import find_packages from setuptools import setup +def read_version(): + with open("nes/__init__.py") as f: + for line in f: + if line.startswith("__version__"): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + raise RuntimeError("Unable to find version string.") with open("README.md", "r") as f: long_description = f.read() @@ -33,7 +39,7 @@ REQUIREMENTS = { setup( name='nes', license='Apache License 2.0', - version=__version__, + version=read_version(), description='NES (NetCDF for Earth Science) is a library designed to operate, read, and write NetCDF datasets in a parallel and efficient yet transparent way for the user, using domain decomposition as the parallelization strategy with customizable axis splitting.', long_description=long_description, long_description_content_type="text/markdown", @@ -62,6 +68,7 @@ setup( entry_points={ "console_scripts": [ "nes_reorder_longitudes=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli", + "nes_check=nes.utilities.checker:run_checks" ] } ) \ No newline at end of file diff --git a/tests/1.4-test_selecting_negatives.py b/tests/1.4-test_selecting_negatives.py index 2c0e4ec995b97480445225d391d0d74e64af8f46..6ba41493f83a8f9c0c1d0c8209cf2067aa616aa8 100644 --- a/tests/1.4-test_selecting_negatives.py +++ b/tests/1.4-test_selecting_negatives.py @@ -18,7 +18,7 @@ result_path = "Times_test_1.4.Selecting_{0}_{1:03d}.csv".format(parallel_method, result = pd.DataFrame(index=['read', 'calcul', 'write'], columns=['1.4.1.Selecting_Negatives']) -# NAMEE +# Global GFAS src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/ga_20220101.nc" var_list = ['so2', "pm25", 'nox'] diff --git a/tests/1.5-test_expand_contract.py b/tests/1.5-test_expand_contract.py new file mode 100644 index 0000000000000000000000000000000000000000..1d40ea7d23ef0516b09e02887dc951136f20864c --- /dev/null +++ b/tests/1.5-test_expand_contract.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf, create_nes + + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' +serial_write = True + +result_path = "Times_test_1.5_Expand_Contract_{0}_{1:03d}.csv".format(parallel_method, size) + +result = pd.DataFrame(index=['read', 'calcul', 'write'], + columns=['1.5.1.Expand_NoLoaded', '1.5.2.Expand_Loaded', '1.5.3.Expand_Contract', '1.5.4.Expand_Contract_Rotated']) + +# Global GFAS +# src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/ga_20220101.nc" +src_path = "/Users/ctena/PycharmProjects/NES/tests/ga_20220101.nc" +var_list = ['so2', "pm25", 'nox'] + +# ====================================================================================================================== +# ====================================== '1.5.1.Expand_NoLoaded' ===================================================== +# ====================================================================================================================== +test_name = '1.5.1.Expand_NoLoaded' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method, balanced=True) +print(f"1 Read {nessy.read_axis_limits}") +print(f"1 Write {nessy.write_axis_limits}") + +nessy.keep_vars(var_list) +nessy.sel(lat_min=35, lat_max=45, lon_min=-9, lon_max=5) +print(f"2 Read {nessy.read_axis_limits}") +print(f"2 Write {nessy.write_axis_limits}") + +nessy.expand(n_cells=10) +print(f"3 Read {nessy.read_axis_limits}") +print(f"3 Write {nessy.write_axis_limits}") + +nessy.load() + +# print(f"Rank {nessy.rank} -> Lon: {nessy.lon}") +# print(f"Rank {nessy.rank} -> Lon BNDS: {nessy.lon_bnds}") +# print(f"Rank {nessy.rank} -> FULL Lon: {nessy.get_full_longitudes()}") + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +print(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== '1.5.2.Expand_Loaded' ===================================================== +# ====================================================================================================================== +test_name = '1.5.2.Expand_Loaded' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method, balanced=True) +nessy.keep_vars(var_list) +nessy.sel(lat_min=35, lat_max=45, lon_min=-9, lon_max=5) + +nessy.load() + +nessy.expand(n_cells=10) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +print(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ====================================== '1.5.3.Expand_Contract' ===================================================== +# ====================================================================================================================== +test_name = '1.5.3.Expand_Contract' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method, balanced=True) + +nessy.keep_vars(var_list) +nessy.sel(lat_min=35, lat_max=45, lon_min=-9, lon_max=5) + +nessy.expand(n_cells=10) + +nessy.load() + +nessy.contract(n_cells=10) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +print(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + + +# ====================================================================================================================== +# ====================================== '1.5.4.Expand_Contract_Rotated' ===================================================== +# ====================================================================================================================== +test_name = '1.5.4.Expand_Contract_Rotated' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(projection='rotated', parallel_method=parallel_method, + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) + +nessy.create_spatial_bounds() + +nessy.expand(n_cells=10) + +nessy.contract(n_cells=10) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +print(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/tests/clean_output.sh b/tests/clean_output.sh index d5d7810511ea4b17272fa2bc014c49072d6411a9..5f9737d69cc0a9e05c8d141987680fffeadcd2b3 100644 --- a/tests/clean_output.sh +++ b/tests/clean_output.sh @@ -3,6 +3,7 @@ rm 1.1.* rm 1.2.* rm 1.3.* +rm 1.4.* rm 2.1.* rm 2.2.* diff --git a/tests/test_bash.mn5.sh b/tests/test_bash.mn5.sh index 606abec246c19d8866c34933a30d84fd636349b7..4ea4f15ae70ba9b7f249b96570e5f151c44b07f9 100644 --- a/tests/test_bash.mn5.sh +++ b/tests/test_bash.mn5.sh @@ -22,7 +22,7 @@ mpirun -np 4 python 1.1-test_read_write_projection.py mpirun -np 4 python 1.2-test_create_projection.py mpirun -np 4 python 1.3-test_selecting.py mpirun -np 4 python 1.4-test_selecting_negatives.py - +mpirun -np 4 python 1.5-test_expand_contract.py mpirun -np 4 python 2.1-test_spatial_join.py mpirun -np 4 python 2.2-test_create_shapefile.py