From 87f34e1e67444cf2a2ea0c1848a6e7021e226234 Mon Sep 17 00:00:00 2001
From: cpinero
Date: Thu, 10 Oct 2024 16:51:08 +0200
Subject: [PATCH 01/33] Develop 84 add info flag to to_shapefile method to suppress warnings

---
 nes/nc_projections/default_nes.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index 399968f..68e087a 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -3763,7 +3763,7 @@ class Nes(object):
 
         return None
 
-    def to_shapefile(self, path, time=None, lev=None, var_list=None):
+    def to_shapefile(self, path, time=None, lev=None, var_list=None, info=True):
         """
         Create shapefile from NES data.
 
@@ -3781,6 +3781,8 @@ class Nes(object):
             Vertical level to select.
         var_list : List, str, None
             List (or single string) of the variables to be loaded and saved in the shapefile.
+        info : bool
+            Flag to allow/suppress warnings when the 'time' or 'lev' parameters are None. Default is True.
         """
 
         # If list is not defined, get all variables
@@ -3801,9 +3803,10 @@ class Nes(object):
 
         # Select first vertical level (if needed)
         if lev is None:
-            msg = "No vertical level has been specified. The first one will be selected."
-            warn(msg)
-            sys.stderr.flush()
+            if info:
+                msg = "No vertical level has been specified. The first one will be selected."
+                warn(msg)
+                sys.stderr.flush()
             idx_lev = 0
         else:
             if lev not in self.lev["data"]:
@@ -3812,9 +3815,10 @@
 
         # Select first time (if needed)
         if time is None:
-            msg = "No time has been specified. The first one will be selected."
-            warn(msg)
-            sys.stderr.flush()
+            if info:
+                msg = "No time has been specified. The first one will be selected."
+                warn(msg)
+                sys.stderr.flush()
             idx_time = 0
         else:
             if time not in self.time:
--
GitLab


From 55de095216dc4b34230fa01cec47386fbca89d0c Mon Sep 17 00:00:00 2001
From: Artur
Date: Thu, 24 Oct 2024 18:28:16 +0200
Subject: [PATCH 02/33] Solving Residential problems

---
 nes/methods/horizontal_interpolation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py
index 25efef6..d95c164 100644
--- a/nes/methods/horizontal_interpolation.py
+++ b/nes/methods/horizontal_interpolation.py
@@ -56,6 +56,7 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="Neares
         weights, idx = __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours,
                                                 only_create_wm, wm, flux)
     elif self.parallel_method in ["Y", "X"]:
+        print(weight_matrix_path)
         weights, idx = __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours,
                                                  only_create_wm, wm, flux)
     else:
@@ -341,6 +342,7 @@ def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighb
         weight_matrix = wm
 
     elif weight_matrix_path is not None:
+        print(weight_matrix_path)
        with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)):
             if os.path.isfile(weight_matrix_path):
                 if self.master:
--
GitLab
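A short usage sketch for the 'info' flag added in PATCH 01 — file and variable names here are hypothetical, not taken from the series:

    import nes

    nessy = nes.open_netcdf("model_output.nc")   # hypothetical input file

    # default behaviour (info=True): warns that the first time step and the
    # first vertical level are selected when 'time' and 'lev' are left as None
    nessy.to_shapefile("grid.shp")

    # info=False suppresses those warnings
    nessy.to_shapefile("grid_quiet.shp", info=False)


From f9398e1cfddfcb705638cd784ab124a1339b5697 Mon Sep 17 00:00:00 2001
From: Artur
Date: Fri, 22 Nov 2024 14:48:01 +0100
Subject: [PATCH 03/33] keep_nans variable in interpolate_horizontal

---
 nes/methods/horizontal_interpolation.py | 11 ++++++++---
 nes/nc_projections/default_nes.py       |  6 ++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py
index d95c164..36a16b5 100644
---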
a/nes/methods/horizontal_interpolation.py +++ b/nes/methods/horizontal_interpolation.py @@ -4,7 +4,7 @@ import sys import os import nes from warnings import warn, filterwarnings -from numpy import (ma, empty, nansum, concatenate, pad, nan, array, float64, int64, float32, meshgrid, expand_dims, +from numpy import (ma, empty, nansum, sum, concatenate, pad, nan, array, float64, int64, float32, meshgrid, expand_dims, reciprocal, arange, uint32, array_split, radians, cos, sin, column_stack, zeros) from pandas import concat, DataFrame from mpi4py import MPI @@ -21,7 +21,7 @@ CONSERVATIVE_OPTS = ["Conservative", "Area_Conservative", "cons", "conservative" def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, - info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False, keep_nan=False): """ Horizontal methods from one grid to another one. @@ -47,6 +47,8 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="Neares Weight matrix Nes File. flux : bool Indicates if you want to calculate the weight matrix for flux variables. + keep_nan : bool + Indicates if you want to keep nan values after the interpolation """ if info and self.master: print("Creating Weight Matrix") @@ -119,7 +121,10 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="Neares for time in range(dst_shape[0]): for lev in range(dst_shape[1]): src_aux = __get_src_data(self.comm, var_info["data"][time, lev], idx, self.parallel_method) - final_dst.variables[var_name]["data"][time, lev] = nansum(weights * src_aux, axis=1) + if keep_nan: + final_dst.variables[var_name]["data"][time, lev] = sum(weights * src_aux, axis=1) + else: + final_dst.variables[var_name]["data"][time, lev] = nansum(weights * src_aux, axis=1) if isinstance(dst_grid, nes.PointsNes): # Removing level axis diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 372d6ee..5e49e27 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -4117,7 +4117,7 @@ class Nes(object): overwrite=overwrite) def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, - info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False, keep_nan=False): """ Horizontal methods from the current grid to another one. @@ -4141,11 +4141,13 @@ class Nes(object): Weight matrix Nes File. flux : bool Indicates if you want to calculate the weight matrix for flux variables. 
+        keep_nan : bool
+            Indicates if you want to keep nan values after the interpolation.
         """
 
         return horizontal_interpolation.interpolate_horizontal(
             self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info,
-            to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux)
+            to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux, keep_nan=keep_nan)
 
     def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True):
         """
--
GitLab
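A quick sketch of the new 'keep_nan' option (hypothetical file names; any NES grid object works as the destination):

    import nes

    src = nes.open_netcdf("source_data.nc")        # hypothetical source file
    dst_grid = nes.open_netcdf("target_grid.nc")   # hypothetical destination grid
    src.load()

    # default (keep_nan=False): nansum is used, so NaN source cells
    # contribute 0 to the weighted sum
    interp = src.interpolate_horizontal(dst_grid, kind="NearestNeighbour")

    # keep_nan=True: a plain sum is used, so any NaN contribution
    # keeps the destination cell NaN
    interp_nan = src.interpolate_horizontal(dst_grid, kind="NearestNeighbour", keep_nan=True)


From 4995af86ed2e2263a37fd76b4b45af81890bc738 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carmen=20Pi=C3=B1ero=20Meg=C3=ADas?=
Date: Wed, 4 Dec 2024 11:51:12 +0100
Subject: [PATCH 04/33] Add extra accepted names for the time variable

---
 nes/nc_projections/default_nes.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index 5e49e27..196bf0d 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -2231,7 +2231,16 @@ class Nes(object):
             List of times (datetime) of the NetCDF data.
         """
         if self.master:
-            nc_var = self.dataset.variables["time"]
+            try:
+                nc_var = self.dataset.variables["time"]
+            except KeyError:
+                nc_variable_names = self.dataset.variables.keys()
+                # Accepted name options for the time variable
+                accepted_time_names = ["TIME", "valid_time"]
+                # Get name of the time variable of the dataset
+                time_var_name = list(set(nc_variable_names).intersection(set(accepted_time_names)))[0]
+                nc_var = self.dataset.variables[time_var_name]
+
             time_data, units, calendar = self.__parse_time(nc_var)
         # Extracting time resolution depending on the units
         self._time_resolution = self.__get_time_resolution_from_units(units)
--
GitLab


With the fallback above in place, a file whose time coordinate is stored as "TIME" or "valid_time" opens without a KeyError — a hedged sketch, assuming such a file exists at the given (hypothetical) path:

    import nes

    era5 = nes.open_netcdf("era5_valid_time.nc")   # hypothetical file with a "valid_time" variable
    print(era5.time[0])                            # parsed like a regular "time" variable


From 1b88adefc633d539aaddbf373111a8fe89423d4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carmen=20Pi=C3=B1ero=20Meg=C3=ADas?=
Date: Thu, 5 Dec 2024 11:45:04 +0100
Subject: [PATCH 05/33] Update CHANGELOG.rst

---
 CHANGELOG.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 9417368..2ef86fa 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,14 @@ CHANGELOG
 
 .. start-here
 
+1.1.x
+============
+
+* Release date:
+* Changes and new features:
+
+    * Add additional names for the time variable
+
 1.1.8
 ============
 
@@ -14,6 +22,7 @@ CHANGELOG
 
     * Rename project from NES to nes
 
+
 1.1.7.post2
 ============
--
GitLab


From 098a5a8b1cc4cd37915cffec8321d7ccdbe8bd72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carmen=20Pi=C3=B1ero=20Meg=C3=ADas?=
Date: Thu, 5 Dec 2024 16:47:41 +0100
Subject: [PATCH 06/33] Free time variable for the other accepted names

---
 nes/nc_projections/default_nes.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index 196bf0d..a5533e4 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -2230,6 +2230,7 @@ class Nes(object):
         time : List[datetime]
             List of times (datetime) of the NetCDF data.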
""" + time_var_name = "time" if self.master: try: nc_var = self.dataset.variables["time"] @@ -2252,7 +2253,8 @@ class Nes(object): time = [datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) for dt in time] else: time = None - self.free_vars("time") + # Free the time variable + self.free_vars(time_var_name) return time -- GitLab From 2d5fbd7ae25ef3b28dfad4b76c5da64a530726ed Mon Sep 17 00:00:00 2001 From: cpinero Date: Thu, 5 Dec 2024 16:54:25 +0100 Subject: [PATCH 07/33] Develop 87 add extra name options to the time variable for phenomena --- CHANGELOG.rst | 9 +++++++++ nes/nc_projections/default_nes.py | 15 +++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9417368..2ef86fa 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,14 @@ CHANGELOG .. start-here +1.1.x +============ + +* Release date: +* Changes and new features: + + * Add additional names for the time variable + 1.1.8 ============ @@ -14,6 +22,7 @@ CHANGELOG * Rename project from NES to nes + 1.1.7.post2 ============ diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 5e49e27..a5533e4 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -2230,8 +2230,18 @@ class Nes(object): time : List[datetime] List of times (datetime) of the NetCDF data. """ + time_var_name = "time" if self.master: - nc_var = self.dataset.variables["time"] + try: + nc_var = self.dataset.variables["time"] + except KeyError: + nc_variable_names = self.dataset.variables.keys() + # Accepted name options for the time variable + accepted_time_names = ["TIME", "valid_time"] + # Get name of the time variable of the dataset + time_var_name = list(set(nc_variable_names).intersection(set(accepted_time_names)))[0] + nc_var = self.dataset.variables[time_var_name] + time_data, units, calendar = self.__parse_time(nc_var) # Extracting time resolution depending on the units self._time_resolution = self.__get_time_resolution_from_units(units) @@ -2243,7 +2253,8 @@ class Nes(object): time = [datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) for dt in time] else: time = None - self.free_vars("time") + # Free the time variable + self.free_vars(time_var_name) return time -- GitLab From 9db135dde43ba543b272c34421c67b6687ed8fbe Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Tue, 4 Feb 2025 15:39:25 +0100 Subject: [PATCH 08/33] function to convert longitudes from [0, 360] to [-180, 180] --- nes/nc_projections/default_nes.py | 207 ++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 50 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index a5533e4..5a14f10 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -1,11 +1,12 @@ #!/usr/bin/env python -import sys +import os, sys from gc import collect from warnings import warn +from math import isclose from numpy import (array, ndarray, abs, mean, diff, dstack, append, tile, empty, unique, stack, vstack, full, isnan, flipud, nan, float32, float64, ma, generic, character, issubdtype, arange, newaxis, concatenate, - split, cumsum, zeros, column_stack) + split, cumsum, zeros, column_stack, argsort, take) from pandas import Index, concat from geopandas import GeoDataFrame from datetime import timedelta, datetime @@ -206,7 +207,7 @@ class Nes(object): create_nes : bool Indicates if you want to create the object from scratch (True) or through 
an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : List[datetime] or None List of times to substitute the current ones while creation. @@ -301,7 +302,7 @@ class Nes(object): # Lazy variables self.variables = self._get_lazy_variables() - + # Complete dimensions self._full_time = self.__get_time() self._full_time_bnds = self.__get_time_bnds() @@ -373,7 +374,7 @@ class Nes(object): @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ Initialize the Nes class. @@ -402,7 +403,7 @@ class Nes(object): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : List[datetime] or None List of times to substitute the current ones while creation. @@ -422,13 +423,13 @@ class Nes(object): ------- int Max length of the string data - """ - + """ + if "strlen" in self.dataset.dimensions: strlen = self.dataset.dimensions["strlen"].size else: return None - + return strlen def set_strlen(self, strlen=75): @@ -442,7 +443,7 @@ class Nes(object): strlen : int or None Max length of the string """ - + self.strlen = strlen return None @@ -960,7 +961,7 @@ class Nes(object): self.read_axis_limits = self._get_read_axis_limits() self.write_axis_limits = self._get_write_axis_limits() - + return None def set_climatology(self, is_climatology): @@ -1098,11 +1099,11 @@ class Nes(object): ---------- coordinates : array Coordinates in degrees (latitude or longitude). - inc : float + inc : float Increment between centre values. - spatial_nv : int + spatial_nv : int Non-mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. - inverse : bool + inverse : bool For some grid latitudes. Returns @@ -1110,7 +1111,7 @@ class Nes(object): bounds : array An Array with as many elements as vertices for each value of coords. """ - + # Create new arrays moving the centres half increment less and more. coords_left = coordinates - inc / 2 coords_right = coordinates + inc / 2 @@ -1300,7 +1301,7 @@ class Nes(object): aux_nessy.variables[var_name][att_name] = att_value else: aux_nessy.variables[var_name]["data"] = aux_nessy.variables[var_name]["data"][[idx_time]] - + return aux_nessy def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, @@ -1450,7 +1451,7 @@ class Nes(object): self.lon_min = None return None - + def _get_projection_data(self, create_nes, **kwargs): """ Retrieves projection data based on grid details. @@ -1804,7 +1805,7 @@ class Nes(object): # ================================================================================================================== # Reading # ================================================================================================================== - + def _get_read_axis_limits(self): """ Calculate the 4D reading axis limits depending on if them have to balanced or not. 
@@ -1836,7 +1837,7 @@ class Nes(object): "y_min": None, "y_max": None, "z_min": None, "z_max": None, "t_min": None, "t_max": None} - + idx = self._get_idx_intervals() if self.parallel_method == "Y": y_len = idx["idx_y_max"] - idx["idx_y_min"] @@ -1855,7 +1856,7 @@ class Nes(object): axis_limits["t_min"] = idx["idx_t_min"] axis_limits["t_max"] = idx["idx_t_max"] - + elif self.parallel_method == "X": x_len = idx["idx_x_max"] - idx["idx_x_min"] if x_len < self.size: @@ -1873,7 +1874,7 @@ class Nes(object): axis_limits["t_min"] = idx["idx_t_min"] axis_limits["t_max"] = idx["idx_t_max"] - + elif self.parallel_method == "T": t_len = idx["idx_t_max"] - idx["idx_t_min"] if t_len < self.size: @@ -2616,10 +2617,10 @@ class Nes(object): else: raise NotImplementedError("Error with {0}. Only can be read netCDF with 4 dimensions or less".format( var_name)) - + # Unmask array data = self._unmask_array(data) - + return data def load(self, var_list=None): @@ -2633,7 +2634,7 @@ class Nes(object): var_list : List, str, None List (or single string) of the variables to be loaded. """ - + if (self.__ini_path is None) and (self.dataset is None): raise RuntimeError("Only data from existing files can be loaded.") @@ -2674,7 +2675,7 @@ class Nes(object): @staticmethod def _unmask_array(data): - """ + """ Missing to nan. This operation is done because sometimes the missing value is lost during the calculation. Parameters @@ -2687,7 +2688,7 @@ class Nes(object): array Unmasked array. """ - + if isinstance(data, ma.MaskedArray): try: data = data.filled(nan) @@ -2706,7 +2707,7 @@ class Nes(object): data_type : str or Type Data type, by default "float32" """ - + for var_name, var_info in self.variables.items(): if isinstance(var_info["data"], ndarray): self.variables[var_name]["data"] = self.variables[var_name]["data"].astype(data_type) @@ -2778,7 +2779,7 @@ class Nes(object): # ================================================================================================================== # Writing # ================================================================================================================== - + def _get_write_axis_limits(self): """ Calculate the 4D writing axis limits depending on if them have to balanced or not. @@ -3225,26 +3226,26 @@ class Nes(object): var_dtype = var_dict["data"].dtype if var_dtype is object: raise TypeError("Data dtype is object. 
Define dtype explicitly as dictionary key 'dtype'") - + if var_dict["data"] is not None: - + # Ensure data is of type numpy array (to create NES) if not isinstance(var_dict["data"], (ndarray, generic)): try: var_dict["data"] = array(var_dict["data"]) except AttributeError: raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) - + # Convert list of strings to chars for parallelization if issubdtype(var_dtype, character): var_dict["data_aux"] = self._str2char(var_dict["data"]) var_dims += ("strlen",) var_dtype = "S1" - + if self.info: print("Rank {0:03d}: Writing {1} var ({2}/{3})".format( self.rank, var_name, i + 1, len(self.variables))) - + if not chunking: var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) @@ -3280,7 +3281,7 @@ class Nes(object): self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 - + elif len(att_value.shape) == 5: if "strlen" in var_dims: var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], @@ -3304,11 +3305,11 @@ class Nes(object): :] = att_value else: raise NotImplementedError("It is not possible to write 3D variables.") - + if self.info: print("Rank {0:03d}: Var {1} data ({2}/{3})".format( self.rank, var_name, i + 1, len(self.variables))) - + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: var.setncattr(att_name, att_value) @@ -3702,7 +3703,7 @@ class Nes(object): shapefile : GeoPandasDataFrame Shapefile dataframe. """ - + if self.shapefile is None: if self.lat_bnds is None or self.lon_bnds is None: @@ -3743,14 +3744,14 @@ class Nes(object): fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") self.shapefile = gdf - + else: gdf = self.shapefile - + return gdf def write_shapefile(self, path): - """ + """ Save spatial GeoDataFrame (shapefile). Parameters @@ -3758,10 +3759,10 @@ class Nes(object): path : str Path to the output file. """ - + if self.shapefile is None: raise ValueError("Shapefile was not created.") - + if self.size == 1: # In serial, avoid gather self.shapefile.to_file(path) @@ -3771,13 +3772,13 @@ class Nes(object): if self.master: data = concat(data) data.to_file(path) - + return None def to_shapefile(self, path, time=None, lev=None, var_list=None, info=True): """ Create shapefile from NES data. - + 1. Create grid shapefile. 2. Add variables to shapefile (as independent function). 3. Write shapefile. @@ -3795,7 +3796,7 @@ class Nes(object): info: bool Flag to allow/suppress warnings when the 'time' or 'lev' parameters are None. Default is True. """ - + # If list is not defined, get all variables if var_list is None: var_list = list(self.variables.keys()) @@ -3835,7 +3836,7 @@ class Nes(object): if time not in self.time: raise ValueError("Time {} is not available. 
Choose from {}".format(time, self.time)) idx_time = self.time.index(time) - + # Create shapefile self.create_shapefile() @@ -3880,7 +3881,7 @@ class Nes(object): for lon_ind in range(0, len(self.lon["data"])): centroids.append(Point(self.lon["data"][lon_ind], self.lat["data"][lat_ind])) - + # Create dataframe containing all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") @@ -4155,9 +4156,9 @@ class Nes(object): keep_nan : bool Indicates if you want to keep nan values after the interpolation """ - + return horizontal_interpolation.interpolate_horizontal( - self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, + self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux, keep_nan=keep_nan) def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): @@ -4199,7 +4200,7 @@ class Nes(object): self.cell_measures["cell_area"] = {"data": grid_area} else: grid_area = self.cell_measures["cell_area"]["data"] - + return grid_area @staticmethod @@ -4264,3 +4265,109 @@ class Nes(object): Dictionary with data of grid edge longitudes. """ raise NotImplementedError("create_providentia_exp_grid_edge_coordinates function is not implemented by default") + + def _detect_longitude_format(self): + """ + Determines whether longitude values are in the [0, 360] or [-180, 180] range. + + Returns + --------- + bool: True if longitudes are in [0, 360], False otherwise. + """ + longitudes = self.lon["data"] + longitudes = array(longitudes) + if all((longitudes >= 0) & (longitudes <= 360)): + return True + elif all((longitudes >= -180) & (longitudes <= 180)): + return False + else: + return False + + def _check_if_data_is_loaded(self): + """ + Verifies that data is loaded for all variables. + + Raises + ------- + ValueError: If any variable's data is missing. + """ + # check if data is loaded + for variable in self.variables.keys(): + if self.variables[variable]["data"] is None: + raise ValueError(f"All variables data must be loaded before using this function. Data for {variable} is not loaded.") + + def convert_lon_360_to_180(self, path): + """ + Converts longitudes from the [0, 360] range to the [-180, 180] range. + + Parameters + ------------ + path (str): The file path where the converted data will be saved. + + Raises + -------- + ValueError: If the method is run in parallel processing mode. + ValueError: If longitudes are already in [-180, 180] format or an unrecognized format. + ValueError: If data is not fully loaded before conversion. + """ + # make this only available in serial. 
+        if self.comm.size > 1:
+            raise ValueError("This method is currently only available in serial.")
+
+        # check if the longitude flip is needed
+        if not self._detect_longitude_format():
+            raise ValueError("Longitudes are already in [-180, 180] format or another unrecognised format.")
+
+        # check if data is loaded
+        self._check_if_data_is_loaded()
+
+        # make a copy (deepcopy already carries over the global attributes,
+        # the coordinate dictionaries and the loaded variables)
+        dst = deepcopy(self)
+
+        # adjust longitude values and sort them into ascending order
+        lon_data = self.lon["data"] % 360
+        lon_data[lon_data > 180] -= 360
+        sorted_indices = argsort(lon_data)
+        dst.lon["data"] = lon_data[sorted_indices]
+        dst.set_full_longitudes(dst.lon)
+
+        # adjust longitude bounds in the same way
+        lon_bnds_data = dst.lon_bnds["data"] % 360
+        lon_bnds_data[lon_bnds_data > 180] -= 360
+        lon_bnds_sorted = lon_bnds_data[sorted_indices]
+
+        # repair the cell whose bounds wrap around the +/-180 meridian
+        if (lon_bnds_sorted[0][0] > lon_bnds_sorted[0][1]) and (isclose(lon_bnds_sorted[0][0], 180)):
+            lon_bnds_sorted[0][0] = -180
+        elif (lon_bnds_sorted[-1][0] > lon_bnds_sorted[-1][1]) and (isclose(lon_bnds_sorted[-1][1], -180)):
+            lon_bnds_sorted[-1][1] = 180
+
+        dst.lon_bnds["data"] = lon_bnds_sorted
+        dst.set_full_longitudes_boundaries(dst.lon_bnds)
+
+        # reorder the data of every variable with a longitude dimension
+        # (the longitude axis is assumed to be the last one: time, lev, lat, lon)
+        for name, var in dst.variables.items():
+            if "longitude" in var["dimensions"]:
+                dst.variables[name]["data"] = take(var["data"], sorted_indices, axis=3)
+
+        # save the file, replacing the destination atomically
+        tmp_file_path = "tmp.nc"
+        dst.to_netcdf(tmp_file_path)
+        os.replace(tmp_file_path, path)
--
GitLab
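A minimal usage sketch for the new method (hypothetical file names; per its own checks it requires a serial run and all variables loaded, and it assumes a regular grid whose longitude axis is the last dimension). The method is renamed in the next commit:

    import nes

    nessy = nes.open_netcdf("global_0_360.nc")   # hypothetical file on a [0, 360] grid
    nessy.load()                                 # every variable must be loaded first
    nessy.convert_lon_360_to_180("global_pm180.nc")


From 6dff6b32717607fd0e6e48e1e401e2797cd77ca4 Mon Sep 17 00:00:00 2001
From: Johanna Gehlen
Date: Tue, 4 Feb 2025 16:55:24 +0100
Subject: [PATCH 09/33] change function name

---
 nes/nc_projections/default_nes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index 5a14f10..5faf81e 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -4296,7 +4296,7 @@ class Nes(object):
             if self.variables[variable]["data"] is None:
                 raise ValueError(f"All variables data must be loaded before using this function. Data for {variable} is not loaded.")
 
-    def convert_lon_360_to_180(self, path):
+    def convert_longitudes(self, path):
         """
         Converts longitudes from the [0, 360] range to the [-180, 180] range.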
-- GitLab From e9072baad1d5fdd47d2e4d3f5d3bd12ce08a146e Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Tue, 4 Feb 2025 17:14:19 +0100 Subject: [PATCH 10/33] add to init file --- nes/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nes/__init__.py b/nes/__init__.py index 1dcabe9..be4838e 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -1,7 +1,7 @@ __date__ = "2024-10-07" __version__ = "1.1.8" __all__ = [ - 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', + 'open_netcdf', 'convert_longitudes', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' ] -- GitLab From c1e8f85e890d5700c8580582d70ea19687618957 Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Tue, 4 Feb 2025 17:15:54 +0100 Subject: [PATCH 11/33] nvm --- nes/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nes/__init__.py b/nes/__init__.py index be4838e..1dcabe9 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -1,7 +1,7 @@ __date__ = "2024-10-07" __version__ = "1.1.8" __all__ = [ - 'open_netcdf', 'convert_longitudes', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', + 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' ] -- GitLab From d00663fa4243b81c1984c85795245f01df72c4b4 Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Tue, 4 Feb 2025 17:35:55 +0100 Subject: [PATCH 12/33] comment out the var.coordinates attribute to avoid the ncview warning --- nes/nc_projections/latlon_nes.py | 12 +++++----- nes/nc_projections/lcc_nes.py | 32 ++++++++++++------------- nes/nc_projections/mercator_nes.py | 38 +++++++++++++++--------------- nes/nc_projections/points_nes.py | 12 +++++----- nes/nc_projections/rotated_nes.py | 26 ++++++++++---------- 5 files changed, 60 insertions(+), 60 deletions(-) diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index 35d68c8..d7902f8 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -50,12 +50,12 @@ class LatLonNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. """ - + super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, @@ -78,7 +78,7 @@ class LatLonNes(Nes): @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ Initialize the Nes class. 
@@ -107,14 +107,14 @@ class LatLonNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. """ new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -352,7 +352,7 @@ class LatLonNes(Nes): """ var.grid_mapping = "crs" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index f9eda6e..2aea912 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -63,7 +63,7 @@ class LCCNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -89,7 +89,7 @@ class LCCNes(Nes): # Dimensions screening self.y = self._get_coordinate_values(self.get_full_y(), "Y") self.x = self._get_coordinate_values(self.get_full_x(), "X") - + # Set axis limits for parallel writing self.write_axis_limits = self._get_write_axis_limits() @@ -101,7 +101,7 @@ class LCCNes(Nes): @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ Initialize the Nes class. @@ -130,7 +130,7 @@ class LCCNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -261,7 +261,7 @@ class LCCNes(Nes): ) return projection - + def _get_projection_data(self, create_nes, **kwargs): """ Retrieves projection data based on grid details. 
@@ -467,12 +467,12 @@ class LCCNes(Nes): # Get edges for regular coordinates grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) - + # Create grid outline by stacking the edges in both coordinates model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T grid_edge_lat = {"data": model_grid_outline[:, 1]} grid_edge_lon = {"data": model_grid_outline[:, 0]} - + return grid_edge_lat, grid_edge_lon # noinspection DuplicatedCode @@ -507,7 +507,7 @@ class LCCNes(Nes): :]} return None - + @staticmethod def _set_var_crs(var): """ @@ -520,7 +520,7 @@ class LCCNes(Nes): """ var.grid_mapping = "Lambert_Conformal" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None @@ -567,13 +567,13 @@ class LCCNes(Nes): def create_shapefile(self): """ Create spatial GeoDataFrame (shapefile). - + Returns ------- shapefile : GeoPandasDataFrame Shapefile dataframe. """ - + if self.shapefile is None: # Get latitude and longitude cell boundaries @@ -594,7 +594,7 @@ class LCCNes(Nes): (aux_b_lon[i, 2], aux_b_lat[i, 2]), (aux_b_lon[i, 3], aux_b_lat[i, 3]), (aux_b_lon[i, 0], aux_b_lat[i, 0])])) - + # Create dataframe containing all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -602,7 +602,7 @@ class LCCNes(Nes): else: gdf = self.shapefile - + return gdf # noinspection DuplicatedCode @@ -615,14 +615,14 @@ class LCCNes(Nes): centroids_gdf: GeoPandasDataFrame Centroids dataframe. """ - + # Get centroids from coordinates centroids = [] for lat_ind in range(0, self.lon["data"].shape[0]): for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], self.lat["data"][lat_ind, lon_ind])) - + # Create dataframe containing all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 520f9bb..ef63cb7 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -57,7 +57,7 @@ class MercatorNes(Nes): avoid_last_hours : int Number of hours to remove from last time steps. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. first_level : int Index of the first level to use. @@ -86,7 +86,7 @@ class MercatorNes(Nes): # Complete dimensions self._full_y = self._get_coordinate_dimension("y") self._full_x = self._get_coordinate_dimension("x") - + # Dimensions screening self.y = self._get_coordinate_values(self.get_full_y(), "Y") self.x = self._get_coordinate_values(self.get_full_x(), "X") @@ -131,7 +131,7 @@ class MercatorNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. 
@@ -252,7 +252,7 @@ class MercatorNes(Nes): b=self.earth_radius[0], lat_ts=float64(self.projection_data["standard_parallel"]), lon_0=float64(self.projection_data["longitude_of_projection_origin"]),) - + return projection # noinspection DuplicatedCode @@ -449,12 +449,12 @@ class MercatorNes(Nes): # Get edges for regular coordinates grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) - + # Create grid outline by stacking the edges in both coordinates model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T grid_edge_lat = {"data": model_grid_outline[:, 1]} grid_edge_lon = {"data": model_grid_outline[:, 0]} - + return grid_edge_lat, grid_edge_lon # noinspection DuplicatedCode @@ -480,15 +480,15 @@ class MercatorNes(Nes): # Obtain regular coordinates bounds self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} return None - + @staticmethod def _set_var_crs(var): """ @@ -499,9 +499,9 @@ class MercatorNes(Nes): var : Variable netCDF4-python variable object. """ - + var.grid_mapping = "mercator" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None @@ -520,7 +520,7 @@ class MercatorNes(Nes): mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] mapping.standard_parallel = self.projection_data["standard_parallel"] mapping.longitude_of_projection_origin = self.projection_data["longitude_of_projection_origin"] - + return None def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): @@ -540,7 +540,7 @@ class MercatorNes(Nes): info : bool Indicates if you want to print extra information during the process. """ - + raise NotImplementedError("Grib2 format cannot be written in a Mercator projection.") # noinspection DuplicatedCode @@ -555,7 +555,7 @@ class MercatorNes(Nes): """ if self.shapefile is None: - + # Get latitude and longitude cell boundaries if self.lat_bnds is None or self.lon_bnds is None: self.create_spatial_bounds() @@ -574,7 +574,7 @@ class MercatorNes(Nes): (aux_b_lon[i, 2], aux_b_lat[i, 2]), (aux_b_lon[i, 3], aux_b_lat[i, 3]), (aux_b_lon[i, 0], aux_b_lat[i, 0])])) - + # Create dataframe containing all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -582,7 +582,7 @@ class MercatorNes(Nes): else: gdf = self.shapefile - + return gdf # noinspection DuplicatedCode @@ -595,14 +595,14 @@ class MercatorNes(Nes): centroids_gdf: GeoPandasDataFrame Centroids dataframe. 
""" - + # Get centroids from coordinates centroids = [] for lat_ind in range(0, self.lon["data"].shape[0]): for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], self.lat["data"][lat_ind, lon_ind])) - + # Create dataframe containing all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 29022b5..c63e9d5 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -62,7 +62,7 @@ class PointsNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -123,7 +123,7 @@ class PointsNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -466,12 +466,12 @@ class PointsNes(Nes): var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value except IndexError: - out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( out_shape, att_value.shape)) except ValueError: - out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( out_shape, att_value.shape)) @@ -596,7 +596,7 @@ class PointsNes(Nes): def to_providentia(self, model_centre_lon, model_centre_lat, grid_edge_lon, grid_edge_lat): """ Transform a PointsNes into a PointsNesProvidentia object - + Returns ---------- points_nes_providentia : nes.Nes @@ -750,6 +750,6 @@ class PointsNes(Nes): var : Variable netCDF4-python variable object. """ - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index c5c3794..e1647ea 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -65,7 +65,7 @@ class RotatedNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. 
@@ -260,7 +260,7 @@ class RotatedNes(Nes): o_lat_p=float64(self.projection_data["grid_north_pole_latitude"]), o_lon_p=float64(self.projection_data["grid_north_pole_longitude"]), ) - + return projection # noinspection DuplicatedCode @@ -532,13 +532,13 @@ class RotatedNes(Nes): # Get edges for regular coordinates grid_edge_lon_data, grid_edge_lat_data = self.rotated2latlon(rlon_grid_edge, rlat_grid_edge) - + # Create grid outline by stacking the edges in both coordinates model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T grid_edge_lat = {"data": model_grid_outline[:, 1]} grid_edge_lon = {"data": model_grid_outline[:, 0]} - + return grid_edge_lat, grid_edge_lon # noinspection DuplicatedCode @@ -586,7 +586,7 @@ class RotatedNes(Nes): """ var.grid_mapping = "rotated_pole" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None @@ -645,11 +645,11 @@ class RotatedNes(Nes): self.create_spatial_bounds() # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], self.lat_bnds["data"].shape[2])) - aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], self.lon_bnds["data"].shape[2])) - + # Get polygons from bounds geometry = [] for i in range(aux_b_lons.shape[0]): @@ -658,7 +658,7 @@ class RotatedNes(Nes): (aux_b_lons[i, 2], aux_b_lats[i, 2]), (aux_b_lons[i, 3], aux_b_lats[i, 3]), (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - + # Create dataframe cointaining all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -679,16 +679,16 @@ class RotatedNes(Nes): centroids_gdf: GeoPandasDataFrame Centroids dataframe. 
""" - + # Get centroids from coordinates centroids = [] for lat_ind in range(0, self.lon["data"].shape[0]): for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], self.lat["data"][lat_ind, lon_ind])) - + # Create dataframe cointaining all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") - + return centroids_gdf -- GitLab From d85c882f086a3af844a3e9ed5fff06da56779f35 Mon Sep 17 00:00:00 2001 From: pserrano Date: Tue, 4 Feb 2025 17:54:38 +0100 Subject: [PATCH 13/33] Uploaded create fake network tutorial --- .../2.Creation/2.10.Create_Fake_Network.ipynb | 407 ++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 tutorials/2.Creation/2.10.Create_Fake_Network.ipynb diff --git a/tutorials/2.Creation/2.10.Create_Fake_Network.ipynb b/tutorials/2.Creation/2.10.Create_Fake_Network.ipynb new file mode 100644 index 0000000..bd14b86 --- /dev/null +++ b/tutorials/2.Creation/2.10.Create_Fake_Network.ipynb @@ -0,0 +1,407 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f685678b-87d0-4ede-b908-1831e2461390", + "metadata": {}, + "source": [ + "# How to create a fake network" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "eeb44561-b528-4b25-91fa-785b86a24f67", + "metadata": {}, + "outputs": [], + "source": [ + "import nes\n", + "import numpy as np\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "f6a3f5e7-d663-4e0e-aa2e-d820b8d86347", + "metadata": {}, + "source": [ + "Function to create an empty array." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d24d3e4a-d212-41ed-93a2-bb6fde334ecb", + "metadata": {}, + "outputs": [], + "source": [ + "def create_empty_array(shape):\n", + " empty_array = np.empty(shape)\n", + " empty_array[:] = np.nan\n", + " return empty_array" + ] + }, + { + "cell_type": "markdown", + "id": "705fb851-d65d-45ef-bd61-15a80c87e9ef", + "metadata": {}, + "source": [ + "This is where the network will be saved:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "e412b55a-f12c-4f54-a38f-b932ae207188", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "working_directory = os.getcwd()\n", + "working_directory" + ] + }, + { + "cell_type": "markdown", + "id": "2c4f5b54-a088-4bef-ab79-ecb16dc50a67", + "metadata": {}, + "source": [ + "## 1. 
Set Up information" + ] + }, + { + "cell_type": "markdown", + "id": "6e53f7e1-eb93-4275-9872-fbf6c76466e9", + "metadata": {}, + "source": [ + "### Introduce the original network directory" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b4181143-e151-4178-bf6b-b06596a0eb98", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"/gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly\"" + ] + }, + { + "cell_type": "markdown", + "id": "0c958071-0ca6-4d01-b356-20488aa38ed1", + "metadata": {}, + "source": [ + "### Initialise the species, the original year and the fake network year" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "240419bc-c1ff-4e2a-923f-ed732d374707", + "metadata": {}, + "outputs": [], + "source": [ + "species = ['sconco3','pm2p5','pm10','sconcno2','sconcco']\n", + "og_year = 2005\n", + "fake_year = 2026" + ] + }, + { + "cell_type": "markdown", + "id": "00e087ec-6751-4cca-b2fd-4c1c50728151", + "metadata": {}, + "source": [ + "## 2. Iterate through the loop" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "108d4fbc-4663-4783-98f2-fc8690f8f00b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200501.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202601.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200502.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202602.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200503.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202603.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200504.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202604.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200505.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202605.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200506.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202606.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200507.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202607.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200508.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202608.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200509.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202609.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200510.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202610.nc\n", + "\n", + "Original: 
/gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200511.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202611.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconco3/sconco3_200512.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconco3/sconco3_202612.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200501.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202601.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200502.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202602.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200503.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202603.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200504.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202604.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200505.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202605.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200506.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202606.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200507.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202607.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200508.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202608.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200509.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202609.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200510.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202610.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200511.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202611.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm2p5/pm2p5_200512.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm2p5/pm2p5_202612.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200501.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202601.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200502.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202602.nc\n", + "\n", + "Original: 
/gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200503.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202603.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200504.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202604.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200505.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202605.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200506.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202606.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200507.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202607.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200508.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202608.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200509.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202609.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200510.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202610.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200511.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202611.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/pm10/pm10_200512.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/pm10/pm10_202612.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200501.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202601.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200502.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202602.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200503.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202603.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200504.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202604.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200505.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202605.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200506.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202606.nc\n", + "\n", + "Original: 
/gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200507.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202607.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200508.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202608.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200509.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202609.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200510.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202610.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200511.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202611.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcno2/sconcno2_200512.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcno2/sconcno2_202612.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200501.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202601.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200502.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202602.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200503.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202603.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200504.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202604.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200505.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202605.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200506.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202606.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200507.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202607.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200508.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202608.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200509.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202609.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200510.nc\n", + "Saved: 
/gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202610.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200511.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202611.nc\n", + "\n", + "Original: /gpfs/projects/bsc32/AC_cache/obs/ghost/GHOST/1.5/hourly/sconcco/sconcco_200512.nc\n", + "Saved: /gpfs/scratch/bsc32/bsc032388/nes/tutorials/2.Creation/fake/GHOST_2005/hourly/sconcco/sconcco_202612.nc\n" + ] + } + ], + "source": [ + "for spec in species:\n", + " # get the species directory\n", + " spec_path = os.path.join(path,spec)\n", + " if os.path.isdir(spec_path):\n", + " for month in range(1,13):\n", + " str_month = str(month).zfill(2)\n", + "\n", + " # get original nc file path\n", + " date_path = os.path.join(spec_path, f\"{spec}_{og_year}{str_month}.nc\")\n", + "\n", + " print(f\"\\nOriginal: {date_path}\")\n", + "\n", + " # load original nc file\n", + " ness_nc_file = nes.open_netcdf(path=date_path, info=False)\n", + " ness_nc_file.keep_vars([spec,\"station_name\",\"station_reference\"])\n", + " ness_nc_file.load()\n", + "\n", + " # get the parameters data\n", + " lat = ness_nc_file.lat[\"data\"]\n", + " lon = ness_nc_file.lon[\"data\"]\n", + " times = ness_nc_file.time\n", + "\n", + " # get the variables data\n", + " station_name_data = ness_nc_file.variables[\"station_name\"][\"data\"]\n", + " station_reference_data = ness_nc_file.variables[\"station_reference\"][\"data\"]\n", + "\n", + " # get the variable data shape\n", + " species_shape = ness_nc_file.variables[spec][\"data\"].shape\n", + " \n", + " # initialize nes\n", + " nessy = nes.create_nes(comm=None, info=False, projection=None, parallel_method='X', lat=lat, lon=lon, times=times)\n", + " nessy.variables = {'station_name': {'data': station_name_data,\n", + " 'dimensions': ('station',),\n", + " 'dtype': str},\n", + " 'station_reference': {'data': station_reference_data,\n", + " 'dimensions': ('station',),\n", + " 'dtype': str},\n", + " spec: {'data': create_empty_array(species_shape).T,\n", + " 'dimensions': ('time', 'station',),\n", + " 'dtype': float}}\n", + "\n", + " # create the directory\n", + " netcdf_path = f'fake/GHOST_2005/hourly/{spec}/'\n", + " if not os.path.exists(os.path.dirname(netcdf_path)):\n", + " os.makedirs(os.path.dirname(netcdf_path))\n", + "\n", + " # To run Providentia, this folder should be moved to:\n", + " # '/esarchive/obs/' as in '/esarchive/obs/fake/GHOST_2005/hourly/species'\n", + "\n", + " # get the file name\n", + " output_file = f'{spec}_{fake_year}{str_month}.nc'\n", + "\n", + " # save nc file\n", + " nessy.set_strlen(75)\n", + " nessy.to_netcdf(os.path.join(netcdf_path,output_file), info=False) \n", + "\n", + " print(f\"Saved: {os.path.join(working_directory,netcdf_path,output_file)}\")\n", + "\n", + " del nessy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab From 999fe65d2e3ebf596d4729ba79d4cfba2fcee36f Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Wed, 5 Feb 2025 13:09:41 +0100 Subject: [PATCH 14/33] longitude flip function tested 
locally --- nes/nc_projections/default_nes.py | 46 ++++++++----------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 5faf81e..b470eb8 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -4291,7 +4291,6 @@ class Nes(object): ------- ValueError: If any variable's data is missing. """ - # check if data is loaded for variable in self.variables.keys(): if self.variables[variable]["data"] is None: raise ValueError(f"All variables data must be loaded before using this function. Data for {variable} is not loaded.") @@ -4310,28 +4309,13 @@ class Nes(object): ValueError: If longitudes are already in [-180, 180] format or an unrecognized format. ValueError: If data is not fully loaded before conversion. """ - # make this only available in serial. if self.comm.size > 1: raise ValueError("This method is currently only available in serial.") - # check if the longitude flip is needed - if not self._detect_longitude_format(self): + if not self._detect_longitude_format(): raise ValueError("Longitudes are already in [-180, 180] format or another unrecognised format.") - # check if data is loaded - self._check_if_data_is_loaded(self) - - # make a copy - dst = deepcopy(self) - - # copy global attributes and dimensions - dst.global_attrs = self.global_attrs - dst.lat = self.lat - dst.lon = self.lon - dst.time = self.time - dst.lat_bnds = self.lat_bnds - dst.lon_bnds = self.lon_bnds - dst.lev = self.lev + self._check_if_data_is_loaded() # adjust longitude values lon = self.lon @@ -4339,11 +4323,11 @@ class Nes(object): lon_data = lon_data % 360 lon_data[lon_data > 180] -= 360 sorted_indices = argsort(lon_data) - dst.lon["data"] = lon_data[sorted_indices] - dst.set_full_longitudes(dst.lon) + self.lon["data"] = lon_data[sorted_indices] + self.set_full_longitudes(self.lon) # adjust longitude bounds - lon_bnds_data = dst.lon_bnds["data"] + lon_bnds_data = self.lon_bnds["data"] lon_bnds_data = lon_bnds_data % 360 lon_bnds_data[lon_bnds_data > 180] -= 360 lon_bnds_sorted = lon_bnds_data[sorted_indices] @@ -4353,21 +4337,15 @@ class Nes(object): elif (lon_bnds_sorted[-1][0] > lon_bnds_sorted[-1][1]) and (isclose(lon_bnds_sorted[-1][1], -180)): lon_bnds_sorted[-1][1] = 180 - dst.lon_bnds["data"] = lon_bnds_sorted - dst.set_full_longitudes_boundaries(dst.lon_bnds) + self.lon_bnds["data"] = lon_bnds_sorted + self.set_full_longitudes_boundaries(self.lon_bnds) - # copy and adjust remaining variables - for name, var in dst.variables.items(): - dst.load(name) # TODO: is it necessary? 
- dst.variables[name] = var + # adjust variables which have longitude in their dimensions + for name, var in self.variables.items(): if "longitude" in var["dimensions"]: data = var["data"] reordered_data = take(data, sorted_indices, axis=3) - dst.variables[name]["data"] = reordered_data - else: - dst.variables[name] = var + self.variables[name]["data"] = reordered_data - # save the file - tmp_file_path = "tmp.nc" - dst.to_netcdf(tmp_file_path) - os.path.replace(tmp_file_path, path) + self.to_netcdf(path) + return self -- GitLab From 87bed084556e16cfd59d5bf8dd70cc4d8a0e7fa0 Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Wed, 5 Feb 2025 14:20:56 +0100 Subject: [PATCH 15/33] comment out coordinates metadata --- nes/nc_projections/latlon_nes.py | 12 +++++----- nes/nc_projections/lcc_nes.py | 32 ++++++++++++------------- nes/nc_projections/mercator_nes.py | 38 +++++++++++++++--------------- nes/nc_projections/points_nes.py | 12 +++++----- nes/nc_projections/rotated_nes.py | 26 ++++++++++---------- 5 files changed, 60 insertions(+), 60 deletions(-) diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index 35d68c8..d7902f8 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -50,12 +50,12 @@ class LatLonNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. """ - + super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, @@ -78,7 +78,7 @@ class LatLonNes(Nes): @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ Initialize the Nes class. @@ -107,14 +107,14 @@ class LatLonNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. """ new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -352,7 +352,7 @@ class LatLonNes(Nes): """ var.grid_mapping = "crs" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index f9eda6e..2aea912 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -63,7 +63,7 @@ class LCCNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. 
balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -89,7 +89,7 @@ class LCCNes(Nes): # Dimensions screening self.y = self._get_coordinate_values(self.get_full_y(), "Y") self.x = self._get_coordinate_values(self.get_full_x(), "X") - + # Set axis limits for parallel writing self.write_axis_limits = self._get_write_axis_limits() @@ -101,7 +101,7 @@ class LCCNes(Nes): @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ Initialize the Nes class. @@ -130,7 +130,7 @@ class LCCNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -261,7 +261,7 @@ class LCCNes(Nes): ) return projection - + def _get_projection_data(self, create_nes, **kwargs): """ Retrieves projection data based on grid details. @@ -467,12 +467,12 @@ class LCCNes(Nes): # Get edges for regular coordinates grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) - + # Create grid outline by stacking the edges in both coordinates model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T grid_edge_lat = {"data": model_grid_outline[:, 1]} grid_edge_lon = {"data": model_grid_outline[:, 0]} - + return grid_edge_lat, grid_edge_lon # noinspection DuplicatedCode @@ -507,7 +507,7 @@ class LCCNes(Nes): :]} return None - + @staticmethod def _set_var_crs(var): """ @@ -520,7 +520,7 @@ class LCCNes(Nes): """ var.grid_mapping = "Lambert_Conformal" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None @@ -567,13 +567,13 @@ class LCCNes(Nes): def create_shapefile(self): """ Create spatial GeoDataFrame (shapefile). - + Returns ------- shapefile : GeoPandasDataFrame Shapefile dataframe. """ - + if self.shapefile is None: # Get latitude and longitude cell boundaries @@ -594,7 +594,7 @@ class LCCNes(Nes): (aux_b_lon[i, 2], aux_b_lat[i, 2]), (aux_b_lon[i, 3], aux_b_lat[i, 3]), (aux_b_lon[i, 0], aux_b_lat[i, 0])])) - + # Create dataframe containing all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -602,7 +602,7 @@ class LCCNes(Nes): else: gdf = self.shapefile - + return gdf # noinspection DuplicatedCode @@ -615,14 +615,14 @@ class LCCNes(Nes): centroids_gdf: GeoPandasDataFrame Centroids dataframe. 
""" - + # Get centroids from coordinates centroids = [] for lat_ind in range(0, self.lon["data"].shape[0]): for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], self.lat["data"][lat_ind, lon_ind])) - + # Create dataframe containing all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 520f9bb..ef63cb7 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -57,7 +57,7 @@ class MercatorNes(Nes): avoid_last_hours : int Number of hours to remove from last time steps. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. first_level : int Index of the first level to use. @@ -86,7 +86,7 @@ class MercatorNes(Nes): # Complete dimensions self._full_y = self._get_coordinate_dimension("y") self._full_x = self._get_coordinate_dimension("x") - + # Dimensions screening self.y = self._get_coordinate_values(self.get_full_y(), "Y") self.x = self._get_coordinate_values(self.get_full_x(), "X") @@ -131,7 +131,7 @@ class MercatorNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -252,7 +252,7 @@ class MercatorNes(Nes): b=self.earth_radius[0], lat_ts=float64(self.projection_data["standard_parallel"]), lon_0=float64(self.projection_data["longitude_of_projection_origin"]),) - + return projection # noinspection DuplicatedCode @@ -449,12 +449,12 @@ class MercatorNes(Nes): # Get edges for regular coordinates grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) - + # Create grid outline by stacking the edges in both coordinates model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T grid_edge_lat = {"data": model_grid_outline[:, 1]} grid_edge_lon = {"data": model_grid_outline[:, 0]} - + return grid_edge_lat, grid_edge_lon # noinspection DuplicatedCode @@ -480,15 +480,15 @@ class MercatorNes(Nes): # Obtain regular coordinates bounds self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} return None - + @staticmethod def _set_var_crs(var): """ @@ -499,9 +499,9 @@ class MercatorNes(Nes): var : Variable netCDF4-python variable object. 
""" - + var.grid_mapping = "mercator" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None @@ -520,7 +520,7 @@ class MercatorNes(Nes): mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] mapping.standard_parallel = self.projection_data["standard_parallel"] mapping.longitude_of_projection_origin = self.projection_data["longitude_of_projection_origin"] - + return None def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): @@ -540,7 +540,7 @@ class MercatorNes(Nes): info : bool Indicates if you want to print extra information during the process. """ - + raise NotImplementedError("Grib2 format cannot be written in a Mercator projection.") # noinspection DuplicatedCode @@ -555,7 +555,7 @@ class MercatorNes(Nes): """ if self.shapefile is None: - + # Get latitude and longitude cell boundaries if self.lat_bnds is None or self.lon_bnds is None: self.create_spatial_bounds() @@ -574,7 +574,7 @@ class MercatorNes(Nes): (aux_b_lon[i, 2], aux_b_lat[i, 2]), (aux_b_lon[i, 3], aux_b_lat[i, 3]), (aux_b_lon[i, 0], aux_b_lat[i, 0])])) - + # Create dataframe containing all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -582,7 +582,7 @@ class MercatorNes(Nes): else: gdf = self.shapefile - + return gdf # noinspection DuplicatedCode @@ -595,14 +595,14 @@ class MercatorNes(Nes): centroids_gdf: GeoPandasDataFrame Centroids dataframe. """ - + # Get centroids from coordinates centroids = [] for lat_ind in range(0, self.lon["data"].shape[0]): for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], self.lat["data"][lat_ind, lon_ind])) - + # Create dataframe containing all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 29022b5..c63e9d5 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -62,7 +62,7 @@ class PointsNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -123,7 +123,7 @@ class PointsNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -466,12 +466,12 @@ class PointsNes(Nes): var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value except IndexError: - out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape raise IndexError("Different shapes. 
out_shape={0}, data_shp={1}".format( out_shape, att_value.shape)) except ValueError: - out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( out_shape, att_value.shape)) @@ -596,7 +596,7 @@ class PointsNes(Nes): def to_providentia(self, model_centre_lon, model_centre_lat, grid_edge_lon, grid_edge_lat): """ Transform a PointsNes into a PointsNesProvidentia object - + Returns ---------- points_nes_providentia : nes.Nes @@ -750,6 +750,6 @@ class PointsNes(Nes): var : Variable netCDF4-python variable object. """ - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index c5c3794..e1647ea 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -65,7 +65,7 @@ class RotatedNes(Nes): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : list, None List of times to substitute the current ones while creation. @@ -260,7 +260,7 @@ class RotatedNes(Nes): o_lat_p=float64(self.projection_data["grid_north_pole_latitude"]), o_lon_p=float64(self.projection_data["grid_north_pole_longitude"]), ) - + return projection # noinspection DuplicatedCode @@ -532,13 +532,13 @@ class RotatedNes(Nes): # Get edges for regular coordinates grid_edge_lon_data, grid_edge_lat_data = self.rotated2latlon(rlon_grid_edge, rlat_grid_edge) - + # Create grid outline by stacking the edges in both coordinates model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T grid_edge_lat = {"data": model_grid_outline[:, 1]} grid_edge_lon = {"data": model_grid_outline[:, 0]} - + return grid_edge_lat, grid_edge_lon # noinspection DuplicatedCode @@ -586,7 +586,7 @@ class RotatedNes(Nes): """ var.grid_mapping = "rotated_pole" - var.coordinates = "lat lon" + # var.coordinates = "lat lon" return None @@ -645,11 +645,11 @@ class RotatedNes(Nes): self.create_spatial_bounds() # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], self.lat_bnds["data"].shape[2])) - aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], self.lon_bnds["data"].shape[2])) - + # Get polygons from bounds geometry = [] for i in range(aux_b_lons.shape[0]): @@ -658,7 +658,7 @@ class RotatedNes(Nes): (aux_b_lons[i, 2], aux_b_lats[i, 2]), (aux_b_lons[i, 3], aux_b_lats[i, 3]), (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - + # Create dataframe cointaining all polygons fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -679,16 +679,16 @@ class RotatedNes(Nes): centroids_gdf: GeoPandasDataFrame Centroids dataframe. 
""" - + # Get centroids from coordinates centroids = [] for lat_ind in range(0, self.lon["data"].shape[0]): for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], self.lat["data"][lat_ind, lon_ind])) - + # Create dataframe cointaining all points fids = self.get_fids() centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") - + return centroids_gdf -- GitLab From 2d36ba2f980c36e4bd962ea58c89f689f7d4af4b Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Mon, 10 Feb 2025 12:50:40 +0100 Subject: [PATCH 16/33] not saving the netcdf in the convert_longitudes function --- build/lib/nes/__init__.py | 13 + build/lib/nes/create_nes.py | 191 + build/lib/nes/load_nes.py | 340 ++ build/lib/nes/methods/__init__.py | 8 + build/lib/nes/methods/cell_measures.py | 265 + .../nes/methods/horizontal_interpolation.py | 762 +++ build/lib/nes/methods/spatial_join.py | 305 ++ .../lib/nes/methods/vertical_interpolation.py | 335 ++ build/lib/nes/nc_projections/__init__.py | 15 + build/lib/nes/nc_projections/default_nes.py | 4252 +++++++++++++++++ build/lib/nes/nc_projections/latlon_nes.py | 395 ++ build/lib/nes/nc_projections/lcc_nes.py | 630 +++ build/lib/nes/nc_projections/mercator_nes.py | 610 +++ build/lib/nes/nc_projections/points_nes.py | 755 +++ .../nes/nc_projections/points_nes_ghost.py | 818 ++++ .../nc_projections/points_nes_providentia.py | 650 +++ build/lib/nes/nc_projections/rotated_nes.py | 694 +++ .../nes/nc_projections/rotated_nested_nes.py | 147 + build/lib/nes/nes_formats/__init__.py | 9 + build/lib/nes/nes_formats/cams_ra_format.py | 219 + build/lib/nes/nes_formats/cmaq_format.py | 355 ++ build/lib/nes/nes_formats/monarch_format.py | 114 + build/lib/nes/nes_formats/wrf_chem_format.py | 398 ++ .../tests/1.1-test_read_write_projection.py | 221 + build/lib/tests/1.2-test_create_projection.py | 190 + build/lib/tests/1.3-test_selecting.py | 183 + build/lib/tests/2.1-test_spatial_join.py | 329 ++ build/lib/tests/2.2-test_create_shapefile.py | 201 + build/lib/tests/2.3-test_bounds.py | 275 ++ build/lib/tests/2.4-test_cell_area.py | 195 + build/lib/tests/3.1-test_vertical_interp.py | 108 + .../tests/3.2-test_horiz_interp_bilinear.py | 222 + .../3.3-test_horiz_interp_conservative.py | 248 + build/lib/tests/4.1-test_stats.py | 97 + build/lib/tests/4.2-test_sum.py | 76 + build/lib/tests/4.3-test_write_timestep.py | 151 + build/lib/tests/__init__.py | 0 build/lib/tests/unit/__init__.py | 0 build/lib/tests/unit/test_imports.py | 106 + nes/nc_projections/default_nes.py | 6 +- tests/2.5-test_longitude_conversion.py | 57 + tests/clean_output.sh | 1 + tests/run_scalability_tests_nord3v2.sh | 2 +- tests/test_bash.mn4.sh | 1 + tests/test_bash.mn5.sh | 1 + tests/test_bash.nord3v2.sh | 1 + 46 files changed, 14948 insertions(+), 3 deletions(-) create mode 100644 build/lib/nes/__init__.py create mode 100644 build/lib/nes/create_nes.py create mode 100644 build/lib/nes/load_nes.py create mode 100644 build/lib/nes/methods/__init__.py create mode 100644 build/lib/nes/methods/cell_measures.py create mode 100644 build/lib/nes/methods/horizontal_interpolation.py create mode 100644 build/lib/nes/methods/spatial_join.py create mode 100644 build/lib/nes/methods/vertical_interpolation.py create mode 100644 build/lib/nes/nc_projections/__init__.py create mode 100644 build/lib/nes/nc_projections/default_nes.py create mode 100644 
build/lib/nes/nc_projections/latlon_nes.py create mode 100644 build/lib/nes/nc_projections/lcc_nes.py create mode 100644 build/lib/nes/nc_projections/mercator_nes.py create mode 100644 build/lib/nes/nc_projections/points_nes.py create mode 100644 build/lib/nes/nc_projections/points_nes_ghost.py create mode 100644 build/lib/nes/nc_projections/points_nes_providentia.py create mode 100644 build/lib/nes/nc_projections/rotated_nes.py create mode 100644 build/lib/nes/nc_projections/rotated_nested_nes.py create mode 100644 build/lib/nes/nes_formats/__init__.py create mode 100644 build/lib/nes/nes_formats/cams_ra_format.py create mode 100644 build/lib/nes/nes_formats/cmaq_format.py create mode 100644 build/lib/nes/nes_formats/monarch_format.py create mode 100644 build/lib/nes/nes_formats/wrf_chem_format.py create mode 100644 build/lib/tests/1.1-test_read_write_projection.py create mode 100644 build/lib/tests/1.2-test_create_projection.py create mode 100644 build/lib/tests/1.3-test_selecting.py create mode 100644 build/lib/tests/2.1-test_spatial_join.py create mode 100644 build/lib/tests/2.2-test_create_shapefile.py create mode 100644 build/lib/tests/2.3-test_bounds.py create mode 100644 build/lib/tests/2.4-test_cell_area.py create mode 100644 build/lib/tests/3.1-test_vertical_interp.py create mode 100644 build/lib/tests/3.2-test_horiz_interp_bilinear.py create mode 100644 build/lib/tests/3.3-test_horiz_interp_conservative.py create mode 100644 build/lib/tests/4.1-test_stats.py create mode 100644 build/lib/tests/4.2-test_sum.py create mode 100644 build/lib/tests/4.3-test_write_timestep.py create mode 100644 build/lib/tests/__init__.py create mode 100644 build/lib/tests/unit/__init__.py create mode 100644 build/lib/tests/unit/test_imports.py create mode 100644 tests/2.5-test_longitude_conversion.py diff --git a/build/lib/nes/__init__.py b/build/lib/nes/__init__.py new file mode 100644 index 0000000..1dcabe9 --- /dev/null +++ b/build/lib/nes/__init__.py @@ -0,0 +1,13 @@ +__date__ = "2024-10-07" +__version__ = "1.1.8" +__all__ = [ + 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', + 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' +] + +from .load_nes import open_netcdf, concatenate_netcdfs +# from .load_nes import open_raster +from .create_nes import create_nes, from_shapefile +from .methods.cell_measures import calculate_geometry_area +from .nc_projections import (Nes, LatLonNes, LCCNes, RotatedNes, RotatedNestedNes, MercatorNes, PointsNesProvidentia, + PointsNes, PointsNesGHOST) diff --git a/build/lib/nes/create_nes.py b/build/lib/nes/create_nes.py new file mode 100644 index 0000000..ce8b619 --- /dev/null +++ b/build/lib/nes/create_nes.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python + +import warnings +import sys +from netCDF4 import num2date +from mpi4py import MPI +from .nc_projections import PointsNes, LatLonNes, RotatedNes, RotatedNestedNes, LCCNes, MercatorNes + + +def create_nes(comm=None, info=False, projection=None, parallel_method="Y", balanced=False, + times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + **kwargs): + """ + Create a Nes class from scratch. + + Parameters + ---------- + comm : MPI.Comm, optional + MPI Communicator. If None, uses MPI.COMM_WORLD. + info : bool, optional + Indicates if reading/writing info should be provided. Default is False. + projection : str, optional + The projection type. 
Accepted values are None, "regular", "global", "rotated", "rotated-nested", "lcc", + "mercator". + parallel_method : str, optional + The parallelization method to use. Default is "Y". Accepted values are ["X", "Y", "T"]. + balanced : bool, optional + Indicates if balanced parallelization is desired. Balanced datasets cannot be written in chunking mode. + Default is False. + times : list of datetime, optional + List of datetime objects representing the time dimension. If None, a default time array is created. + avoid_first_hours : int, optional + Number of hours to remove from the start of the time steps. Default is 0. + avoid_last_hours : int, optional + Number of hours to remove from the end of the time steps. Default is 0. + first_level : int, optional + Index of the first level to use. Default is 0. + last_level : int or None, optional + Index of the last level to use. If None, the last level is used. Default is None. + **kwargs : additional arguments + Additional parameters required for specific projections. + + Returns + ------- + nes : Nes + An instance of the Nes class based on the specified parameters and projection. + + Raises + ------ + ValueError + If any required projection-specific parameters are missing or if invalid parameters are provided. + NotImplementedError + If an unsupported parallel method or projection type is specified. + + Notes + ----- + The function dynamically creates an instance of a specific Nes subclass based on the provided projection. + The required parameters for each projection type are: + - None: ["lat", "lon"] + - "regular": ["lat_orig", "lon_orig", "inc_lat", "inc_lon", "n_lat", "n_lon"] + - "global": ["inc_lat", "inc_lon"] + - "rotated": ["centre_lat", "centre_lon", "west_boundary", "south_boundary", "inc_rlat", "inc_rlon"] + - "rotated-nested": ["parent_grid_path", "parent_ratio", "i_parent_start", "j_parent_start", "n_rlat", "n_rlon"] + - "lcc": ["lat_1", "lat_2", "lon_0", "lat_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + - "mercator": ["lat_ts", "lon_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + + Example + ------- + >>> nes = create_nes(projection="regular", lat_orig=0, lon_orig=0, inc_lat=1, inc_lon=1, n_lat=180, n_lon=360) + """ + + if comm is None: + comm = MPI.COMM_WORLD + else: + comm = comm + + # Create time array + if times is None: + units = "days since 1996-12-31 00:00:00" + calendar = "standard" + times = num2date([0], units=units, calendar=calendar) + times = [aux.replace(second=0, microsecond=0) for aux in times] + else: + if not isinstance(times, list): + times = list(times) + + # Check if the parameters that are required to create the object have been defined in kwargs + kwargs_list = [] + for name, value in kwargs.items(): + kwargs_list.append(name) + + if projection is None: + required_vars = ["lat", "lon"] + elif projection == "regular": + required_vars = ["lat_orig", "lon_orig", "inc_lat", "inc_lon", "n_lat", "n_lon"] + elif projection == "global": + required_vars = ["inc_lat", "inc_lon"] + elif projection == "rotated": + required_vars = ["centre_lat", "centre_lon", "west_boundary", "south_boundary", "inc_rlat", "inc_rlon"] + elif projection == "rotated-nested": + required_vars = ["parent_grid_path", "parent_ratio", "i_parent_start", "j_parent_start", "n_rlat", "n_rlon"] + elif projection == "lcc": + required_vars = ["lat_1", "lat_2", "lon_0", "lat_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + elif projection == "mercator": + required_vars = ["lat_ts", "lon_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + else: + 
raise ValueError("Unknown projection: {0}".format(projection)) + + for var in required_vars: + if var not in kwargs_list: + msg = "Variable {0} has not been defined. ".format(var) + msg += "For a {} projection, it is necessary to define {}".format(projection, required_vars) + raise ValueError(msg) + + for var in kwargs_list: + if var not in required_vars: + msg = "Variable {0} has been defined. ".format(var) + msg += "For a {} projection, you can only define {}".format(projection, required_vars) + raise ValueError(msg) + + if projection is None: + if parallel_method == "Y": + warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'") + sys.stderr.flush() + parallel_method = "X" + elif parallel_method == "T": + raise NotImplementedError("Parallel method T not implemented yet") + nessy = PointsNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection in ["regular", "global"]: + nessy = LatLonNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "rotated": + nessy = RotatedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "rotated-nested": + nessy = RotatedNestedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "lcc": + nessy = LCCNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "mercator": + nessy = MercatorNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + else: + raise NotImplementedError(projection) + + return nessy + + +def from_shapefile(path, method=None, parallel_method="Y", **kwargs): + """ + Create NES from shapefile data. + + 1. Create NES grid. + 2. Create shapefile for grid. + 3. Spatial join to add shapefile variables to NES variables. + + Parameters + ---------- + path : str + Path to shapefile. + method : str + Overlay method. Accepted values: ["nearest", "intersection", None]. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + accepted values: ["X", "Y", "T"]. 
+ """ + + # Create NES + nessy = create_nes(comm=None, info=False, parallel_method=parallel_method, **kwargs) + + # Create shapefile for grid + nessy.create_shapefile() + + # Make spatial join + nessy.spatial_join(path, method=method) + + return nessy diff --git a/build/lib/nes/load_nes.py b/build/lib/nes/load_nes.py new file mode 100644 index 0000000..542b583 --- /dev/null +++ b/build/lib/nes/load_nes.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python + +import os +import sys +from numpy import empty +from mpi4py import MPI +from netCDF4 import Dataset +from warnings import warn +from .nc_projections import RotatedNes, PointsNes, PointsNesGHOST, PointsNesProvidentia, LCCNes, LatLonNes, MercatorNes + +DIM_VAR_NAMES = ["lat", "latitude", "lat_bnds", "lon", "longitude", "lon_bnds", "time", "time_bnds", "lev", "level", + "cell_area", "crs", "rotated_pole", "x", "y", "rlat", "rlon", "Lambert_conformal", "mercator"] + + +def open_netcdf(path, comm=None, info=False, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, + first_level=0, last_level=None, balanced=False): + """ + Open a netCDF file. + + Parameters + ---------- + path : str + Path to the NetCDF file to read. + comm : MPI.COMM + MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. + info : bool + Indicates if you want to print (stdout) the reading/writing steps. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"] + balanced : bool + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + + Returns + ------- + Nes + A Nes object. Variables read in lazy mode (only metadata). + """ + + if comm is None: + comm = MPI.COMM_WORLD + else: + comm = comm + + if not os.path.exists(path): + raise FileNotFoundError(path) + + dataset = Dataset(path, format="NETCDF4", mode="r", parallel=False) + # Parallel is not needed for reading + # if comm.Get_size() == 1: + # dataset = Dataset(path, format="NETCDF4", mode="r", parallel=False) + # else: + # dataset = Dataset(path, format="NETCDF4", mode="r", parallel=True, comm=comm, info=MPI.Info()) + + if __is_rotated(dataset): + # Rotated grids + nessy = RotatedNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + elif __is_points(dataset): + if parallel_method == "Y": + warn("Parallel method cannot be 'Y' to create points NES. 
Setting it to 'X'")
+            sys.stderr.flush()
+            parallel_method = "X"
+        if __is_points_ghost(dataset):
+            # Points - GHOST
+            nessy = PointsNesGHOST(comm=comm, dataset=dataset, info=info,
+                                   parallel_method=parallel_method,
+                                   avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+                                   first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
+        elif __is_points_providentia(dataset):
+            # Points - Providentia
+            nessy = PointsNesProvidentia(comm=comm, dataset=dataset, info=info,
+                                         parallel_method=parallel_method,
+                                         avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+                                         first_level=first_level, last_level=last_level, create_nes=False,
+                                         balanced=balanced,)
+        else:
+            # Points - non-GHOST
+            nessy = PointsNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method,
+                              avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+                              first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
+    elif __is_lcc(dataset):
+        # Lambert conformal conic grids
+        nessy = LCCNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method,
+                       avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+                       first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
+    elif __is_mercator(dataset):
+        # Mercator grids
+        nessy = MercatorNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method,
+                            avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+                            first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
+    else:
+        # Regular grids
+        nessy = LatLonNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method,
+                          avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+                          first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
+
+    return nessy
+
+
+def __is_rotated(dataset):
+    """
+    Check if the netCDF is in rotated pole projection or not.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        netcdf4-python open dataset object.
+
+    Returns
+    -------
+    value : bool
+        Indicates if the netCDF is a rotated one.
+    """
+
+    if "rotated_pole" in dataset.variables.keys():
+        return True
+    elif ("rlat" in dataset.dimensions) and ("rlon" in dataset.dimensions):
+        return True
+    else:
+        return False
+
+
+def __is_points(dataset):
+    """
+    Check if the netCDF is a points dataset in non-GHOST format or not.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        netcdf4-python open dataset object.
+
+    Returns
+    -------
+    value : bool
+        Indicates if the netCDF is a points non-GHOST one.
+    """
+
+    if "station" in dataset.dimensions:
+        return True
+    else:
+        return False
+
+
+def __is_points_ghost(dataset):
+    """
+    Check if the netCDF is a points dataset in GHOST format or not.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        netcdf4-python open dataset object.
+
+    Returns
+    -------
+    value : bool
+        Indicates if the netCDF is a points GHOST one.
+    """
+
+    if "N_flag_codes" in dataset.dimensions and "N_qa_codes" in dataset.dimensions:
+        return True
+    else:
+        return False
+
+
+def __is_points_providentia(dataset):
+    """
+    Check if the netCDF is a points dataset in Providentia format or not.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        netcdf4-python open dataset object.
+
+    Returns
+    -------
+    value : bool
+        Indicates if the netCDF is a points Providentia one.
+ """ + + if (("grid_edge" in dataset.dimensions) and ("model_latitude" in dataset.dimensions) and + ("model_longitude" in dataset.dimensions)): + return True + else: + return False + + +def __is_lcc(dataset): + """ + Check if the netCDF is in Lambert Conformal Conic (LCC) projection or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is an LCC one. + """ + + if "Lambert_Conformal" in dataset.variables.keys() or "Lambert_conformal" in dataset.variables.keys(): + return True + else: + return False + + +def __is_mercator(dataset): + """ + Check if the netCDF is in Mercator projection or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is a Mercator one. + """ + + if "mercator" in dataset.variables.keys(): + return True + else: + return False + + +def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, + first_level=0, last_level=None, balanced=False): + """ + Concatenate variables form different sources. + + Parameters + ---------- + nessy_list : list + A List of Nes objects or list of paths to concatenate. + comm : MPI.Comm + MPI Communicator. + info: bool + Indicates if you want to get reading/writing info. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + accepted values: ["X", "Y", "T"]. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + + Returns + ------- + Nes + A Nes object with all the variables. 
+ """ + if not isinstance(nessy_list, list): + raise AttributeError("You must pass a list of NES objects or paths.") + + if isinstance(nessy_list[0], str): + nessy_first = open_netcdf(nessy_list[0], + comm=comm, + parallel_method=parallel_method, + info=info, + avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, + first_level=first_level, + last_level=last_level, + balanced=balanced + ) + nessy_first.load() + else: + nessy_first = nessy_list[0] + for i, aux_nessy in enumerate(nessy_list[1:]): + if isinstance(aux_nessy, str): + nc_add = Dataset(filename=aux_nessy, mode="r") + for var_name, var_info in nc_add.variables.items(): + if var_name not in DIM_VAR_NAMES: + nessy_first.variables[var_name] = {} + var_dims = var_info.dimensions + # Read data in 4 dimensions + if len(var_dims) < 2: + data = var_info[:] + elif len(var_dims) == 2: + data = var_info[nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] + data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 3: + if "strlen" in var_dims: + data = var_info[nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"], + :] + data_aux = empty(shape=(data.shape[0], data.shape[1]), dtype=object) + for lat_n in range(data.shape[0]): + for lon_n in range(data.shape[1]): + data_aux[lat_n, lon_n] = "".join( + data[lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) + data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) + else: + data = var_info[nessy_first.read_axis_limits["t_min"]:nessy_first.read_axis_limits["t_max"], + nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] + data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 4: + data = var_info[nessy_first.read_axis_limits["t_min"]:nessy_first.read_axis_limits["t_max"], + nessy_first.read_axis_limits["z_min"]:nessy_first.read_axis_limits["z_max"], + nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] + else: + raise TypeError("{} data shape is nto accepted".format(var_dims)) + + nessy_first.variables[var_name]["data"] = data + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ["missing_value", "_FillValue"]: + value = getattr(var_info, attrname) + if value in ["unitless", "-"]: + value = "" + nessy_first.variables[var_name][attrname] = value + nc_add.close() + + else: + nessy_first.concatenate(aux_nessy) + + return nessy_first diff --git a/build/lib/nes/methods/__init__.py b/build/lib/nes/methods/__init__.py new file mode 100644 index 0000000..35b6346 --- /dev/null +++ b/build/lib/nes/methods/__init__.py @@ -0,0 +1,8 @@ +from .vertical_interpolation import add_4d_vertical_info +from .vertical_interpolation import interpolate_vertical +from .horizontal_interpolation import interpolate_horizontal +from .spatial_join import spatial_join + +__all__ = [ + 'add_4d_vertical_info', 'interpolate_vertical', 'interpolate_horizontal', 'spatial_join' +] diff --git a/build/lib/nes/methods/cell_measures.py b/build/lib/nes/methods/cell_measures.py new file mode 100644 index 0000000..185d033 --- /dev/null +++ b/build/lib/nes/methods/cell_measures.py @@ -0,0 +1,265 @@ 
+#!/usr/bin/env python +from numpy import empty, newaxis, array, arcsin, tan, fabs, arctan, sqrt, radians, cos, sin, column_stack +from copy import deepcopy + + +def calculate_grid_area(self): + """ + Get coordinate bounds and call function to calculate the area of each cell of a grid. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + """ + + # Create bounds if they do not exist + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Get spatial number of vertices + spatial_nv = self.lat_bnds["data"].shape[-1] + + # Reshape bounds + if spatial_nv == 2: + + aux_shape = (self.lat_bnds["data"].shape[0], self.lon_bnds["data"].shape[0], 4) + lon_bnds_aux = empty(aux_shape) + lon_bnds_aux[:, :, 0] = self.lon_bnds["data"][newaxis, :, 0] + lon_bnds_aux[:, :, 1] = self.lon_bnds["data"][newaxis, :, 1] + lon_bnds_aux[:, :, 2] = self.lon_bnds["data"][newaxis, :, 1] + lon_bnds_aux[:, :, 3] = self.lon_bnds["data"][newaxis, :, 0] + + lon_bnds = lon_bnds_aux + del lon_bnds_aux + + lat_bnds_aux = empty(aux_shape) + lat_bnds_aux[:, :, 0] = self.lat_bnds["data"][:, newaxis, 0] + lat_bnds_aux[:, :, 1] = self.lat_bnds["data"][:, newaxis, 0] + lat_bnds_aux[:, :, 2] = self.lat_bnds["data"][:, newaxis, 1] + lat_bnds_aux[:, :, 3] = self.lat_bnds["data"][:, newaxis, 1] + + lat_bnds = lat_bnds_aux + del lat_bnds_aux + + else: + lon_bnds = self.lon_bnds["data"] + lat_bnds = self.lat_bnds["data"] + + # Reshape bounds and assign as grid corner coordinates + grid_corner_lon = deepcopy(lon_bnds).reshape(lon_bnds.shape[0]*lon_bnds.shape[1], + lon_bnds.shape[2]) + grid_corner_lat = deepcopy(lat_bnds).reshape(lat_bnds.shape[0]*lat_bnds.shape[1], + lat_bnds.shape[2]) + + # Calculate cell areas + grid_area = calculate_cell_area(grid_corner_lon, grid_corner_lat, + earth_radius_minor_axis=self.earth_radius[0], + earth_radius_major_axis=self.earth_radius[1]) + + return grid_area + + +def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142, + earth_radius_major_axis=6378137.0): + """ + Get coordinate bounds and call function to calculate the area of each cell of a set of geometries. + + Parameters + ---------- + geometry_list : List + A List with polygon geometries. + earth_radius_minor_axis : float + Radius of the minor axis of the Earth. + earth_radius_major_axis : float + Radius of the major axis of the Earth. 
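+
+    Returns
+    -------
+    array
+        Area of each geometry, in square metres when the default Earth radii are used.
+
+    Example
+    -------
+    A minimal sketch with a single, hypothetical unit square (corners given in degrees).
+
+    >>> from shapely.geometry import Polygon
+    >>> square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
+    >>> areas = calculate_geometry_area([square])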
+ """ + + geometry_area = empty(shape=(len(geometry_list,))) + + for geom_ind in range(0, len(geometry_list)): + + # Calculate the area of each geometry in multipolygon and collection objects + if geometry_list[geom_ind].geom_type in ["MultiPolygon", "GeometryCollection"]: + multi_geom_area = 0 + for multi_geom_ind in range(0, len(geometry_list[geom_ind].geoms)): + if geometry_list[geom_ind].geoms[multi_geom_ind].geom_type == "Point": + continue + geometry_corner_lon, geometry_corner_lat = ( + geometry_list[geom_ind].geoms[multi_geom_ind].exterior.coords.xy) + geometry_corner_lon = array(geometry_corner_lon) + geometry_corner_lat = array(geometry_corner_lat) + geom_area = __mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) + multi_geom_area += geom_area + geometry_area[geom_ind] = multi_geom_area * earth_radius_minor_axis * earth_radius_major_axis + + # Calculate the area of each geometry + else: + geometry_corner_lon, geometry_corner_lat = geometry_list[geom_ind].exterior.coords.xy + geometry_corner_lon = array(geometry_corner_lon) + geometry_corner_lat = array(geometry_corner_lat) + geom_area = __mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) + geometry_area[geom_ind] = geom_area * earth_radius_minor_axis * earth_radius_major_axis + + return geometry_area + + +def calculate_cell_area(grid_corner_lon, grid_corner_lat, + earth_radius_minor_axis=6356752.3142, earth_radius_major_axis=6378137.0): + """ + Calculate the area of each cell of a grid. + + Parameters + ---------- + grid_corner_lon : array + An Array with longitude bounds of grid. + grid_corner_lat : array + An Array with longitude bounds of grid. + earth_radius_minor_axis : float + Radius of the minor axis of the Earth. + earth_radius_major_axis : float + Radius of the major axis of the Earth. + """ + + # Calculate area for each grid cell + n_cells = grid_corner_lon.shape[0] + area = empty(shape=(n_cells,)) + for i in range(0, n_cells): + area[i] = __mod_huiliers_area(grid_corner_lon[i], grid_corner_lat[i]) + + return area*earth_radius_minor_axis*earth_radius_major_axis + + +def __mod_huiliers_area(cell_corner_lon, cell_corner_lat): + """ + Calculate the area of each cell according to Huilier's theorem. + Reference: CDO (https://earth.bsc.es/gitlab/ces/cdo/). + + Parameters + ---------- + cell_corner_lon : array + Longitude boundaries of each cell. + cell_corner_lat : array + Latitude boundaries of each cell. + """ + + my_sum = 0 + + # Get points 0 (bottom left) and 1 (bottom right) in Earth coordinates + point_0 = __lon_lat_to_cartesian(cell_corner_lon[0], cell_corner_lat[0], earth_radius_major_axis=1) + point_1 = __lon_lat_to_cartesian(cell_corner_lon[1], cell_corner_lat[1], earth_radius_major_axis=1) + point_0, point_1 = point_0[0], point_1[0] + + # Get number of vertices + if cell_corner_lat[0] == cell_corner_lat[-1]: + spatial_nv = len(cell_corner_lon) - 1 + else: + spatial_nv = len(cell_corner_lon) + + for i in range(2, spatial_nv): + + # Get point 2 (top right) in Earth coordinates + point_2 = __lon_lat_to_cartesian(cell_corner_lon[i], cell_corner_lat[i], earth_radius_major_axis=1) + point_2 = point_2[0] + + # Calculate area of triangle between points 0, 1 and 2 + my_sum += __tri_area(point_0, point_1, point_2) + + # Copy to calculate area of next triangle + if i == (spatial_nv - 1): + point_1 = deepcopy(point_2) + + return my_sum + + +def __tri_area(point_0, point_1, point_2): + """ + Calculate area between three points that form a triangle. + Reference: CDO (https://earth.bsc.es/gitlab/ces/cdo/). 
+
+    Parameters
+    ----------
+    point_0 : array
+        Position of first point in cartesian coordinates.
+    point_1 : array
+        Position of second point in cartesian coordinates.
+    point_2 : array
+        Position of third point in cartesian coordinates.
+    """
+
+    # Get length of side a (between point 0 and 1)
+    tmp_vec = __cross_product(point_0, point_1)
+    sin_a = __norm(tmp_vec)
+    a = arcsin(sin_a)
+
+    # Get length of side b (between point 0 and 2)
+    tmp_vec = __cross_product(point_0, point_2)
+    sin_b = __norm(tmp_vec)
+    b = arcsin(sin_b)
+
+    # Get length of side c (between point 1 and 2)
+    tmp_vec = __cross_product(point_2, point_1)
+    sin_c = __norm(tmp_vec)
+    c = arcsin(sin_c)
+
+    # Calculate area
+    s = 0.5*(a+b+c)
+    t = tan(s*0.5) * tan((s - a)*0.5) * tan((s - b)*0.5) * tan((s - c)*0.5)
+    area = fabs(4.0 * arctan(sqrt(fabs(t))))
+
+    return area
+
+
+def __cross_product(a, b):
+    """
+    Calculate the cross product of two position vectors.
+
+    Parameters
+    ----------
+    a : array
+        Position of point A in cartesian coordinates.
+    b : array
+        Position of point B in cartesian coordinates.
+    """
+
+    return [a[1]*b[2] - a[2]*b[1],
+            a[2]*b[0] - a[0]*b[2],
+            a[0]*b[1] - a[1]*b[0]]
+
+
+def __norm(cp):
+    """
+    Calculate the norm (magnitude) of a cross product vector.
+
+    Parameters
+    ----------
+    cp : array
+        Cross product between two points.
+    """
+
+    return sqrt(cp[0]*cp[0] + cp[1]*cp[1] + cp[2]*cp[2])
+
+
+# noinspection DuplicatedCode
+def __lon_lat_to_cartesian(lon, lat, earth_radius_major_axis=6378137.0):
+    """
+    Convert lon, lat coordinates of points on a sphere into cartesian (x, y, z) coordinates.
+
+    Parameters
+    ----------
+    lon : array
+        Longitude values.
+    lat : array
+        Latitude values.
+    earth_radius_major_axis : float
+        Radius of the major axis of the Earth.
+    """
+
+    lon_r = radians(lon)
+    lat_r = radians(lat)
+
+    x = earth_radius_major_axis * cos(lat_r) * cos(lon_r)
+    y = earth_radius_major_axis * cos(lat_r) * sin(lon_r)
+    z = earth_radius_major_axis * sin(lat_r)
+
+    return column_stack([x, y, z])
diff --git a/build/lib/nes/methods/horizontal_interpolation.py b/build/lib/nes/methods/horizontal_interpolation.py
new file mode 100644
index 0000000..25efef6
--- /dev/null
+++ b/build/lib/nes/methods/horizontal_interpolation.py
@@ -0,0 +1,762 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import nes
+from warnings import warn, filterwarnings
+from numpy import (ma, empty, nansum, concatenate, pad, nan, array, float64, int64, float32, meshgrid, expand_dims,
+                   reciprocal, arange, uint32, array_split, radians, cos, sin, column_stack, zeros)
+from pandas import concat, DataFrame
+from mpi4py import MPI
+from scipy import spatial
+from filelock import FileLock
+from datetime import datetime
+from copy import deepcopy
+from pyproj import Proj, Transformer, CRS
+import gc
+
+# CONSTANTS
+NEAREST_OPTS = ["NearestNeighbour", "NearestNeighbours", "nn", "NN"]
+CONSERVATIVE_OPTS = ["Conservative", "Area_Conservative", "cons", "conservative", "area"]
+
+
+def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4,
+                           info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False):
+    """
+    Horizontal interpolation from one grid to another.
+
+    Parameters
+    ----------
+    self : nes.Nes
+        Source projection Nes Object.
+    dst_grid : nes.Nes
+        Final projection Nes object.
+    weight_matrix_path : str, None
+        Path to the weight matrix to read/create.
+    kind : str
+        Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"].
+    n_neighbours : int
+        Used if kind == NearestNeighbour.
Number of nearest neighbours to interpolate. Default: 4. + info : bool + Indicates if you want to print extra info during the methods process. + to_providentia : bool + Indicates if we want the interpolated grid in Providentia format. + only_create_wm : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + """ + if info and self.master: + print("Creating Weight Matrix") + + # Obtain weight matrix + if self.parallel_method == "T": + weights, idx = __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, + only_create_wm, wm, flux) + elif self.parallel_method in ["Y", "X"]: + weights, idx = __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, + only_create_wm, wm, flux) + else: + raise NotImplementedError("Parallel method {0} is not implemented yet for horizontal interpolations.".format( + self.parallel_method) + "Use 'T'") + + if info and self.master: + print("Weight Matrix done!") + if only_create_wm: + # weights for only_create is the WM NES object + return weights + + # idx[idx < 0] = nan + idx = ma.masked_array(idx, mask=idx == -999) + # idx = array(idx, dtype=float) + # idx[idx < 0] = nan + # weights[weights < 0] = nan + weights = ma.masked_array(weights, mask=weights == -999) + # weights = array(weights, dtype=float) + # weights[weights < 0] = nan + + # Copy NES + final_dst = dst_grid.copy() + + sys.stdout.flush() + final_dst.set_communicator(dst_grid.comm) + + # Remove original file information + final_dst.__ini_path = None + final_dst.netcdf = None + final_dst.dataset = None + + # Return final_dst + final_dst.lev = self.lev + final_dst.set_full_levels(self.get_full_levels()) + final_dst.time = self.time + final_dst.set_full_times(self.get_full_times()) + final_dst.hours_start = self.hours_start + final_dst.hours_end = self.hours_end + + if info and self.master: + print("Applying weights") + # Apply weights + for var_name, var_info in self.variables.items(): + if info and self.master: + print("\t{var} horizontal interpolation".format(var=var_name)) + sys.stdout.flush() + src_shape = var_info["data"].shape + if isinstance(dst_grid, nes.PointsNes): + dst_shape = (src_shape[0], src_shape[1], idx.shape[-1]) + else: + dst_shape = (src_shape[0], src_shape[1], idx.shape[-2], idx.shape[-1]) + # Creating new variable without data + final_dst.variables[var_name] = {attr_name: attr_value for attr_name, attr_value in var_info.items() + if attr_name != "data"} + # Creating empty data + final_dst.variables[var_name]["data"] = empty(dst_shape) + + # src_data = var_info["data"].reshape((src_shape[0], src_shape[1], src_shape[2] * src_shape[3])) + for time in range(dst_shape[0]): + for lev in range(dst_shape[1]): + src_aux = __get_src_data(self.comm, var_info["data"][time, lev], idx, self.parallel_method) + final_dst.variables[var_name]["data"][time, lev] = nansum(weights * src_aux, axis=1) + + if isinstance(dst_grid, nes.PointsNes): + # Removing level axis + if src_shape[1] != 1: + raise IndexError("Data with vertical levels cannot be interpolated to points") + final_dst.variables[var_name]["data"] = final_dst.variables[var_name]["data"].reshape( + (src_shape[0], idx.shape[-1])) + if isinstance(dst_grid, nes.PointsNesGHOST) and not to_providentia: + final_dst = final_dst.to_points() + + final_dst.global_attrs = self.global_attrs + + if info and self.master: + print("Formatting") + + if to_providentia: + # self = 
experiment to interpolate (regular, rotated, etc.) + # final_dst = interpolated experiment (points) + if isinstance(final_dst, nes.PointsNes): + model_centre_lat, model_centre_lon = self.create_providentia_exp_centre_coordinates() + grid_edge_lat, grid_edge_lon = self.create_providentia_exp_grid_edge_coordinates() + final_dst = final_dst.to_providentia(model_centre_lon=model_centre_lon, + model_centre_lat=model_centre_lat, + grid_edge_lon=grid_edge_lon, + grid_edge_lat=grid_edge_lat) + else: + msg = "The final projection must be points to interpolate an experiment and get it in Providentia format." + warn(msg) + sys.stderr.flush() + else: + # Convert dimensions (time, lev, lat, lon) or (time, lat, lon) to (time, station) for interpolated variables + # and reshape data + if isinstance(final_dst, nes.PointsNes): + for var_name, var_info in final_dst.variables.items(): + if len(var_info["dimensions"]) != len(var_info["data"].shape): + final_dst.variables[var_name]["dimensions"] = ("time", "station") + + return final_dst + + +def __get_src_data(comm, var_data, idx, parallel_method): + """ + To obtain the needed src data to interpolate. + + Parameters + ---------- + comm : MPI.Comm. + MPI communicator. + var_data : array + Rank source data. + idx : array + Index of the needed data in a 2D flatten way. + parallel_method: str + Source parallel method. + + Returns + ------- + array + Flatten source needed data. + """ + + if parallel_method == "T": + var_data = var_data.flatten() + else: + var_data = comm.gather(var_data, root=0) + if comm.Get_rank() == 0: + if parallel_method == "Y": + axis = 0 + elif parallel_method == "X": + axis = 1 + else: + raise NotImplementedError(parallel_method) + var_data = concatenate(var_data, axis=axis) + var_data = var_data.flatten() + + var_data = comm.bcast(var_data) + + var_data = pad(var_data, [1, 1], "constant", constant_values=nan).take(idx + 1, mode="clip") + + return var_data + + +# noinspection DuplicatedCode +def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): + """ + To obtain the weights and source data index through the T axis. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + only_create : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + + Returns + ------- + tuple + Weights and source data index. + """ + weight_matrix = None + + if wm is not None: + weight_matrix = wm + + elif weight_matrix_path is not None: + with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + if os.path.isfile(weight_matrix_path): + if self.master: + weight_matrix = __read_weight_matrix(weight_matrix_path, comm=MPI.COMM_SELF) + else: + weight_matrix = True + if kind in NEAREST_OPTS: + if self.master: + if len(weight_matrix.lev["data"]) != n_neighbours: + warn("The selected weight matrix does not have the same number of nearest neighbours." 
+ + "Re-calculating again but not saving it.") + sys.stderr.flush() + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + else: + weight_matrix = True + + else: + if self.master: + if kind in NEAREST_OPTS: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + wm_path=weight_matrix_path) + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix( + self, dst_grid, wm_path=weight_matrix_path, flux=flux) + else: + raise NotImplementedError(kind) + else: + weight_matrix = True + + if os.path.exists(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + os.remove(weight_matrix_path + "{0:03d}.lock".format(self.rank)) + else: + if self.master: + if kind in NEAREST_OPTS: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) + else: + raise NotImplementedError(kind) + else: + weight_matrix = True + + if only_create: + return weight_matrix, None + + if self.master: + if kind in NEAREST_OPTS: + # Normalize to 1 + weights = array(array(weight_matrix.variables["weight"]["data"], dtype=float64) / + array(weight_matrix.variables["weight"]["data"], dtype=float64).sum(axis=1), + dtype=float64) + else: + weights = array(weight_matrix.variables["weight"]["data"], dtype=float64) + idx = array(weight_matrix.variables["idx"]["data"][0], dtype=int) + else: + weights = None + idx = None + + weights = self.comm.bcast(weights, root=0) + idx = self.comm.bcast(idx, root=0) + + return weights, idx + + +# noinspection DuplicatedCode +def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): + """ + To obtain the weights and source data index through the X or Y axis. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + only_create : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + + Returns + ------- + tuple + Weights and source data index. + """ + weight_matrix = None + + if isinstance(dst_grid, nes.PointsNes) and weight_matrix_path is not None: + if self.master: + warn("To point weight matrix cannot be saved.") + sys.stderr.flush() + weight_matrix_path = None + + if wm is not None: + weight_matrix = wm + + elif weight_matrix_path is not None: + with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + if os.path.isfile(weight_matrix_path): + if self.master: + weight_matrix = __read_weight_matrix(weight_matrix_path, comm=MPI.COMM_SELF) + else: + weight_matrix = True + if kind in NEAREST_OPTS: + if self.master: + if len(weight_matrix.lev["data"]) != n_neighbours: + warn("The selected weight matrix does not have the same number of nearest neighbours." 
+ + "Re-calculating again but not saving it.") + sys.stderr.flush() + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + else: + weight_matrix = True + else: + if kind in NEAREST_OPTS: + if self.master: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + wm_path=weight_matrix_path) + else: + weight_matrix = True + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix( + self, dst_grid, wm_path=weight_matrix_path, flux=flux) + else: + raise NotImplementedError(kind) + + if os.path.exists(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + os.remove(weight_matrix_path + "{0:03d}.lock".format(self.rank)) + else: + if kind in NEAREST_OPTS: + if self.master: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + else: + weight_matrix = True + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) + else: + raise NotImplementedError(kind) + + if only_create: + return weight_matrix, None + + # Normalize to 1 + if self.master: + if kind in NEAREST_OPTS: + weights = array(array(weight_matrix.variables["weight"]["data"], dtype=float64) / + array(weight_matrix.variables["weight"]["data"], dtype=float64).sum(axis=1), + dtype=float64) + else: + weights = array(weight_matrix.variables["weight"]["data"], dtype=float64) + idx = array(weight_matrix.variables["idx"]["data"][0], dtype=int64) + else: + weights = None + idx = None + + weights = self.comm.bcast(weights, root=0) + idx = self.comm.bcast(idx, root=0) + + # if isinstance(dst_grid, nes.PointsNes): + # print("weights 1 ->", weights.shape) + # print("idx 1 ->", idx.shape) + # weights = weights[:, dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + # idx = idx[dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + # else: + weights = weights[:, :, dst_grid.write_axis_limits["y_min"]:dst_grid.write_axis_limits["y_max"], + dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + idx = idx[:, dst_grid.write_axis_limits["y_min"]:dst_grid.write_axis_limits["y_max"], + dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + # print("weights 2 ->", weights.shape) + # print("idx 2 ->", idx.shape) + + return weights, idx + + +def __read_weight_matrix(weight_matrix_path, comm=None, parallel_method="T"): + """ + Read weight matrix. + + Parameters + ---------- + weight_matrix_path : str + Path of the weight matrix. + comm : MPI.Comm + A Communicator to read the weight matrix. + parallel_method : str + Nes parallel method to read the weight matrix. + + Returns + ------- + nes.Nes + Weight matrix. + """ + + weight_matrix = nes.open_netcdf(path=weight_matrix_path, comm=comm, parallel_method=parallel_method, balanced=True) + weight_matrix.load() + + # In previous versions of NES weight was called inverse_dists + if "inverse_dists" in weight_matrix.variables.keys(): + weight_matrix.variables["weight"] = weight_matrix.variables["inverse_dists"] + + weight_matrix.variables["weight"]["data"][weight_matrix.variables["weight"]["data"] <= 0] = nan + weight_matrix.variables["weight"]["data"][weight_matrix.variables["idx"]["data"] <= 0] = nan + + return weight_matrix + + +# noinspection DuplicatedCode,PyProtectedMember +def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info=False): + """ + To create the weight matrix with the nearest neighbours method. 
+ + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + wm_path : str + Path where write the weight matrix. + info: bool + Indicates if you want to print extra info during the methods process. + + Returns + ------- + nes.Nes + Weight matrix. + """ + # Only master is here. + if info and self.master: + print("\tCreating Nearest Neighbour Weight Matrix with {0} neighbours".format(n_neighbours)) + sys.stdout.flush() + # Source + src_lat = array(self._full_lat["data"], dtype=float32) + src_lon = array(self._full_lon["data"], dtype=float32) + + # 1D to 2D coordinates + if len(src_lon.shape) == 1: + src_lon, src_lat = meshgrid(src_lon, src_lat) + + # Destination + dst_lat = array(dst_grid._full_lat["data"], dtype=float32) + dst_lon = array(dst_grid._full_lon["data"], dtype=float32) + + if isinstance(dst_grid, nes.PointsNes): + dst_lat = expand_dims(dst_grid._full_lat["data"], axis=0) + dst_lon = expand_dims(dst_grid._full_lon["data"], axis=0) + else: + # 1D to 2D coordinates + if len(dst_lon.shape) == 1: + dst_lon, dst_lat = meshgrid(dst_lon, dst_lat) + + # calculate N nearest neighbour inverse distance weights (and indices) + # from gridcells centres of model 1 to each grid cell centre of model 2 + # model geographic longitude/latitude coordinates are first converted + # to cartesian ECEF (Earth Centred, Earth Fixed) coordinates, before + # calculating distances. + + # src_mod_xy = lon_lat_to_cartesian(src_lon.flatten(), src_lat.flatten()) + # dst_mod_xy = lon_lat_to_cartesian(dst_lon.flatten(), dst_lat.flatten()) + + src_mod_xy = __lon_lat_to_cartesian_ecef(src_lon.flatten(), src_lat.flatten()) + dst_mod_xy = __lon_lat_to_cartesian_ecef(dst_lon.flatten(), dst_lat.flatten()) + + # generate KDtree using model 1 coordinates (i.e. 
the model grid you are + # interpolating from) + src_tree = spatial.cKDTree(src_mod_xy) + + # get n-neighbour nearest distances/indices (ravel form) of model 1 grid cell + # centres from each model 2 grid cell centre + + dists, idx = src_tree.query(dst_mod_xy, k=n_neighbours) + # self.nearest_neighbour_inds = \ + # column_stack(unravel_index(idx, lon.shape)) + + weight_matrix = dst_grid.copy() + weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time_bnds = None + weight_matrix.time_bnds = None + weight_matrix.last_level = None + weight_matrix.first_level = 0 + weight_matrix.hours_start = 0 + weight_matrix.hours_end = 0 + + weight_matrix.set_communicator(MPI.COMM_SELF) + # take the reciprocals of the nearest neighbours distances + dists[dists < 1] = 1 + inverse_dists = reciprocal(dists) + + inverse_dists_transf = inverse_dists.T.reshape((1, n_neighbours, dst_lon.shape[0], dst_lon.shape[1])) + weight_matrix.variables["weight"] = {"data": inverse_dists_transf, "units": "m"} + idx_transf = idx.T.reshape((1, n_neighbours, dst_lon.shape[0], dst_lon.shape[1])) + weight_matrix.variables["idx"] = {"data": idx_transf, "units": ""} + weight_matrix.lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} + weight_matrix._full_lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} + if wm_path is not None: + weight_matrix.to_netcdf(wm_path) + + return weight_matrix + + +# noinspection DuplicatedCode +def __create_area_conservative_weight_matrix(self, dst_nes, wm_path=None, flux=False, info=False): + """ + To create the weight matrix with the area conservative method. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_nes : nes.Nes + Final projection Nes object. + wm_path : str + Path where write the weight matrix. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + info: bool + Indicates if you want to print extra info during the methods process. + + Returns + ------- + nes.Nes + Weight matrix. 
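+
+    Notes
+    -----
+    For each intersecting pair of cells, the stored weight is the fraction of the source cell
+    covered by the destination cell, area(src ∩ dst) / area(src). When the flux option is
+    enabled, that fraction is additionally scaled by area(src) / area(dst) so that flux totals
+    are conserved.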
+ """ + + if info and self.master: + print("\tCreating area conservative Weight Matrix") + sys.stdout.flush() + + my_crs = CRS.from_proj4("+proj=latlon") # Common projection for both shapefiles + + # Get a portion of the destiny grid + if dst_nes.shapefile is None: + dst_nes.create_shapefile() + dst_grid = deepcopy(dst_nes.shapefile) + + # Formatting Destination grid + dst_grid.to_crs(crs=my_crs, inplace=True) + dst_grid["FID_dst"] = dst_grid.index + + # Preparing Source grid + if self.shapefile is None: + self.create_shapefile() + src_grid = deepcopy(self.shapefile) + + # Formatting Source grid + src_grid.to_crs(crs=my_crs, inplace=True) + + # Serialize index intersection function to avoid memory problems + if self.size > 1 and self.parallel_method != "T": + src_grid = self.comm.gather(src_grid, root=0) + dst_grid = self.comm.gather(dst_grid, root=0) + if self.master: + src_grid = concat(src_grid) + dst_grid = concat(dst_grid) + if self.master: + src_grid["FID_src"] = src_grid.index + src_grid = src_grid.reset_index() + dst_grid = dst_grid.reset_index() + fid_src, fid_dst = dst_grid.sindex.query(src_grid.geometry, predicate="intersects") + + # Calculate intersected areas and fractions + intersection_df = DataFrame(columns=["FID_src", "FID_dst"]) + + intersection_df["FID_src"] = array(src_grid.loc[fid_src, "FID_src"], dtype=uint32) + intersection_df["FID_dst"] = array(dst_grid.loc[fid_dst, "FID_dst"], dtype=uint32) + + intersection_df["geometry_src"] = src_grid.loc[fid_src, "geometry"].values + intersection_df["geometry_dst"] = dst_grid.loc[fid_dst, "geometry"].values + del src_grid, dst_grid, fid_src, fid_dst + # Split the array into smaller arrays in order to scatter the data among the processes + intersection_df = array_split(intersection_df, self.size) + else: + intersection_df = None + + intersection_df = self.comm.scatter(intersection_df, root=0) + + if info and self.master: + print("\t\tGrids created and ready to interpolate") + sys.stdout.flush() + if True: + # No Warnings Zone + filterwarnings("ignore") + # intersection_df["weight"] = array(intersection_df.apply( + # lambda x: x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area, + # axis=1), dtype=float64) + if flux: + intersection_df["weight"] = array(intersection_df.apply( + lambda x: (x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area) * + (nes.Nes.calculate_geometry_area([x["geometry_src"]])[0] / + nes.Nes.calculate_geometry_area([x["geometry_dst"]])[0]), + axis=1), dtype=float64) + else: + intersection_df["weight"] = array(intersection_df.apply( + lambda x: x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area, + axis=1), dtype=float64) + + intersection_df.drop(columns=["geometry_src", "geometry_dst"], inplace=True) + gc.collect() + filterwarnings("default") + + # Format & Clean + if info and self.master: + print("\t\tWeights calculated. 
Formatting weight matrix.") + sys.stdout.flush() + + # Initialising weight matrix + if self.parallel_method != "T": + intersection_df = self.comm.gather(intersection_df, root=0) + if self.master: + if self.parallel_method != "T": + intersection_df = concat(intersection_df) + intersection_df = intersection_df.set_index( + ["FID_dst", intersection_df.groupby("FID_dst").cumcount()]).rename_axis(("FID", "level")).sort_index() + intersection_df.rename(columns={"FID_src": "idx"}, inplace=True) + weight_matrix = dst_nes.copy() + weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time_bnds = None + weight_matrix.time_bnds = None + weight_matrix.last_level = None + weight_matrix.first_level = 0 + weight_matrix.hours_start = 0 + weight_matrix.hours_end = 0 + + weight_matrix.set_communicator(MPI.COMM_SELF) + + weight_matrix.set_levels({"data": arange(intersection_df.index.get_level_values("level").max() + 1), + "dimensions": ("lev",), + "units": "", + "positive": "up"}) + + # Creating Weight matrix empty variables + wm_shape = weight_matrix.get_full_shape() + shape = (1, len(weight_matrix.lev["data"]), wm_shape[0], wm_shape[1],) + shape_flat = (1, len(weight_matrix.lev["data"]), wm_shape[0] * wm_shape[1],) + + weight_matrix.variables["weight"] = {"data": empty(shape_flat), "units": "-"} + weight_matrix.variables["weight"]["data"][:] = -999 + weight_matrix.variables["idx"] = {"data": empty(shape_flat), "units": "-"} + weight_matrix.variables["idx"]["data"][:] = -999 + + # Filling Weight matrix variables + for aux_lev in weight_matrix.lev["data"]: + aux_data = intersection_df.xs(level="level", key=aux_lev) + weight_matrix.variables["weight"]["data"][0, aux_lev, aux_data.index] = aux_data.loc[:, "weight"].values + weight_matrix.variables["idx"]["data"][0, aux_lev, aux_data.index] = aux_data.loc[:, "idx"].values + # Re-shaping + weight_matrix.variables["weight"]["data"] = weight_matrix.variables["weight"]["data"].reshape(shape) + weight_matrix.variables["idx"]["data"] = weight_matrix.variables["idx"]["data"].reshape(shape) + if wm_path is not None: + if info and self.master: + print("\t\tWeight matrix saved at {0}".format(wm_path)) + sys.stdout.flush() + weight_matrix.to_netcdf(wm_path) + else: + weight_matrix = True + return weight_matrix + + +# noinspection DuplicatedCode +def __lon_lat_to_cartesian(lon, lat, radius=6378137.0): + """ + Calculate lon, lat coordinates of a point on a sphere. + + DEPRECATED!!!! + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. + radius : float + Radius of the sphere to get the distances. + """ + + lon_r = radians(lon) + lat_r = radians(lat) + + x = radius * cos(lat_r) * cos(lon_r) + y = radius * cos(lat_r) * sin(lon_r) + z = radius * sin(lat_r) + + return column_stack([x, y, z]) + + +def __lon_lat_to_cartesian_ecef(lon, lat): + """ + Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, + Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. + ECEF coordinates represent positions (in meters) as X, Y, Z coordinates, approximating the earth surface + as an ellipsoid of revolution. + This conversion is for the subsequent calculation of Euclidean distances of the model grid cell centres + from each observational station. 
+ Defining the distance between two points on the earth's surface as simply the Euclidean distance + between the two lat/lon pairs could lead to inaccurate results depending on the distance + between two points (i.e. 1 deg. of longitude varies with latitude). + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. + """ + + lla = Proj(proj="latlong", ellps="WGS84", datum="WGS84") + ecef = Proj(proj="geocent", ellps="WGS84", datum="WGS84") + + # x, y, z = pyproj.transform(lla, ecef, lon, lat, zeros(lon.shape), radians=False) + # Deprecated: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1 + transformer = Transformer.from_proj(lla, ecef) + x, y, z = transformer.transform(lon, lat, zeros(lon.shape), radians=False) + return column_stack([x, y, z]) diff --git a/build/lib/nes/methods/spatial_join.py b/build/lib/nes/methods/spatial_join.py new file mode 100644 index 0000000..eb35864 --- /dev/null +++ b/build/lib/nes/methods/spatial_join.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python + +import sys +from warnings import warn, filterwarnings +from geopandas import sjoin_nearest, sjoin, read_file +from pandas import DataFrame +from numpy import array, uint32, nan +from shapely.errors import TopologicalError + + +def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): + """ + Compute overlay intersection of two GeoPandasDataFrames. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + ext_shp : GeoPandasDataFrame or str + File or path from where the data will be obtained on the intersection. + method : str + Overlay method. Accepted values: ["nearest", "intersection", "centroid"]. + var_list : List or None or str + Variables that will be included in the resulting shapefile. + info : bool + Indicates if you want to print the process info. + apply_bbox : bool + Indicates if you want to reduce the shapefile to a bbox. + """ + + if self.master and info: + print("Starting spatial join") + if isinstance(var_list, str): + # Transforming string (variable name) to a list with length 0 + var_list = [var_list] + + # Create source shapefile if it does not exist + if self.shapefile is None: + if self.master and info: + print("\tCreating shapefile") + sys.stdout.flush() + self.create_shapefile() + + ext_shp = __prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info, + apply_bbox=apply_bbox) + + if method == "nearest": + # Nearest centroids to the shapefile polygons + __spatial_join_nearest(self, ext_shp=ext_shp, info=info) + elif method == "intersection": + # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN + __spatial_join_intersection(self, ext_shp=ext_shp, info=info) + elif method == "centroid": + # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN + __spatial_join_centroid(self, ext_shp=ext_shp, info=info) + + else: + accepted_values = ["nearest", "intersection", "centroid"] + raise NotImplementedError("{0} is not implemented. Choose from: {1}".format(method, accepted_values)) + + return None + + +def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox=True): + """ + Prepare the external shapefile. + + It is high recommended to pass ext_shp parameter as string because it will clip the external shapefile to the rank. + + 1. Read if it is not already read + 2. Filter variables list + 3. Standardize projections + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+    ext_shp : geopandas.GeoDataFrame or str
+        External shapefile or path to it.
+    var_list : List[str] or None
+        External shapefile variables to be computed.
+    info : bool
+        Indicates if you want to print the information.
+    apply_bbox : bool
+        Indicates if you want to reduce the shapefile to a bbox.
+
+    Returns
+    -------
+    GeoDataFrame
+        External shapefile.
+    """
+
+    if isinstance(ext_shp, str):
+        # Reading external shapefile
+        if self.master and info:
+            print("\tReading external shapefile")
+        # ext_shp = read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry)
+        if apply_bbox:
+            ext_shp = read_file(ext_shp, include_fields=var_list, bbox=__get_bbox(self))
+        else:
+            ext_shp = read_file(ext_shp, include_fields=var_list)
+    else:
+        msg = "WARNING!!! "
+        msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened "
+        msg += "shapefile, memory is used more efficiently because the external shapefile is clipped while reading."
+        warn(msg)
+        sys.stderr.flush()
+        ext_shp.reset_index(inplace=True)
+        if var_list is not None:
+            ext_shp = ext_shp.loc[:, var_list + ["geometry"]]
+
+    self.comm.Barrier()
+    if self.master and info:
+        print("\t\tReading external shapefile done!")
+
+    # Standardizing projection
+    ext_shp = ext_shp.to_crs(self.shapefile.crs)
+
+    return ext_shp
+
+
+def __get_bbox(self):
+    """
+    Obtain the bounding box of the rank data (lon_min, lat_min, lon_max, lat_max).
+
+    Parameters
+    ----------
+    self : nes.Nes
+        A Nes Object.
+
+    Returns
+    -------
+    tuple
+        Bounding box
+    """
+
+    bbox = (self.lon_bnds["data"].min(), self.lat_bnds["data"].min(),
+            self.lon_bnds["data"].max(), self.lat_bnds["data"].max(), )
+
+    return bbox
+
+
+# noinspection DuplicatedCode
+def __spatial_join_nearest(self, ext_shp, info=False):
+    """
+    Perform the spatial join using the nearest method.
+
+    Parameters
+    ----------
+    self : nes.Nes
+        A Nes Object.
+    ext_shp : GeoDataFrame
+        External shapefile.
+    info : bool
+        Indicates if you want to print the information.
+    """
+
+    if self.master and info:
+        print("\tNearest spatial join")
+        sys.stdout.flush()
+    grid_shp = self.get_centroids_from_coordinates()
+
+    # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest
+    # TODO: Check if the projection 4328 does not distort the coordinates too much
+    # https://gis.stackexchange.com/questions/372564/
+    # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas
+    # ext_shp = ext_shp.to_crs("EPSG:4328")
+    # grid_shp = grid_shp.to_crs("EPSG:4328")
+
+    # Calculate spatial join by distance
+    aux_grid = sjoin_nearest(grid_shp, ext_shp, distance_col="distance")
+
+    # Get data from closest shapes to centroids
+    del aux_grid["geometry"], aux_grid["index_right"]
+    self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid
+
+    var_list = list(ext_shp.columns)
+    var_list.remove("geometry")
+    for var_name in var_list:
+        self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)
+
+    return None
+
+
+# noinspection DuplicatedCode
+def __spatial_join_centroid(self, ext_shp, info=False):
+    """
+    Perform the spatial join using the centroid method.
+
+    Parameters
+    ----------
+    self : nes.Nes
+        A Nes Object.
+    ext_shp : GeoDataFrame
+        External shapefile.
+    info : bool
+        Indicates if you want to print the information.
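+
+    Notes
+    -----
+    This helper is not meant to be called directly; it is reached through the public
+    spatial_join entry point. A minimal sketch, assuming spatial_join is exposed on the Nes
+    object and using an illustrative path and variable name:
+
+    >>> import nes
+    >>> grid = nes.open_netcdf("model_data.nc")  # doctest: +SKIP
+    >>> grid.spatial_join("regions.shp", method="centroid", var_list=["region_id"])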
+ """ + + if self.master and info: + print("\tCentroid spatial join") + sys.stdout.flush() + if info and self.master: + print("\t\tCalculating centroids") + sys.stdout.flush() + + # Get centroids + grid_shp = self.get_centroids_from_coordinates() + + # Calculate spatial joint + if info and self.master: + print("\t\tCalculating centroid spatial join") + sys.stdout.flush() + aux_grid = sjoin(grid_shp, ext_shp, predicate="within") + + # Get data from shapes where there are centroids, rest will be NaN + del aux_grid["geometry"], aux_grid["index_right"] + self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid + + var_list = list(ext_shp.columns) + var_list.remove("geometry") + for var_name in var_list: + self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) + + return None + + +def __spatial_join_intersection(self, ext_shp, info=False): + """ + Perform the spatial join using the intersection method. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + ext_shp : GeoDataFrame + External shapefile. + info : bool + Indicates if you want to print the information. + """ + + var_list = list(ext_shp.columns) + var_list.remove("geometry") + + grid_shp = self.shapefile + grid_shp["FID_grid"] = grid_shp.index + grid_shp = grid_shp.reset_index() + + # Get intersected areas + # inp, res = ext_shp.sindex.query(grid_shp.geometry, predicate="intersects") + inp, res = grid_shp.sindex.query(ext_shp.geometry, predicate="intersects") + + if info: + print("\t\tRank {0:03d}: {1} intersected areas found".format(self.rank, len(inp))) + sys.stdout.flush() + + # Calculate intersected areas and fractions + intersection = DataFrame(columns=["FID", "ext_shp_id", "weight"]) + intersection["FID"] = array(grid_shp.loc[res, "FID_grid"], dtype=uint32) + intersection["ext_shp_id"] = array(inp, dtype=uint32) + + if len(intersection) > 0: + if True: + # No Warnings Zone + counts = intersection["FID"].value_counts() + filterwarnings("ignore") + intersection.loc[:, "weight"] = 1. 
+ + for i, row in intersection.iterrows(): + if isinstance(i, int) and i % 1000 == 0 and info: + print("\t\t\tRank {0:03d}: {1:.3f} %".format(self.rank, i * 100 / len(intersection))) + sys.stdout.flush() + # Filter to do not calculate percentages over 100% grid cells spatial joint + if counts[row["FID"]] > 1: + try: + intersection.loc[i, "weight"] = grid_shp.loc[res[i], "geometry"].intersection( + ext_shp.loc[inp[i], "geometry"]).area / grid_shp.loc[res[i], "geometry"].area + except TopologicalError: + # If for some reason the geometry is corrupted it should work with the buffer function + ext_shp.loc[[inp[i]], "geometry"] = ext_shp.loc[[inp[i]], "geometry"].buffer(0) + intersection.loc[i, "weight"] = grid_shp.loc[res[i], "geometry"].intersection( + ext_shp.loc[inp[i], "geometry"]).area / grid_shp.loc[res[i], "geometry"].area + # intersection["intersect_area"] = intersection.apply( + # lambda x: x["geometry_grid"].intersection(x["geometry_ext"]).area, axis=1) + intersection.drop(intersection[intersection["weight"] <= 0].index, inplace=True) + + filterwarnings("default") + + # Choose the biggest area from intersected areas with multiple options + intersection.sort_values("weight", ascending=False, inplace=True) + intersection = intersection.drop_duplicates(subset="FID", keep="first") + intersection = intersection.sort_values("FID").set_index("FID") + + for var_name in var_list: + self.shapefile.loc[intersection.index, var_name] = array( + ext_shp.loc[intersection["ext_shp_id"], var_name]) + + else: + for var_name in var_list: + self.shapefile.loc[:, var_name] = nan + + for var_name in var_list: + self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) + + return None diff --git a/build/lib/nes/methods/vertical_interpolation.py b/build/lib/nes/methods/vertical_interpolation.py new file mode 100644 index 0000000..23ca712 --- /dev/null +++ b/build/lib/nes/methods/vertical_interpolation.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python + +import sys +from numpy import nan, flip, cumsum, nanmean, empty, ndarray, ma, float64, array, interp, where +from scipy.interpolate import interp1d +from copy import copy + + +def add_4d_vertical_info(self, info_to_add): + """ + To add the vertical information from other source. + + Parameters + ---------- + self : nes.Nes + Source Nes object. + info_to_add : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. + """ + + vertical_var = list(self.concatenate(info_to_add)) + self.vertical_var_name = vertical_var[0] + + return None + + +def __parse_extrapolate(extrapolate) -> tuple: + """ + Parses the "extrapolate" parameter and returns a tuple representing the extrapolation options. + + Parameters + ---------- + extrapolate : bool or tuple or None or number or NaN + If bool: + - If True, both extrapolation options are set to "extrapolate". + - If False, extrapolation options are set to ("bottom", "top"). + If tuple: + - The first element represents the extrapolation option for the lower bound. + - The second element represents the extrapolation option for the upper bound. + - If any element is bool: + - If True, it represents "extrapolate". + - If False: + - If it"s the first element, it represents "bottom". + - If it"s the second element, it represents "top". + - If any element is None, it is replaced with numpy.nan. + - Other numeric values are kept as they are. + - If any element is NaN, it is kept as NaN. 
+ If None: + - Both extrapolation options are set to (NaN, NaN). + If number: + - Both extrapolation options are set to the provided number. + If NaN: + - Both extrapolation options are set to NaN. + + Returns + ------- + tuple + A tuple representing the extrapolation options. If the input is invalid, it returns + ("extrapolate", "extrapolate"). + """ + if isinstance(extrapolate, bool): + if extrapolate: + extrapolate_options = ("extrapolate", "extrapolate") + else: + extrapolate_options = ("bottom", "top") + elif isinstance(extrapolate, tuple): + extrapolate_options = [None, None] + for i in range(len(extrapolate)): + if isinstance(extrapolate[i], bool): + if extrapolate[i]: + extrapolate_options[i] = "extrapolate" + else: + if i == 0: + extrapolate_options[i] = "bottom" + else: + extrapolate_options[i] = "top" + elif extrapolate[i] is None: + extrapolate_options[i] = nan + else: + extrapolate_options[i] = extrapolate[i] + extrapolate_options = tuple(extrapolate_options) + elif extrapolate is None: + extrapolate_options = ("bottom", "top") + else: + extrapolate_options = (extrapolate, extrapolate) + + return extrapolate_options + + +def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", extrapolate_options=False, info=None, + overwrite=False): + """ + Vertical interpolation. + + Parameters + ---------- + self : Nes + Source Nes object. + new_levels : List + A List of new vertical levels. + new_src_vertical : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. + kind : str + Vertical methods type. + extrapolate_options : bool or tuple or None or number or NaN + If bool: + - If True, both extrapolation options are set to "extrapolate". + - If False, extrapolation options are set to ("bottom", "top"). + If tuple: + - The first element represents the extrapolation option for the lower bound. + - The second element represents the extrapolation option for the upper bound. + - If any element is bool: + - If True, it represents "extrapolate". + - If False: + - If it"s the first element, it represents "bottom". + - If it"s the second element, it represents "top". + - If any element is None, it is replaced with numpy.nan. + - Other numeric values are kept as they are. + - If any element is NaN, it is kept as NaN. + If None: + - Both extrapolation options are set to (NaN, NaN). + If number: + - Both extrapolation options are set to the provided number. + If NaN: + - Both extrapolation options are set to NaN. + info: None, bool + Indicates if you want to print extra information. + overwrite: bool + Indicates if you want to compute the vertical interpolation in the same object or not. 
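+
+    Examples
+    --------
+    A minimal sketch, assuming the function is exposed as a method on the Nes object (as the
+    horizontal one is); the path and the target levels are illustrative:
+
+    >>> import nes
+    >>> source = nes.open_netcdf("model_levels.nc")  # doctest: +SKIP
+    >>> fixed = source.interpolate_vertical([50., 100., 250., 500., 1000.],
+    ...                                     kind="linear", extrapolate_options=False)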
+ """ + src_levels_aux = None + fill_value = None + + extrapolate_options = __parse_extrapolate(extrapolate_options) + do_extrapolation = "extrapolate" in extrapolate_options + + if len(self.lev) == 1: + raise RuntimeError("1D data cannot be vertically interpolated.") + if not overwrite: + self = self.copy(copy_vars=True) + if info is None: + info = self.info + + if new_src_vertical is not None: + self.add_4d_vertical_info(new_src_vertical) + if new_levels[0] > new_levels[-1]: + ascendant = False + else: + ascendant = True + + nz_new = len(new_levels) + + if self.vertical_var_name is None: + # To use current level data + current_level = True + # Checking old order + src_levels = self.lev["data"] + if src_levels[0] > src_levels[-1]: + if not ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels) + else: + if ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels) + else: + current_level = False + src_levels = self.variables[self.vertical_var_name]["data"] + if self.vertical_var_name == "layer_thickness": + src_levels = flip(cumsum(flip(src_levels, axis=1), axis=1)) + else: + # src_levels = flip(src_levels, axis=1) + pass + # Checking old order + if nanmean(src_levels[:, 0, :, :]) > nanmean(src_levels[:, -1, :, :]): + if not ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels, axis=1) + else: + if ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels, axis=1) + + # Loop over variables + for var_name in self.variables.keys(): + if self.variables[var_name]["data"] is None: + # Load data if it is not loaded yet + self.load(var_name) + + if var_name != self.vertical_var_name: + if do_flip: + self.variables[var_name]["data"] = flip(self.variables[var_name]["data"], axis=1) + if info and self.master: + print("\t{var} vertical methods".format(var=var_name)) + sys.stdout.flush() + nt, nz, ny, nx = self.variables[var_name]["data"].shape + dst_data = empty((nt, nz_new, ny, nx), dtype=self.variables[var_name]["data"].dtype) + for t in range(nt): + # if info and self.rank == self.size - 1: + if self.info and self.master: + print("\t\t{3} time step {0} ({1}/{2}))".format(self.time[t], t + 1, nt, var_name)) + sys.stdout.flush() + for j in range(ny): + for i in range(nx): + if len(src_levels.shape) == 1: + # To use 1D level information + curr_level_values = src_levels + else: + # To use 4D level data + curr_level_values = src_levels[t, :, j, i] + try: + # Check if all values are identical or masked + if ((isinstance(curr_level_values, ndarray) and + (curr_level_values == curr_level_values[0]).all()) or + (isinstance(curr_level_values, ma.core.MaskedArray) and + curr_level_values.mask.all())): + kind = "slinear" + else: + kind = kind # "cubic" + + # Filtering filling values to extrapolation + fill_value = [nan, nan] + if "bottom" in extrapolate_options: + if ascendant: + fill_value[0] = float64(self.variables[var_name]["data"][t, 0, j, i]) + else: + fill_value[0] = float64(self.variables[var_name]["data"][t, -1, j, i]) + else: + fill_value[0] = extrapolate_options[0] + if "top" in extrapolate_options: + if ascendant: + fill_value[1] = float64(self.variables[var_name]["data"][t, -1, j, i]) + else: + fill_value[1] = float64(self.variables[var_name]["data"][t, 0, j, i]) + else: + fill_value[1] = extrapolate_options[1] + fill_value = tuple(fill_value) + + # We force the methods with float64 to avoid negative values + # We don"t know why the negatives appears with float34 + if 
current_level:
+                                # 1D vertical component
+                                src_levels_aux = src_levels
+                            else:
+                                # 4D vertical component
+                                src_levels_aux = src_levels[t, :, j, i]
+
+                            if kind == "linear" and ascendant and not do_extrapolation:
+                                dst_data[t, :, j, i] = array(
+                                    interp(new_levels,
+                                           array(src_levels_aux, dtype=float64),
+                                           array(self.variables[var_name]["data"][t, :, j, i], dtype=float64),
+                                           left=fill_value[0], right=fill_value[1]),
+                                    dtype=self.variables[var_name]["data"].dtype)
+                            else:
+                                if not do_extrapolation:
+                                    dst_data[t, :, j, i] = array(
+                                        interp1d(array(src_levels_aux, dtype=float64),
+                                                 array(self.variables[var_name]["data"][t, :, j, i], dtype=float64),
+                                                 kind=kind,
+                                                 bounds_error=False,
+                                                 fill_value=fill_value)(new_levels),
+                                        dtype=self.variables[var_name]["data"].dtype)
+                                else:
+                                    # If extrapolation first we need to extrapolate all (below & above)
+                                    dst_data[t, :, j, i] = array(
+                                        interp1d(array(src_levels_aux, dtype=float64),
+                                                 array(self.variables[var_name]["data"][t, :, j, i],
+                                                       dtype=float64),
+                                                 kind=kind,
+                                                 bounds_error=False,
+                                                 fill_value="extrapolate")(new_levels),
+                                        dtype=self.variables[var_name]["data"].dtype)
+                                    # Check values below the lower vertical level
+                                    if fill_value[0] != "extrapolate":
+                                        if ascendant:
+                                            idx_below = where(new_levels < src_levels_aux[0])
+                                        else:
+                                            idx_below = where(new_levels > src_levels_aux[0])
+                                        dst_data[t, idx_below, j, i] = fill_value[0]
+                                    # Check values above the upper vertical level
+                                    if fill_value[1] != "extrapolate":
+                                        if ascendant:
+                                            idx_above = where(new_levels > src_levels_aux[-1])
+                                        else:
+                                            idx_above = where(new_levels < src_levels_aux[-1])
+                                        dst_data[t, idx_above, j, i] = fill_value[1]
+                        except Exception as e:
+                            print("time lat lon", t, j, i)
+                            print("***********************")
+                            print("LEVELS", src_levels_aux)
+                            print("DATA", array(self.variables[var_name]["data"][t, :, j, i], dtype=float64))
+                            print("METHOD", kind)
+                            print("FILL_VALUE", fill_value)
+                            print("+++++++++++++++++++++++")
+                            raise Exception(str(e))
+
+            self.variables[var_name]["data"] = copy(dst_data)
+
+    # Update level information
+    new_lev_info = {"data": array(new_levels)}
+    if "positive" in self.lev.keys():
+        # Vertical level direction (do_flip was set while sorting the source levels)
+        if do_flip:
+            self.reverse_level_direction()
+        new_lev_info["positive"] = self.lev["positive"]
+
+    if self.vertical_var_name is not None:
+        for var_attr, attr_info in self.variables[self.vertical_var_name].items():
+            if var_attr not in ["data", "dimensions", "crs", "grid_mapping"]:
+                new_lev_info[var_attr] = copy(attr_info)
+        self.free_vars(self.vertical_var_name)
+        self.vertical_var_name = None
+
+    self.set_levels(new_lev_info)
+
+    # Remove original file information
+    self.__ini_path = None
+    self.dataset = None
+
+    return self
diff --git a/build/lib/nes/nc_projections/__init__.py b/build/lib/nes/nc_projections/__init__.py
new file mode 100644
index 0000000..4839ec5
--- /dev/null
+++ b/build/lib/nes/nc_projections/__init__.py
@@ -0,0 +1,15 @@
+from .default_nes import Nes
+from .latlon_nes import LatLonNes
+from .rotated_nes import RotatedNes
+from .rotated_nested_nes import RotatedNestedNes
+from .points_nes import PointsNes
+from .points_nes_ghost import PointsNesGHOST
+from .points_nes_providentia import PointsNesProvidentia
+from .lcc_nes import LCCNes
+from .mercator_nes import MercatorNes
+# from .raster_nes import RasterNes
+
+__all__ = [
+    'MercatorNes', 'Nes', 'LatLonNes', 'RotatedNes',
'RotatedNestedNes', 'PointsNes', 'PointsNesGHOST', + 'PointsNesProvidentia', 'LCCNes', +] diff --git a/build/lib/nes/nc_projections/default_nes.py b/build/lib/nes/nc_projections/default_nes.py new file mode 100644 index 0000000..d7c28c9 --- /dev/null +++ b/build/lib/nes/nc_projections/default_nes.py @@ -0,0 +1,4252 @@ +#!/usr/bin/env python + +import sys +from gc import collect +from warnings import warn +from numpy import (array, ndarray, abs, mean, diff, dstack, append, tile, empty, unique, stack, vstack, full, isnan, + flipud, nan, float32, float64, ma, generic, character, issubdtype, arange, newaxis, concatenate, + split, cumsum, zeros, column_stack) +from pandas import Index, concat +from geopandas import GeoDataFrame +from datetime import timedelta, datetime +from netCDF4 import Dataset, num2date, date2num, stringtochar +from mpi4py import MPI +from shapely.geometry import Polygon, Point +from copy import deepcopy, copy +from dateutil.relativedelta import relativedelta +from typing import Union, List, Dict, Any +from pyproj import Proj, Transformer +from ..methods import vertical_interpolation, horizontal_interpolation, cell_measures, spatial_join +from ..nes_formats import to_netcdf_cams_ra, to_netcdf_monarch, to_monarch_units, to_netcdf_cmaq, to_cmaq_units, \ + to_netcdf_wrf_chem, to_wrf_chem_units + + +class Nes(object): + """ + A class to handle netCDF data with parallel processing capabilities using MPI. + + Attributes + ---------- + comm : MPI.Comm + MPI communicator. + rank : int + MPI rank. + master : bool + True when rank == 0. + size : int + Size of the communicator. + info : bool + Indicates if you want to print reading/writing info. + __ini_path : str + Path to the original file to read when open_netcdf is called. + hours_start : int + Number of hours to avoid from the first original values. + hours_end : int + Number of hours to avoid from the last original values. + dataset : Dataset + netcdf4-python Dataset. + variables : Dict[str, Dict[str, Any]] + Variables information. The dictionary structure is: + { + var_name: { + "data": ndarray or None, # Array values or None if the variable is not loaded. + attr_name: attr_value, # Variable attributes. + ... + }, + ... + } + _full_time : List[datetime] + Complete list of original time step values. + _full_lev : Dict[str, array] + Vertical level dictionary with the complete "data" key for all the values and the rest of the attributes. + { + "data": ndarray, # Array of vertical level values. + attr_name: attr_value, # Vertical level attributes. + ... + } + _full_lat : dict + Latitudes dictionary with the complete "data" key for all the values and the rest of the attributes. + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + _full_lon : dict + Longitudes dictionary with the complete "data" key for all the values and the rest of the attributes. + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + _full_lat_bnds : dict + Latitude bounds dictionary with the complete "data" key for the latitudinal boundaries of each grid and the + rest of the attributes. + { + "data": ndarray, # Array of latitude bounds. + attr_name: attr_value, # Latitude bounds attributes. + ... + } + _full_lon_bnds : dict + Longitude bounds dictionary with the complete "data" key for the longitudinal boundaries of each grid and the + rest of the attributes. + { + "data": ndarray, # Array of longitude bounds. 
+ attr_name: attr_value, # Longitude bounds attributes. + ... + } + parallel_method : str + Parallel method to read/write. Can be chosen from any of the following axes to parallelize: "T", "Y", or "X". + read_axis_limits : dict + Dictionary with the 4D limits of the rank data to read. Structure: + { + "t_min": int, "t_max": int, # Time axis limits. + "z_min": int, "z_max": int, # Vertical axis limits. + "y_min": int, "y_max": int, # Latitudinal axis limits. + "x_min": int, "x_max": int, # Longitudinal axis limits. + } + write_axis_limits : dict + Dictionary with the 4D limits of the rank data to write. Structure: + { + "t_min": int, "t_max": int, # Time axis limits. + "z_min": int, "z_max": int, # Vertical axis limits. + "y_min": int, "y_max": int, # Latitudinal axis limits. + "x_min": int, "x_max": int, # Longitudinal axis limits. + } + time : List[datetime] + List of time steps of the rank data. + lev : dict + Vertical levels dictionary with the portion of "data" corresponding to the rank values. Structure: + { + "data": ndarray, # Array of vertical level values for the rank. + attr_name: attr_value, # Vertical level attributes. + ... + } + lat : dict + Latitudes dictionary with the portion of "data" corresponding to the rank values. Structure: + { + "data": ndarray, # Array of latitude values for the rank. + attr_name: attr_value, # Latitude attributes. + ... + } + lon : dict + Longitudes dictionary with the portion of "data" corresponding to the rank values. Structure: + { + "data": ndarray, # Array of longitude values for the rank. + attr_name: attr_value, # Longitude attributes. + ... + } + lat_bnds : dict + Latitude bounds dictionary with the portion of "data" for the latitudinal boundaries corresponding to the rank + values. + Structure: + { + "data": ndarray, # Array of latitude bounds for the rank. + attr_name: attr_value, # Latitude bounds attributes. + ... + } + lon_bnds : dict + Longitude bounds dictionary with the portion of "data" for the longitudinal boundaries corresponding to the + rank values. + Structure: + { + "data": ndarray, # Array of longitude bounds for the rank. + attr_name: attr_value, # Longitude bounds attributes. + ... + } + global_attrs : dict + Global attributes with the attribute name as key and data as values. Structure: + { + attr_name: attr_value, # Global attribute name and value. + ... + } + _var_dim : tuple + Name of the Y and X dimensions for the variables. + _lat_dim : tuple + Name of the dimensions of the Latitude values. + _lon_dim : tuple + Name of the dimensions of the Longitude values. + projection : Proj + Grid projection. + projection_data : dict + Dictionary with the projection information. Structure: + { + proj_param: proj_value, # Projection parameters. + ... + } + """ + def __init__(self, comm: Union[MPI.Comm, None] = None, path: Union[str, None] = None, info: bool = False, + dataset: Union[Dataset, None] = None, parallel_method: str = "Y", avoid_first_hours: int = 0, + avoid_last_hours: int = 0, first_level: int = 0, last_level: Union[int, None] = None, + create_nes: bool = False, balanced: bool = False, times: Union[List[datetime], None] = None, + **kwargs) -> None: + """ + Initialize the Nes class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset or None + NetCDF4-python Dataset to initialize the class. 
+ parallel_method : str + Indicates the parallelization method that you want. Default over the Y axis. + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int or None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + A balanced dataset cannot be written in chunking mode. + times : List[datetime] or None + List of times to substitute the current ones during creation. + """ + + # MPI Initialization + if comm is None: + self.comm = MPI.COMM_WORLD + else: + self.comm = comm + self.rank = self.comm.Get_rank() + self.master = self.rank == 0 + self.size = self.comm.Get_size() + + # General info + self.info = info + self.__ini_path = path + self.shapefile = None + + # Selecting info + self.hours_start = avoid_first_hours + self.hours_end = avoid_last_hours + self.first_level = first_level + self.last_level = last_level + self.lat_min = None + self.lat_max = None + self.lon_min = None + self.lon_max = None + self.balanced = balanced + + # Define parallel method + self.parallel_method = parallel_method + self.serial_nc = None # Place to temporarily store the serial Nes instance + + # Get minor and major axes of Earth + self.earth_radius = self.get_earth_radius("WGS84") + + # Time resolution and climatology will be modified, if needed, during the time variable reading + self._time_resolution = "hours" + self._climatology = False + self._climatology_var_name = "climatology_bounds" # Default var_name but can be changed if the input differs + + # NetCDF object + if create_nes: + + self.dataset = None + + # Set string length + self.strlen = None + + # Initialize variables + self.variables = {} + + # Projection data. This is duplicated because, when creating the object from scratch, NES needs this + # information to create the coordinate data.
+ self.projection_data = self._get_projection_data(create_nes, **kwargs) + self.projection = self._get_pyproj_projection() + + # Complete dimensions + self._full_time = times + + self._full_time_bnds = self.__get_time_bnds(create_nes) + self._full_lat_bnds, self._full_lon_bnds = self.__get_coordinates_bnds(create_nes) + self._full_lev = {"data": array([0]), "units": "", "positive": "up"} + self._full_lat, self._full_lon = self._create_centre_coordinates(**kwargs) + + # Set axis limits for parallel reading + self.read_axis_limits = self._get_read_axis_limits() + self.write_axis_limits = self._get_write_axis_limits() + + # Dimensions screening + self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + self.time_bnds = self.get_full_time_bnds() + self.lev = self.get_full_levels() + self.lat_bnds = self.get_full_latitudes_boundaries() + self.lon_bnds = self.get_full_longitudes_boundaries() + + # Cell measures screening + self.cell_measures = self.__get_cell_measures(create_nes) + + # Set NetCDF attributes + self.global_attrs = self.__get_global_attributes(create_nes) + + else: + if dataset is not None: + self.dataset = dataset + elif self.__ini_path is not None: + self._open() + + # Get string length + self.strlen = self._get_strlen() + + # Lazy variables + self.variables = self._get_lazy_variables() + + # Complete dimensions + self._full_time = self.__get_time() + self._full_time_bnds = self.__get_time_bnds() + self._full_lev = self._get_coordinate_dimension(["lev", "level", "lm", "plev"]) + self._full_lat = self._get_coordinate_dimension(["lat", "latitude", "latitudes"]) + self._full_lon = self._get_coordinate_dimension(["lon", "longitude", "longitudes"]) + self._full_lat_bnds, self._full_lon_bnds = self.__get_coordinates_bnds() + + # Complete cell measures + self._cell_measures = self.__get_cell_measures() + + # Set axis limits for parallel reading + self.read_axis_limits = self._get_read_axis_limits() + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + # Dimensions screening + self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + self.time_bnds = self.get_full_time_bnds() + self.lev = self._get_coordinate_values(self.get_full_levels(), "Z") + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + self.lat_bnds = self._get_coordinate_values(self.get_full_latitudes_boundaries(), "Y", bounds=True) + self.lon_bnds = self._get_coordinate_values(self.get_full_longitudes_boundaries(), "X", bounds=True) + + # Cell measures screening + self.cell_measures = self._get_cell_measures_values(self._cell_measures) + + # Set NetCDF attributes + self.global_attrs = self.__get_global_attributes() + + # Projection data + self.projection_data = self._get_projection_data(create_nes, **kwargs) + self.projection = self._get_pyproj_projection() + + # Writing options + self.zip_lvl = 0 + + # Dimensions information + self._var_dim = None + self._lat_dim = None + self._lon_dim = None + + self.vertical_var_name = None + + # Filtering (portion of the filter coordinates function) + idx = self._get_idx_intervals() + if self.master: + self.set_full_times(self._full_time[idx["idx_t_min"]:idx["idx_t_max"]]) + self._full_lev["data"] = self._full_lev["data"][idx["idx_z_min"]:idx["idx_z_max"]] + + self.hours_start = 0 + self.hours_end = 0 + self.last_level = None + self.first_level = None + + def 
__test_mpi__(self, num_test=None): + print(f"{self.rank} Barrier {num_test}") + sys.stdout.flush() + self.comm.Barrier() + if self.master: + data = 1 + else: + data = 0 + data = self.comm.bcast(data, root=0) + print(f"{self.rank} data {data}") + sys.stdout.flush() + return None + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default over the Y axis. + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int or None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + A balanced dataset cannot be written in chunking mode. + times : List[datetime] or None + List of times to substitute the current ones during creation. + """ + + new = Nes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, + last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def _get_strlen(self): + """ + Get the strlen. + + Returns + ------- + int + Max length of the string data. + """ + + if "strlen" in self.dataset.dimensions: + strlen = self.dataset.dimensions["strlen"].size + else: + return None + + return strlen + + def set_strlen(self, strlen=75): + """ + Set the strlen. + + 75 is the standard value used in GHOST data. + + Parameters + ---------- + strlen : int or None + Max length of the string. + """ + + self.strlen = strlen + + return None + + def __del__(self): + """ + To delete the Nes object and close all the open datasets. + """ + + self.close() + try: + self.free_vars(list(self.variables.keys())) + del self.variables + del self.time + del self._full_time + del self.time_bnds + del self._full_time_bnds + del self.lev + del self._full_lev + del self.lat + del self._full_lat + del self.lon + del self._full_lon + del self._full_lat_bnds + del self.lat_bnds + del self._full_lon_bnds + del self.lon_bnds + del self.strlen + del self.shapefile + for cell_measure in self.cell_measures.keys(): + if self.cell_measures[cell_measure]["data"] is not None: + del self.cell_measures[cell_measure]["data"] + del self.cell_measures + except (AttributeError, KeyError): + pass + + del self + collect() + + return None + + def __getstate__(self): + """ + Get the state of the object for pickling, excluding the attributes that cannot be serialized + (communicator, variables, dataset and cell measures). + + Returns + ------- + state : dict + Dictionary with the class parameters. + """ + + d = self.__dict__ + state = {k: d[k] for k in d if k not in ["comm", "variables", "dataset", "cell_measures"]} + + return state + + def __setstate__(self, state): + """ + Set the state of the class.
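+        Called by pickle when restoring the object, e.g. after it has been broadcast to another MPI rank.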
+ + Parameters + ---------- + state: dict + Dictionary with the class parameters. + """ + + self.__dict__ = state + + return None + + def __add__(self, other): + """ + Sum two Nes objects. + + Parameters + ---------- + other : Nes + A Nes object to be summed. + + Returns + ------- + Nes + The summed Nes object. + """ + nessy = self.copy(copy_vars=True) + for var_name in other.variables.keys(): + if var_name not in nessy.variables.keys(): + # Create new variable + nessy.variables[var_name] = deepcopy(other.variables[var_name]) + else: + nessy.variables[var_name]["data"] += other.variables[var_name]["data"] + return nessy + + def __radd__(self, other): + if other == 0 or other is None: + return self + else: + return self.__add__(other) + + def __getitem__(self, key: str) -> Union[array, None]: + """ + Retrieve the data associated with the specified key. + + Parameters + ---------- + key : str + The key to retrieve the data for. + + Returns + ------- + Union[array, None] + The data associated with the specified key, or None if the key + does not exist. + + Notes + ----- + This method allows accessing variable data with dictionary-like syntax; + obj[key] is equivalent to obj.variables[key]["data"]. + + """ + return self.variables[key]["data"] + + def copy(self, copy_vars: bool = False): + """ + Copy the Nes object. + By default, the copy does not include the communicator, dataset, or variables. + + Parameters + ---------- + copy_vars: bool + Indicates if you want to copy the variables (in lazy mode). + + Returns + ------- + nessy : Nes + Copy of the Nes object. + """ + + nessy = deepcopy(self) + nessy.dataset = None + if copy_vars: + nessy.set_communicator(self.comm) + nessy.variables = deepcopy(self.variables) + nessy.cell_measures = deepcopy(self.cell_measures) + else: + nessy.variables = {} + nessy.cell_measures = {} + + return nessy + + def get_full_times(self) -> List[datetime]: + """ + Retrieve the complete list of original time step values. + + Returns + ------- + List[datetime] + The complete list of original time step values from the netCDF data. + """ + if self.master: + data = self._full_time + else: + data = None + data = self.comm.bcast(data, root=0) + + if not isinstance(data, list): + data = list(data) + return data + + def get_full_time_bnds(self) -> List[datetime]: + """ + Retrieve the complete list of original time step boundaries. + + Returns + ------- + List[datetime] + The complete list of original time step boundary values from the netCDF data. + """ + data = self.comm.bcast(self._full_time_bnds) + return data + + def get_full_levels(self) -> Dict[str, Any]: + """ + Retrieve the complete vertical level information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete vertical level data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of vertical level values. + attr_name: attr_value, # Vertical level attributes. + ... + } + """ + data = self.comm.bcast(self._full_lev) + return data + + def get_full_latitudes(self) -> Dict[str, Any]: + """ + Retrieve the complete latitude information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_lat) + + return data + + def get_full_longitudes(self) -> Dict[str, Any]: + """ + Retrieve the complete longitude information.
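+        Like the other get_full_* getters, the data is broadcast from the master rank, so this is a collective call that every rank must make.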
+ + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_lon) + return data + + def get_full_latitudes_boundaries(self) -> Dict[str, Any]: + """ + Retrieve the complete latitude boundaries information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude boundaries data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude boundaries values. + attr_name: attr_value, # Latitude boundaries attributes. + ... + } + """ + data = self.comm.bcast(self._full_lat_bnds) + return data + + def get_full_longitudes_boundaries(self) -> Dict[str, Any]: + """ + Retrieve the complete longitude boundaries information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude boundaries data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude boundaries values. + attr_name: attr_value, # Longitude boundaries attributes. + ... + } + """ + data = self.comm.bcast(self._full_lon_bnds) + return data + + def set_full_times(self, data: List[datetime]) -> None: + """ + Set the complete list of original time step values. + + Parameters + ---------- + data : List[datetime] + The complete list of original time step values to set. + """ + if self.master: + self._full_time = data + return None + + def set_full_time_bnds(self, data: List[datetime]) -> None: + """ + Set the complete list of original time step boundaries. + + Parameters + ---------- + data : List[datetime] + The complete list of original time step boundary values to set. + """ + if self.master: + self._full_time_bnds = data + return None + + def set_full_levels(self, data: Dict[str, Any]) -> None: + """ + Set the complete vertical level information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete vertical level data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of vertical level values. + attr_name: attr_value, # Vertical level attributes. + ... + } + """ + if self.master: + self._full_lev = data + return None + + def set_full_latitudes(self, data: Dict[str, Any]) -> None: + """ + Set the complete latitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + if self.master: + self._full_lat = data + return None + + def set_full_longitudes(self, data: Dict[str, Any]) -> None: + """ + Set the complete longitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + if self.master: + self._full_lon = data + return None + + def set_full_latitudes_boundaries(self, data: Dict[str, Any]) -> None: + """ + Set the complete latitude boundaries information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude boundaries data and its attributes. 
+ The dictionary structure is: + { + "data": ndarray, # Array of latitude boundaries values. + attr_name: attr_value, # Latitude boundaries attributes. + ... + } + """ + if self.master: + self._full_lat_bnds = data + return None + + def set_full_longitudes_boundaries(self, data: Dict[str, Any]) -> None: + """ + Set the complete longitude boundaries information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude boundaries data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude boundaries values. + attr_name: attr_value, # Longitude boundaries attributes. + ... + } + """ + if self.master: + self._full_lon_bnds = data + + return None + + def get_fids(self, use_read=False): + """ + Obtain the FIDs in a 2D format. + + Parameters + ---------- + use_read : bool + Indicates if you want to use the read_axis_limits. + + Returns + ------- + array + 2D array with the FID data. + """ + if self.master: + fids = arange(self._full_lat["data"].shape[0] * self._full_lon["data"].shape[-1]) + fids = fids.reshape((self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1])) + else: + fids = None + fids = self.comm.bcast(fids) + + if use_read: + fids = fids[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + fids = fids[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] + return fids + + def get_full_shape(self): + """ + Obtain the full 2D shape of the data. + + Returns + ------- + tuple + 2D shape of the data. + """ + if self.master: + shape = (self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1]) + else: + shape = None + shape = self.comm.bcast(shape) + + return shape + + def set_level_direction(self, new_direction): + """ + Set the direction of the vertical level values. + + Parameters + ---------- + new_direction : str + The new direction for the vertical levels. Must be either "up" or "down". + + Returns + ------- + bool + True if the direction was set successfully. + + Raises + ------ + ValueError + If `new_direction` is not "up" or "down". + """ + if new_direction not in ["up", "down"]: + raise ValueError(f"Level direction must be 'up' or 'down'. '{new_direction}' is not a valid option") + if self.master: + self._full_lev["positive"] = new_direction + self.lev["positive"] = new_direction + + return True + + def reverse_level_direction(self): + """ + Reverse the current direction of the vertical level values. + + Returns + ------- + bool + True if the direction was reversed successfully. + """ + if "positive" in self.lev.keys(): + if self.lev["positive"] == "up": + if self.master: + self._full_lev["positive"] = "down" + self.lev["positive"] = "down" + else: + if self.master: + self._full_lev["positive"] = "up" + self.lev["positive"] = "up" + return True + + def clear_communicator(self): + """ + Erase the communicator and the parallelization indexes. + """ + + self.comm = None + self.rank = 0 + self.master = 0 + self.size = 0 + + return None + + def set_communicator(self, comm): + """ + Set a new communicator and the corresponding parallelization indexes. + + Parameters + ---------- + comm: MPI.COMM + Communicator to be set.
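+            The read and write axis limits are recalculated for the size of the new communicator.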
+ """ + + self.comm = comm + self.rank = self.comm.Get_rank() + self.master = self.rank == 0 + self.size = self.comm.Get_size() + + self.read_axis_limits = self._get_read_axis_limits() + self.write_axis_limits = self._get_write_axis_limits() + + return None + + def set_climatology(self, is_climatology): + """ + Set whether the dataset represents climatological data. + + Parameters + ---------- + is_climatology : bool + A boolean indicating if the dataset represents climatological data. + + Returns + ------- + None + + Raises + ------ + TypeError + If `is_climatology` is not a boolean. + """ + if not isinstance(is_climatology, bool): + raise TypeError("Only boolean values are accepted") + self._climatology = is_climatology + return None + + def get_climatology(self): + """ + Get whether the dataset represents climatological data. + + Returns + ------- + bool + True if the dataset represents climatological data, False otherwise. + """ + return self._climatology + + def set_levels(self, levels): + """ + Modify the original level values with new ones. + + Parameters + ---------- + levels : dict + Dictionary with the new level information to be set. + """ + self.set_full_levels(deepcopy(levels)) + self.lev = deepcopy(levels) + + return None + + def set_time(self, time_list): + """ + Modify the original level values with new ones. + + Parameters + ---------- + time_list : List[datetime] + List of time steps + """ + if self.parallel_method == "T": + raise TypeError("Cannot set time on a 'T' parallel method") + self.set_full_times(deepcopy(time_list)) + self.time = deepcopy(time_list) + + return None + + def set_time_bnds(self, time_bnds): + """ + Modify the original time bounds values with new ones. + + Parameters + ---------- + time_bnds : List + AList with the new time bounds information to be set. + """ + + correct_format = True + for time_bnd in array(time_bnds).flatten(): + if not isinstance(time_bnd, datetime): + print("{0} is not a datetime object".format(time_bnd)) + correct_format = False + if correct_format: + if len(self.get_full_times()) == len(time_bnds): + self.set_full_time_bnds(deepcopy(time_bnds)) + self.time_bnds = deepcopy(time_bnds) + else: + msg = "WARNING!!! " + msg += "The given time bounds list has a different length than the time array. " + msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self.time), len(time_bnds)) + warn(msg) + sys.stderr.flush() + else: + msg = "WARNING!!! " + msg += "There is at least one element in the time bounds to be set that is not a datetime object. " + msg += "Time bounds will not be set." + warn(msg) + sys.stderr.flush() + + return None + + def set_time_resolution(self, new_resolution): + """ + Set the time resolution for the dataset. + + Parameters + ---------- + new_resolution : str + The new time resolution. Accepted values are "second", "seconds", "minute", "minutes", + "hour", "hours", "day", "days". + + Returns + ------- + bool + True if the time resolution was set successfully. + + Raises + ------ + ValueError + If `new_resolution` is not one of the accepted values. + """ + accepted_resolutions = ["second", "seconds", "minute", "minutes", "hour", "hours", "day", "days"] + if new_resolution in accepted_resolutions: + self._time_resolution = new_resolution + else: + raise ValueError(f"Time resolution '{new_resolution}' is not accepted. 
" + + f"Use one of this: {accepted_resolutions}") + return True + + @staticmethod + def _create_single_spatial_bounds(coordinates, inc, spatial_nv=2, inverse=False): + """ + Calculate the vertices coordinates. + + Parameters + ---------- + coordinates : array + Coordinates in degrees (latitude or longitude). + inc : float + Increment between centre values. + spatial_nv : int + Non-mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. + inverse : bool + For some grid latitudes. + + Returns + ---------- + bounds : array + An Array with as many elements as vertices for each value of coords. + """ + + # Create new arrays moving the centres half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if spatial_nv == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bounds = dstack((coords_left, coords_right)) + bounds = bounds.reshape((len(coordinates), spatial_nv)) + elif spatial_nv == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bounds = dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bounds = dstack((coords_left, coords_right, coords_right, coords_left)) + else: + raise ValueError("The number of vertices of the boundaries must be 2 or 4.") + + return bounds + + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + # Latitudes + full_lat = self.get_full_latitudes() + inc_lat = abs(mean(diff(full_lat["data"]))) + lat_bnds = self._create_single_spatial_bounds(full_lat["data"], inc_lat, spatial_nv=2) + + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], :]} + + # Longitudes + full_lon = self.get_full_longitudes() + inc_lon = abs(mean(diff(full_lon["data"]))) + lon_bnds = self._create_single_spatial_bounds(full_lon["data"], inc_lon, spatial_nv=2) + + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} + + return None + + def get_spatial_bounds_mesh_format(self): + """ + Get the spatial bounds in the pcolormesh format: + + see: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.pcolormesh.html + + Returns + ------- + lon_bnds_mesh : numpy.ndarray + Longitude boundaries in the mesh format + lat_bnds_mesh : numpy.ndarray + Latitude boundaries in the mesh format + """ + if self.size > 1: + raise RuntimeError("NES.get_spatial_bounds_mesh_format() function only works in serial mode.") + if self.lat_bnds is None: + self.create_spatial_bounds() + + if self.lat_bnds["data"].shape[-1] == 2: + # get the lat_b and lon_b first rows + lat_b_0 = append(self.lat_bnds["data"][:, 0], self.lat_bnds["data"][-1, -1]) + lon_b_0 = append(self.lon_bnds["data"][:, 0], self.lon_bnds["data"][-1, -1]) + # expand lat_band lon_b in 2D + lat_bnds_mesh = tile(lat_b_0, (len(self.lon["data"]) + 1, 1)).transpose() + lon_bnds_mesh = tile(lon_b_0, (len(self.lat["data"]) + 1, 1)) + + elif self.lat_bnds["data"].shape[-1] == 4: + # Irregular quadrilateral polygon cell definition + lat_bnds_mesh = empty((self.lat["data"].shape[0] + 1, 
self.lat["data"].shape[1] + 1)) + lat_bnds_mesh[:-1, :-1] = self.lat_bnds["data"][:, :, 0] + lat_bnds_mesh[:-1, 1:] = self.lat_bnds["data"][:, :, 1] + lat_bnds_mesh[1:, 1:] = self.lat_bnds["data"][:, :, 2] + lat_bnds_mesh[1:, :-1] = self.lat_bnds["data"][:, :, 3] + + lon_bnds_mesh = empty((self.lat["data"].shape[0] + 1, self.lat["data"].shape[1] + 1)) + lon_bnds_mesh[:-1, :-1] = self.lon_bnds["data"][:, :, 0] + lon_bnds_mesh[:-1, 1:] = self.lon_bnds["data"][:, :, 1] + lon_bnds_mesh[1:, 1:] = self.lon_bnds["data"][:, :, 2] + lon_bnds_mesh[1:, :-1] = self.lon_bnds["data"][:, :, 3] + else: + raise RuntimeError("Invalid number of vertices: {0}".format(self.lat_bnds["data"].shape[-1])) + + return lon_bnds_mesh, lat_bnds_mesh + + def free_vars(self, var_list): + """ + Erase the selected variables from the variables' information. + + Parameters + ---------- + var_list : List or str + List (or single string) of the variables to be loaded. + """ + + if isinstance(var_list, str): + var_list = [var_list] + + if self.variables is not None: + for var_name in var_list: + if var_name in self.variables: + if "data" in self.variables[var_name].keys(): + del self.variables[var_name]["data"] + del self.variables[var_name] + collect() + + return None + + def keep_vars(self, var_list): + """ + Keep the selected variables and erases the rest. + + Parameters + ---------- + var_list : List or str + List (or single string) of the variables to be loaded. + """ + + if isinstance(var_list, str): + var_list = [var_list] + + to_remove = list(set(self.variables.keys()).difference(set(var_list))) + + self.free_vars(to_remove) + + return None + + @property + def get_time_interval(self): + """ + Calculate the interrval of hours between time steps. + + Returns + ------- + int + Number of hours between time steps. + """ + if self.master: + time_interval = self._full_time[1] - self._full_time[0] + time_interval = int(time_interval.seconds // 3600) + else: + time_interval = None + + return self.comm.bcast(time_interval) + + def sel_time(self, time, inplace=True): + """ + To select only one time step. + + Parameters + ---------- + time : datetime + Time stamp to select. + inplace : bool + Indicates if you want a copy with the selected time step (False) or to modify te existing one (True). + + Returns + ------- + Nes + A Nes object with the data (and metadata) of the selected time step. + """ + + if not inplace: + aux_nessy = self.copy(copy_vars=False) + aux_nessy.comm = self.comm + else: + aux_nessy = self + + aux_nessy.hours_start = 0 + aux_nessy.hours_end = 0 + + idx_time = aux_nessy.time.index(time) + + aux_nessy.time = [self.time[idx_time]] + aux_nessy._full_time = aux_nessy.time + for var_name, var_info in self.variables.items(): + if copy: + aux_nessy.variables[var_name] = {} + for att_name, att_value in var_info.items(): + if att_name == "data": + if att_value is None: + raise ValueError("{} data not loaded".format(var_name)) + aux_nessy.variables[var_name][att_name] = att_value[[idx_time]] + else: + aux_nessy.variables[var_name][att_name] = att_value + else: + aux_nessy.variables[var_name]["data"] = aux_nessy.variables[var_name]["data"][[idx_time]] + + return aux_nessy + + def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, + lat_min=None, lat_max=None, lon_min=None, lon_max=None): + """ + Select a slice of time, vertical level, latitude, or longitude given minimum and maximum limits. 
+ + Parameters + ---------- + hours_start : int, optional + The number of hours from the start to begin the selection. + time_min : datetime, optional + The minimum datetime for the time selection. Mutually exclusive with `hours_start`. + hours_end : int, optional + The number of hours from the end to end the selection. + time_max : datetime, optional + The maximum datetime for the time selection. Mutually exclusive with `hours_end`. + lev_min : int, optional + The minimum vertical level index for the selection. + lev_max : int, optional + The maximum vertical level index for the selection. + lat_min : float, optional + The minimum latitude for the selection. + lat_max : float, optional + The maximum latitude for the selection. + lon_min : float, optional + The minimum longitude for the selection. + lon_max : float, optional + The maximum longitude for the selection. + + Returns + ------- + None + + Raises + ------ + ValueError + If any variables are already loaded or if mutually exclusive parameters are both provided. + + Notes + ----- + This method updates the selection criteria for the dataset and recalculates the read and write axis limits + accordingly. It also updates the time, level, latitude, and longitude slices based on the new criteria. + """ + full_time = self.get_full_times() + loaded_vars = False + for var_info in self.variables.values(): + if var_info["data"] is not None: + loaded_vars = True + if loaded_vars: + raise ValueError("Some variables have been loaded. Use select function before load.") + + # First time filter + if hours_start is not None: + if time_min is not None: + raise ValueError("Choose to select by hours_start or time_min but not both") + self.hours_start = hours_start + elif time_min is not None: + if time_min <= full_time[0]: + self.hours_start = 0 + else: + self.hours_start = int((time_min - full_time[0]).total_seconds() // 3600) + + # Last time filter + if hours_end is not None: + if time_max is not None: + raise ValueError("Choose to select by hours_end or time_max but not both") + self.hours_end = hours_end + elif time_max is not None: + if time_max >= full_time[-1]: + self.hours_end = 0 + else: + self.hours_end = int((full_time[-1] - time_max).total_seconds() // 3600) + + # Level filter + self.first_level = lev_min + self.last_level = lev_max + + # Coordinate filter + self.lat_min = lat_min + self.lat_max = lat_max + self.lon_min = lon_min + self.lon_max = lon_max + + # New axis limits + self.read_axis_limits = self._get_read_axis_limits() + + # Dimensions screening + self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + self.time_bnds = self.get_full_time_bnds() + self.lev = self._get_coordinate_values(self.get_full_levels(), "Z") + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + + self.lat_bnds = self._get_coordinate_values(self.get_full_latitudes_boundaries(), "Y", bounds=True) + self.lon_bnds = self._get_coordinate_values(self.get_full_longitudes_boundaries(), "X", bounds=True) + + # Filter dimensions + self._filter_coordinates_selection() + + # Removing complete coordinates + self.write_axis_limits = self._get_write_axis_limits() + + return None + + def _filter_coordinates_selection(self): + """ + Use the selection limits to filter time, lev, lat, lon, lon_bnds and lat_bnds. 
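+        The full (master-rank) coordinates are trimmed in place and the selection attributes are reset afterwards.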
+ """ + + idx = self._get_idx_intervals() + + if self.master: + self._full_time = self._full_time[idx["idx_t_min"]:idx["idx_t_max"]] + self._full_lev["data"] = self._full_lev["data"][idx["idx_z_min"]:idx["idx_z_max"]] + + if len(self._full_lat["data"].shape) == 1: + # Regular projection + self._full_lat["data"] = self._full_lat["data"][idx["idx_y_min"]:idx["idx_y_max"]] + self._full_lon["data"] = self._full_lon["data"][idx["idx_x_min"]:idx["idx_x_max"]] + + if self._full_lat_bnds is not None: + self._full_lat_bnds["data"] = self._full_lat_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], :] + if self._full_lon_bnds is not None: + self._full_lon_bnds["data"] = self._full_lon_bnds["data"][idx["idx_x_min"]:idx["idx_x_max"], :] + else: + # Irregular projections + self._full_lat["data"] = self._full_lat["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"]] + self._full_lon["data"] = self._full_lon["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"]] + + if self._full_lat_bnds is not None: + self._full_lat_bnds["data"] = self._full_lat_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"], :] + if self._full_lon_bnds is not None: + self._full_lon_bnds["data"] = self._full_lon_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"], :] + + self.hours_start = 0 + self.hours_end = 0 + self.last_level = None + self.first_level = None + self.lat_min = None + self.lat_max = None + self.lon_max = None + self.lon_min = None + + return None + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + + raise NotImplementedError("Must be implemented on inner class.") + + @staticmethod + def _get_pyproj_projection(): + """ + Retrieves Pyproj projection data based on grid details. 
+ + """ + + raise NotImplementedError("Must be implemented on inner class.") + + def _get_idx_intervals(self): + """ + Calculate the index intervals + + Returns + ------- + dict + Dictionary with the index intervals + """ + full_lat = self.get_full_latitudes() + full_lon = self.get_full_longitudes() + idx = {"idx_t_min": self._get_time_id(self.hours_start, first=True), + "idx_t_max": self._get_time_id(self.hours_end, first=False), + "idx_z_min": self.first_level, + "idx_z_max": self.last_level} + + # Axis Y + if self.lat_min is None: + idx["idx_y_min"] = 0 + else: + idx["idx_y_min"] = self._get_coordinate_id(full_lat["data"], self.lat_min, axis=0) + if self.lat_max is None: + idx["idx_y_max"] = full_lat["data"].shape[0] + else: + idx["idx_y_max"] = self._get_coordinate_id(full_lat["data"], self.lat_max, axis=0) + 1 + + if idx["idx_y_min"] > idx["idx_y_max"]: + idx_aux = copy(idx["idx_y_min"]) + idx["idx_y_min"] = idx["idx_y_max"] + idx["idx_y_max"] = idx_aux + + # Axis X + + if self.lon_min is None: + idx["idx_x_min"] = 0 + else: + if len(full_lon["data"].shape) == 1: + axis = 0 + else: + axis = 1 + idx["idx_x_min"] = self._get_coordinate_id(full_lon["data"], self.lon_min, axis=axis) + if self.lon_max is None: + idx["idx_x_max"] = full_lon["data"].shape[-1] + else: + if len(full_lon["data"].shape) == 1: + axis = 0 + else: + axis = 1 + idx["idx_x_max"] = self._get_coordinate_id(full_lon["data"], self.lon_max, axis=axis) + 1 + + if idx["idx_x_min"] > idx["idx_x_max"]: + idx_aux = copy(idx["idx_x_min"]) + idx["idx_x_min"] = idx["idx_x_max"] + idx["idx_x_max"] = idx_aux + return idx + + # ================================================================================================================== + # Statistics + # ================================================================================================================== + + def last_time_step(self): + """ + Modify variables to keep only the last time step. + """ + + if self.parallel_method == "T": + raise NotImplementedError("Statistics are not implemented on time axis parallelization method.") + aux_time = self.get_full_times()[0].replace(hour=0, minute=0, second=0, microsecond=0) + self.set_full_times([aux_time]) + self.time = [aux_time] + + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + aux_data = var_info["data"][-1, :] + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]["data"] = aux_data + self.hours_start = 0 + self.hours_end = 0 + + return None + + def daily_statistic(self, op, type_op="calendar"): + """ + Calculate daily statistic. + + Parameters + ---------- + op : str + Statistic to perform. Accepted values: "max", "mean" and "min". + type_op : str + Type of statistic to perform. Accepted values: "calendar", "alltsteps", and "withoutt0". + - "calendar": Calculate the statistic using the time metadata. It will avoid single time step by day + calculations + - "alltsteps": Calculate a single time statistic with all the time steps. + - "withoutt0": Calculate a single time statistic with all the time steps avoiding the first one. 
+ """ + + if self.parallel_method == "T": + raise NotImplementedError("Statistics are not implemented on time axis parallel method.") + time_interval = self.get_time_interval + if type_op == "calendar": + aux_time_bounds = [] + aux_time = [] + day_list = [date_aux.day for date_aux in self.time] + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + stat_data = None + for day in unique(day_list): + idx_first = next(i for i, val in enumerate(day_list, 0) if val == day) + idx_last = len(day_list) - next(i for i, val in enumerate(reversed(day_list), 1) if val == day) + if idx_first != idx_last: # To avoid single time step statistic + if idx_last != len(day_list): + if op == "mean": + data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].mean(axis=0) + elif op == "max": + data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].max(axis=0) + elif op == "min": + data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + aux_time_bounds.append([self.time[idx_first], self.time[idx_last]]) + else: + if op == "mean": + data_aux = var_info["data"][idx_first:, :, :, :].mean(axis=0) + elif op == "max": + data_aux = var_info["data"][idx_first:, :, :, :].max(axis=0) + elif op == "min": + data_aux = var_info["data"][idx_first:, :, :, :].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + aux_time_bounds.append([self.time[idx_first], self.time[-1]]) + + data_aux = data_aux.reshape((1, data_aux.shape[0], data_aux.shape[1], data_aux.shape[2])) + aux_time.append(self.time[idx_first].replace(hour=0, minute=0, second=0)) + # Append over time dimension + if stat_data is None: + stat_data = data_aux.copy() + else: + stat_data = vstack([stat_data, data_aux]) + self.variables[var_name]["data"] = stat_data + self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) + self.time = aux_time + self.set_full_times(self.time) + + self.set_time_bnds(aux_time_bounds) + + elif type_op == "alltsteps": + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + if op == "mean": + aux_data = var_info["data"].mean(axis=0) + elif op == "max": + aux_data = var_info["data"].max(axis=0) + elif op == "min": + aux_data = var_info["data"].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]["data"] = aux_data + self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) + + aux_time = self.time[0].replace(hour=0, minute=0, second=0, microsecond=0) + aux_time_bounds = [[self.time[0], self.time[-1]]] + self.time = [aux_time] + self.set_full_times(self.time) + + self.set_time_bnds(aux_time_bounds) + + elif type_op == "withoutt0": + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + if op == "mean": + aux_data = var_info["data"][1:, :].mean(axis=0) + elif op == "max": + aux_data = var_info["data"][1:, :].max(axis=0) + elif op == "min": + aux_data = var_info["data"][1:, :].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], 
aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]["data"] = aux_data + self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) + full_time = self.get_full_times() + aux_time = full_time[1].replace(hour=0, minute=0, second=0, microsecond=0) + aux_time_bounds = [[full_time[1], full_time[-1]]] + self.time = [aux_time] + self.set_full_times(self.time) + + self.set_time_bnds(aux_time_bounds) + else: + raise NotImplementedError(f"Statistic operation type '{type_op}' is not implemented.") + self.hours_start = 0 + self.hours_end = 0 + + return None + + @staticmethod + def _get_axis_index_(axis): + + if axis == "T": + value = 0 + elif axis == "Z": + value = 1 + elif axis == "Y": + value = 2 + elif axis == "X": + value = 3 + else: + raise ValueError("Unknown axis: {0}".format(axis)) + + return value + + def sum_axis(self, axis="Z"): + + if self.parallel_method == axis: + raise NotImplementedError( + f"It is not possible to sum along the parallelized axis '{self.parallel_method}'") + + for var_name, var_info in self.variables.items(): + if var_info["data"] is not None: + self.variables[var_name]["data"] = self.variables[var_name]["data"].sum( + axis=self._get_axis_index_(axis), keepdims=True) + if axis == "T": + self.variables[var_name]["cell_methods"] = "time: sum (interval: {0}hr)".format( + (self.time[-1] - self.time[0]).total_seconds() // 3600) + + if axis == "T": + self.set_time_bnds([self.time[0], self.time[-1]]) + self.time = [self.time[0]] + self.set_full_times([self.time[0]]) + if axis == "Z": + self.lev["data"] = array([self.lev["data"][0]]) + self.set_full_levels(self.lev) + + return None + + def find_time_id(self, time): + """ + Find index of time in time array. + + Parameters + ---------- + time : datetime + Time element. + + Returns + ------- + int or None + Index of the time element, or None if it is not found. + """ + + if time in self.time: + return self.time.index(time) + + def rolling_mean(self, var_list=None, hours=8): + """ + Calculate the rolling mean over the given number of hours. + + Parameters + ---------- + var_list : List, str, None + List (or single string) of the variables to be loaded.
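+            If None, the rolling mean is calculated for all variables.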
+ hours : int, optional + Window, in hours, over which to calculate the rolling mean. Default: 8. + + Returns + ------- + Nes + A new Nes object with the rolling mean values. + """ + + if self.parallel_method == "T": + raise NotImplementedError("The rolling mean cannot be calculated using the time axis parallel method.") + + aux_nessy = self.copy(copy_vars=False) + aux_nessy.set_communicator(self.comm) + + if isinstance(var_list, str): + var_list = [var_list] + elif var_list is None: + var_list = list(self.variables.keys()) + + for var_name in var_list: + # Load variables if they have not been loaded previously + if self.variables[var_name]["data"] is None: + self.load(var_name) + + # Get original file shape + nessy_shape = self.variables[var_name]["data"].shape + + # Initialise array + aux_nessy.variables[var_name] = {} + aux_nessy.variables[var_name]["data"] = empty(shape=nessy_shape) + aux_nessy.variables[var_name]["dimensions"] = deepcopy(self.variables[var_name]["dimensions"]) + + for curr_time in self.time: + # Get previous time given a set of hours + prev_time = curr_time - timedelta(hours=(hours-1)) + + # Get time indices + curr_time_id = self.find_time_id(curr_time) + prev_time_id = self.find_time_id(prev_time) + + # Get mean if previous time is available + if prev_time_id is not None: + if self.info: + print(f"Calculating mean between {prev_time} and {curr_time}.") + aux_nessy.variables[var_name]["data"][curr_time_id, :, :, :] = self.variables[var_name]["data"][ + prev_time_id:curr_time_id, :, :, :].mean(axis=0, keepdims=True) + # Fill with nan if previous time is not available + else: + if self.info: + msg = f"Mean between {prev_time} and {curr_time} cannot be calculated " + msg += f"because data for {prev_time} is not available." + print(msg) + aux_nessy.variables[var_name]["data"][curr_time_id, :, :, :] = full( + shape=(1, nessy_shape[1], nessy_shape[2], nessy_shape[3]), fill_value=nan) + + return aux_nessy + + # ================================================================================================================== + # Reading + # ================================================================================================================== + + def _get_read_axis_limits(self): + """ + Calculate the 4D reading axis limits depending on whether they have to be balanced or not. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. + """ + + if self.balanced: + return self._get_read_axis_limits_balanced() + else: + return self._get_read_axis_limits_unbalanced() + + def _get_read_axis_limits_unbalanced(self): + """ + Calculate the 4D reading axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max.
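+            The parallelized axis is split into size contiguous chunks of equal floor length; the last rank absorbs the remainder.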
+ """ + + axis_limits = {"x_min": None, "x_max": None, + "y_min": None, "y_max": None, + "z_min": None, "z_max": None, + "t_min": None, "t_max": None} + + idx = self._get_idx_intervals() + if self.parallel_method == "Y": + y_len = idx["idx_y_max"] - idx["idx_y_min"] + if y_len < self.size: + raise IndexError("More processors (size={0}) selected than Y elements (size={1})".format( + self.size, y_len)) + axis_limits["y_min"] = ((y_len // self.size) * self.rank) + idx["idx_y_min"] + if self.rank + 1 < self.size: + axis_limits["y_max"] = ((y_len // self.size) * (self.rank + 1)) + idx["idx_y_min"] + else: + axis_limits["y_max"] = idx["idx_y_max"] + + # Non parallel filters + axis_limits["x_min"] = idx["idx_x_min"] + axis_limits["x_max"] = idx["idx_x_max"] + + axis_limits["t_min"] = idx["idx_t_min"] + axis_limits["t_max"] = idx["idx_t_max"] + + elif self.parallel_method == "X": + x_len = idx["idx_x_max"] - idx["idx_x_min"] + if x_len < self.size: + raise IndexError("More processors (size={0}) selected than X elements (size={1})".format( + self.size, x_len)) + axis_limits["x_min"] = ((x_len // self.size) * self.rank) + idx["idx_x_min"] + if self.rank + 1 < self.size: + axis_limits["x_max"] = ((x_len // self.size) * (self.rank + 1)) + idx["idx_x_min"] + else: + axis_limits["x_max"] = idx["idx_x_max"] + + # Non parallel filters + axis_limits["y_min"] = idx["idx_y_min"] + axis_limits["y_max"] = idx["idx_y_max"] + + axis_limits["t_min"] = idx["idx_t_min"] + axis_limits["t_max"] = idx["idx_t_max"] + + elif self.parallel_method == "T": + t_len = idx["idx_t_max"] - idx["idx_t_min"] + if t_len < self.size: + raise IndexError("More processors (size={0}) selected than T elements (size={1})".format( + self.size, t_len)) + axis_limits["t_min"] = ((t_len // self.size) * self.rank) + idx["idx_t_min"] + if self.rank + 1 < self.size: + axis_limits["t_max"] = ((t_len // self.size) * (self.rank + 1)) + idx["idx_t_min"] + + # Non parallel filters + axis_limits["y_min"] = idx["idx_y_min"] + axis_limits["y_max"] = idx["idx_y_max"] + + axis_limits["x_min"] = idx["idx_x_min"] + axis_limits["x_max"] = idx["idx_x_max"] + + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + # Vertical levels selection: + axis_limits["z_min"] = self.first_level + if self.last_level == -1 or self.last_level is None: + self.last_level = None + elif self.last_level + 1 == len(self.get_full_levels()["data"]): + self.last_level = None + else: + self.last_level += 1 + axis_limits["z_max"] = self.last_level + + return axis_limits + + def _get_read_axis_limits_balanced(self): + """ + Calculate the 4D reading balanced axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
+ """ + idx = self._get_idx_intervals() + + fid_dist = {} + if self.parallel_method == "Y": + len_to_split = idx["idx_y_max"] - idx["idx_y_min"] + if len_to_split < self.size: + raise IndexError("More processors (size={0}) selected than Y elements (size={1})".format( + self.size, len_to_split)) + min_axis = "y_min" + max_axis = "y_max" + to_add = idx["idx_y_min"] + + elif self.parallel_method == "X": + len_to_split = idx["idx_x_max"] - idx["idx_x_min"] + if len_to_split < self.size: + raise IndexError("More processors (size={0}) selected than X elements (size={1})".format( + self.size, len_to_split)) + min_axis = "x_min" + max_axis = "x_max" + to_add = idx["idx_x_min"] + elif self.parallel_method == "T": + len_to_split = idx["idx_t_max"] - idx["idx_t_min"] + if len_to_split < self.size: + raise IndexError(f"More processors (size={self.size}) selected than T elements (size={len_to_split})") + min_axis = "t_min" + max_axis = "t_max" + to_add = idx["idx_t_min"] + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + procs_len = len_to_split // self.size + procs_rows_extended = len_to_split - (procs_len * self.size) + + rows_sum = 0 + for proc in range(self.size): + fid_dist[proc] = {"x_min": 0, "x_max": None, + "y_min": 0, "y_max": None, + "z_min": 0, "z_max": None, + "t_min": 0, "t_max": None} + if proc < procs_rows_extended: + aux_rows = procs_len + 1 + else: + aux_rows = procs_len + + len_to_split -= aux_rows + if len_to_split < 0: + rows = len_to_split + aux_rows + else: + rows = aux_rows + + fid_dist[proc][min_axis] = rows_sum + fid_dist[proc][max_axis] = rows_sum + rows + + if to_add is not None: + fid_dist[proc][min_axis] += to_add + fid_dist[proc][max_axis] += to_add + + # # Last element + # if len_to_split == 0 and to_add == 0: + # fid_dist[proc][max_axis] = None + + rows_sum += rows + + axis_limits = fid_dist[self.rank] + + # Non parallel filters + if self.parallel_method != "T": + axis_limits["t_min"] = idx["idx_t_min"] + axis_limits["t_max"] = idx["idx_t_max"] + if self.parallel_method != "X": + axis_limits["x_min"] = idx["idx_x_min"] + axis_limits["x_max"] = idx["idx_x_max"] + if self.parallel_method != "Y": + axis_limits["y_min"] = idx["idx_y_min"] + axis_limits["y_max"] = idx["idx_y_max"] + + # Vertical levels selection: + axis_limits["z_min"] = self.first_level + if self.last_level == -1 or self.last_level is None: + self.last_level = None + elif self.last_level + 1 == len(self.get_full_levels()["data"]): + self.last_level = None + else: + self.last_level += 1 + axis_limits["z_max"] = self.last_level + + return axis_limits + + def _get_time_id(self, hours, first=True): + """ + Get the index of the corresponding time value. + + Parameters + ---------- + hours : int + Number of hours to avoid. + first : bool + Indicates if you want to avoid from the first hours (True) or from the last (False). + Default: True. + + Returns + ------- + int + Index of the time array. + """ + full_time = self.get_full_times() + + if first: + idx = full_time.index(full_time[0] + timedelta(hours=hours)) + else: + idx = full_time.index(full_time[-1] - timedelta(hours=hours)) + 1 + + return idx + + @staticmethod + def _get_coordinate_id(my_array, value, axis=0): + """ + Get the index of the corresponding coordinate value. + + Parameters + ---------- + my_array : array + An array with the coordinate data. + value : float + Coordinate value to search.
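+            The index of the nearest coordinate value is returned, so an exact match is not required.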
+ axis : int + Axis in which to search for the value. + Default: 0. + + Returns + ------- + int + Index of the coordinate array. + """ + idx = (abs(my_array - value)).argmin(axis=axis).min() + + return idx + + def _open(self): + """ + Open the NetCDF. + """ + + self.dataset = self.__open_netcdf4() + + return None + + def __open_netcdf4(self, mode="r"): + """ + Open the NetCDF with netcdf4-python. + + Parameters + ---------- + mode : str + Inherited from the mode parameter of https://unidata.github.io/netcdf4-python/#Dataset.__init__ + Default: "r" (read-only). + + Returns + ------- + netcdf : Dataset + Open dataset. + """ + + if self.size == 1: + netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=False) + else: + netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=True, comm=self.comm, + info=MPI.Info()) + self.dataset = netcdf + + return netcdf + + def close(self): + """ + Close the NetCDF with netcdf4-python. + """ + if (hasattr(self, "serial_nc")) and (self.serial_nc is not None): + if self.master: + self.serial_nc.close() + self.serial_nc = None + if (hasattr(self, "dataset")) and (self.dataset is not None): + self.dataset.close() + self.dataset = None + + return None + + @staticmethod + def __get_dates_from_months(time): + """ + Calculate the number of days since the first date + in the "time" variable and store them in a new list. + This is useful when the units are "months since", + which cannot be transformed to dates using "num2date". + + Parameters + ---------- + time: netCDF4.Variable + Original time variable, with "months since" units. + + Returns + ------- + new_time_deltas: List + Number of days since the base date for each time step. + """ + + start_date_str = time.units.split("since")[1].lstrip() + start_date = datetime(int(start_date_str[0:4]), int(start_date_str[5:7]), int(start_date_str[8:10])) + + new_time_deltas = [] + + for month_delta in time[:]: + # Transform current_date into number of days since base date + current_date = start_date + relativedelta(months=month_delta) + + # Calculate number of days between base date and the other dates + n_days = int((current_date - start_date).days) + + # Store in list + new_time_deltas.append(n_days) + + return new_time_deltas + + def __parse_time(self, time): + """ + Parses the time to be CF compliant. + + Parameters + ---------- + time: netCDF4.Variable + Original time variable. + + Returns + ------- + time_data : array + Time values. + units : str + CF compliant time units. + calendar : str + Time calendar. + """ + + units = self.__parse_time_unit(time.units) + + if not hasattr(time, "calendar"): + calendar = "standard" + else: + calendar = time.calendar + + if "months since" in time.units: + units = "days since " + time.units.split("since")[1].lstrip() + time = self.__get_dates_from_months(time) + + time_data = time[:] + + if len(time_data) == 1 and isnan(time_data[0]): + time_data[0] = 0 + + return time_data, units, calendar + + @staticmethod + def __parse_time_unit(t_units): + """ + Parses the time units to be CF compliant. + + Parameters + ---------- + t_units : str + Original time units. + + Returns + ------- + t_units : str + CF compliant time units.
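+        Only units of the "h @..." timestamp form are rewritten to "hours since ..."; any other string is returned unchanged.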
+ """ + + if "h @" in t_units: + t_units = "hours since {0}-{1}-{2} {3}:{4}:{5} UTC".format( + t_units[4:8], t_units[8:10], t_units[10:12], t_units[13:15], t_units[15:17], t_units[17:-4]) + + return t_units + + @staticmethod + def __get_time_resolution_from_units(units): + """ + Parses the time units to get the time resolution + + Parameters + ---------- + units : str + Time variable units + + Returns + ------- + str + Time variable resolution + """ + if "day" in units or "days" in units: + resolution = "days" + elif "hour" in units or "hours" in units: + resolution = "hours" + elif "minute" in units or "minutes" in units: + resolution = "minutes" + elif "second" in units or "seconds" in units: + resolution = "seconds" + else: + # Default resolution is "hours" + resolution = "hours" + return resolution + + def __get_time(self): + """ + Get the NetCDF file time values. + + Returns + ------- + time : List[datetime] + List of times (datetime) of the NetCDF data. + """ + if self.master: + nc_var = self.dataset.variables["time"] + time_data, units, calendar = self.__parse_time(nc_var) + # Extracting time resolution depending on the units + self._time_resolution = self.__get_time_resolution_from_units(units) + # Checking if it is a climatology dataset + if hasattr(nc_var, "climatology"): + self._climatology = True + self._climatology_var_name = nc_var.climatology + time = num2date(time_data, units, calendar=calendar) + time = [datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) for dt in time] + else: + time = None + self.free_vars("time") + + return time + + def __get_time_bnds(self, create_nes=False): + """ + Get the NetCDF time bounds values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + time_bnds : List + A List of time bounds (datetime) of the NetCDF data. + """ + + if not create_nes: + if self.master: + if "time_bnds" in self.dataset.variables.keys() or self._climatology: + time = self.dataset.variables["time"] + if self._climatology: + nc_var = self.dataset.variables[self._climatology_var_name] + else: + nc_var = self.dataset.variables["time_bnds"] + time_bnds = num2date(nc_var[:], self.__parse_time_unit(time.units), + calendar=time.calendar).tolist() + # Iterate over each inner list + for inner_list in time_bnds: + # Create a new list to store datetime objects + new_inner_list = [] + # Iterate over datetime objects within each inner list + for dt in inner_list: + # Access year, month, day, hour, and minute attributes of datetime objects + new_dt = datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) + # Append the new datetime object to the new inner list + new_inner_list.append(new_dt) + # Replace the old inner list with the new one + time_bnds[time_bnds.index(inner_list)] = new_inner_list + else: + time_bnds = None + else: + time_bnds = None + else: + time_bnds = None + + self.free_vars("time_bnds") + + return time_bnds + + def __get_coordinates_bnds(self, create_nes=False): + """ + Get the NetCDF coordinates bounds values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + lat_bnds : dict + Latitude bounds of the NetCDF data. + lon_bnds : dict + Longitude bounds of the NetCDF data. 
+ """ + + if not create_nes: + if self.master: + if "lat_bnds" in self.dataset.variables.keys(): + lat_bnds = {"data": self._unmask_array(self.dataset.variables["lat_bnds"][:])} + else: + lat_bnds = None + + if "lon_bnds" in self.dataset.variables.keys(): + lon_bnds = {"data": self._unmask_array(self.dataset.variables["lon_bnds"][:])} + else: + lon_bnds = None + else: + lat_bnds = None + lon_bnds = None + else: + lat_bnds = None + lon_bnds = None + + self.free_vars(["lat_bnds", "lon_bnds"]) + + return lat_bnds, lon_bnds + + def __get_cell_measures(self, create_nes=False): + """ + Get the NetCDF cell measures values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + dict + Dictionary of cell measures of the NetCDF data. + """ + + c_measures = {} + if self.master: + if not create_nes: + if "cell_area" in self.dataset.variables.keys(): + c_measures["cell_area"] = {} + c_measures["cell_area"]["data"] = self._unmask_array(self.dataset.variables["cell_area"][:]) + c_measures = self.comm.bcast(c_measures, root=0) + + self.free_vars(["cell_area"]) + + return c_measures + + def _get_coordinate_dimension(self, possible_names): + """ + Read the coordinate dimension data. + + This will read the complete data of the coordinate. + + Parameters + ---------- + possible_names: List, str, list + A List (or single string) of the possible names of the coordinate (e.g. ["lat", "latitude"]). + + Returns + ------- + nc_var : dict + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + """ + + if isinstance(possible_names, str): + possible_names = [possible_names] + + try: + dimension_name = set(possible_names).intersection(set(self.variables.keys())).pop() + if self.master: + nc_var = self.variables[dimension_name].copy() + nc_var["data"] = self.dataset.variables[dimension_name][:] + if hasattr(nc_var, "units"): + if nc_var["units"] in ["unitless", "-"]: + nc_var["units"] = "" + else: + nc_var = None + self.free_vars(dimension_name) + except KeyError: + if self.master: + nc_var = {"data": array([0]), + "units": ""} + else: + nc_var = None + + return nc_var + + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["Z", "Y", "X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "Y": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + elif coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + elif coordinate_axis == "Z": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + return values + + def _get_cell_measures_values(self, cell_measures_info): + """ + Get the cell measures data of the current portion. + + Parameters + ---------- + cell_measures_info : dict, list + Dictionary with the "data" key with the cell measures variable values. and the attributes as other keys. + + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. + """ + + if cell_measures_info is None: + return None + + cell_measures_values = {} + + for cell_measures_var in cell_measures_info.keys(): + + values = deepcopy(cell_measures_info[cell_measures_var]) + coordinate_len = len(values["data"].shape) + + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + cell_measures_values[cell_measures_var] = values + + return cell_measures_values + + def _get_lazy_variables(self): + """ + Get all the variables' information. + + Returns + ------- + variables : dict + Dictionary with the variable name as key and another dictionary as value. + De value dictionary will have the "data" key with None as value and all the variable attributes as the + other keys. + e.g. 
+ {"var_name_1": {"data": None, "attr_1": value_1_1, "attr_2": value_1_2, ...}, + "var_name_2": {"data": None, "attr_1": value_2_1, "attr_2": value_2_2, ...}, + ...} + """ + + if self.master: + variables = {} + # Initialise data + for var_name, var_info in self.dataset.variables.items(): + variables[var_name] = {} + variables[var_name]["data"] = None + variables[var_name]["dimensions"] = var_info.dimensions + variables[var_name]["dtype"] = var_info.dtype + if variables[var_name]["dtype"] in [str, object]: + if self.strlen is None: + self.set_strlen() + variables[var_name]["dtype"] = str + + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ["missing_value", "_FillValue", "add_offset", "scale_factor"]: + value = getattr(var_info, attrname) + if str(value) in ["unitless", "-"]: + value = "" + variables[var_name][attrname] = value + else: + variables = None + variables = self.comm.bcast(variables, root=0) + + return variables + + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 4 dimensions + if len(var_dims) < 2: + data = nc_var[:] + elif len(var_dims) == 2: + data = nc_var[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 3: + if "strlen" in var_dims: + data = nc_var[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + :] + data_aux = empty(shape=(data.shape[0], data.shape[1]), dtype=object) + for lat_n in range(data.shape[0]): + for lon_n in range(data.shape[1]): + data_aux[lat_n, lon_n] = "".join( + data[lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) + data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) + else: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 4: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"], + self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 5: + if "strlen" in var_dims: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"], + self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + :] + data_aux = empty(shape=(data.shape[0], data.shape[1], data.shape[2], data.shape[3]), dtype=object) + for time_n in range(data.shape[0]): + for lev_n in range(data.shape[1]): + for lat_n in range(data.shape[2]): + for lon_n in range(data.shape[3]): + data_aux[time_n, lev_n, lat_n, lon_n] = "".join( + data[time_n, lev_n, lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) + data = data_aux + else: + raise NotImplementedError("Error with {0}. 
Only netCDF variables with 4 dimensions or fewer can be read".format(
+                    var_name))
+        else:
+            raise NotImplementedError("Error with {0}. Only netCDF variables with 4 dimensions or fewer can be read".format(
+                var_name))
+
+        # Unmask array
+        data = self._unmask_array(data)
+
+        return data
+
+    def load(self, var_list=None):
+        """
+        Load the selected variables.
+
+        This function fills the "data" key of each selected variable with the corresponding values.
+
+        Parameters
+        ----------
+        var_list : List, str, None
+            List (or single string) of the variables to be loaded.
+        """
+
+        if (self.__ini_path is None) and (self.dataset is None):
+            raise RuntimeError("Only data from existing files can be loaded.")
+
+        if self.dataset is None:
+            self.__open_netcdf4()
+            close = True
+        else:
+            close = False
+
+        if isinstance(var_list, str):
+            var_list = [var_list]
+        elif var_list is None:
+            var_list = list(self.variables.keys())
+
+        for i, var_name in enumerate(var_list):
+            if self.info:
+                print("Rank {0:03d}: Loading {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(var_list)))
+            if self.variables[var_name]["data"] is None:
+                self.variables[var_name]["data"] = self._read_variable(var_name)
+                # Data type changes when joining characters in read_variable (S1 to S+strlen)
+                if "strlen" in self.variables[var_name]["dimensions"]:
+                    if self.strlen is None:
+                        self.set_strlen()
+                    self.variables[var_name]["dtype"] = str
+                    self.variables[var_name]["dimensions"] = tuple([x for x in self.variables[var_name]["dimensions"]
+                                                                    if x != "strlen"])
+            else:
+                if self.master:
+                    print("Data for {0} was previously loaded. Skipping variable.".format(var_name))
+            if self.info:
+                print("Rank {0:03d}: Loaded {1} var ({2})".format(
+                    self.rank, var_name, self.variables[var_name]["data"].shape))
+
+        if close:
+            self.close()
+
+        return None
+
+    @staticmethod
+    def _unmask_array(data):
+        """
+        Convert missing values to nan. This operation is done because sometimes the missing value is lost during
+        the calculation.
+
+        Parameters
+        ----------
+        data : array
+            Masked array to unmask.
+
+        Returns
+        -------
+        array
+            Unmasked array.
+        """
+
+        if isinstance(data, ma.MaskedArray):
+            try:
+                data = data.filled(nan)
+            except TypeError:
+                msg = "Data missing values cannot be converted to nan."
+                warn(msg)
+                sys.stderr.flush()
+
+        return data
+
+    def to_dtype(self, data_type="float32"):
+        """
+        Cast variables data into the selected data type.
+
+        Parameters
+        ----------
+        data_type : str or Type
+            Data type. Default: "float32".
+        """
+
+        for var_name, var_info in self.variables.items():
+            if isinstance(var_info["data"], ndarray):
+                self.variables[var_name]["data"] = self.variables[var_name]["data"].astype(data_type)
+            self.variables[var_name]["dtype"] = data_type
+
+        return None
+
+    def concatenate(self, aux_nessy):
+        """
+        Concatenate different variables into the same NES object.
+
+        Parameters
+        ----------
+        aux_nessy : Nes, str
+            Nes object or str with the path to the NetCDF file that contains the variables to add.
+
+        Returns
+        -------
+        list
+            A List of var names added.
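+            e.g. added = nessy.concatenate("/path/to/extra_vars.nc"), where "nessy" is a
+            hypothetical Nes object; variables not already present are deep-copied into it
+            and their names are returned.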
+ """ + + if isinstance(aux_nessy, str): + aux_nessy = self.new(path=aux_nessy, comm=self.comm, parallel_method=self.parallel_method, + avoid_first_hours=self.hours_start, avoid_last_hours=self.hours_end, + first_level=self.first_level, last_level=self.last_level) + new = True + else: + new = False + for var_name, var_info in aux_nessy.variables.items(): + if var_info["data"] is None: + aux_nessy.read_axis_limits = self.read_axis_limits + aux_nessy.load(var_name) + + new_vars_added = [] + for new_var_name, new_var_data in aux_nessy.variables.items(): + if new_var_name not in self.variables.keys(): + self.variables[new_var_name] = deepcopy(new_var_data) + new_vars_added.append(new_var_name) + + if new: + del aux_nessy + + return new_vars_added + + def __get_global_attributes(self, create_nes=False): + """ + Read the netcdf global attributes. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + gl_attrs : dict + Dictionary with the netCDF global attributes. + """ + + gl_attrs = {} + + if not create_nes: + for attrname in self.dataset.ncattrs(): + gl_attrs[attrname] = getattr(self.dataset, attrname) + + return gl_attrs + + # ================================================================================================================== + # Writing + # ================================================================================================================== + + def _get_write_axis_limits(self): + """ + Calculate the 4D writing axis limits depending on if them have to balanced or not. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to write. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. + """ + + if self.balanced: + return self._get_write_axis_limits_balanced() + else: + return self._get_write_axis_limits_unbalanced() + + def _get_write_axis_limits_unbalanced(self): + """ + Calculate the 4D writing axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to write. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. + """ + + axis_limits = {"x_min": None, "x_max": None, + "y_min": None, "y_max": None, + "z_min": None, "z_max": None, + "t_min": None, "t_max": None} + my_shape = self.get_full_shape() + if self.parallel_method == "Y": + y_len = my_shape[0] + axis_limits["y_min"] = (y_len // self.size) * self.rank + if self.rank + 1 < self.size: + axis_limits["y_max"] = (y_len // self.size) * (self.rank + 1) + elif self.parallel_method == "X": + x_len = my_shape[-1] + axis_limits["x_min"] = (x_len // self.size) * self.rank + if self.rank + 1 < self.size: + axis_limits["x_max"] = (x_len // self.size) * (self.rank + 1) + elif self.parallel_method == "T": + t_len = len(self.get_full_times()) + axis_limits["t_min"] = ((t_len // self.size) * self.rank) + if self.rank + 1 < self.size: + axis_limits["t_max"] = (t_len // self.size) * (self.rank + 1) + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + return axis_limits + + def _get_write_axis_limits_balanced(self): + """ + Calculate the 4D reading balanced axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
+ """ + my_shape = self.get_full_shape() + fid_dist = {} + if self.parallel_method == "Y": + len_to_split = my_shape[0] + min_axis = "y_min" + max_axis = "y_max" + elif self.parallel_method == "X": + len_to_split = my_shape[-1] + min_axis = "x_min" + max_axis = "x_max" + elif self.parallel_method == "T": + len_to_split = len(self.get_full_times()) + min_axis = "t_min" + max_axis = "t_max" + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + procs_len = len_to_split // self.size + procs_rows_extended = len_to_split - (procs_len * self.size) + + rows_sum = 0 + for proc in range(self.size): + fid_dist[proc] = {"x_min": 0, "x_max": None, + "y_min": 0, "y_max": None, + "z_min": 0, "z_max": None, + "t_min": 0, "t_max": None} + if proc < procs_rows_extended: + aux_rows = procs_len + 1 + else: + aux_rows = procs_len + + len_to_split -= aux_rows + if len_to_split < 0: + rows = len_to_split + aux_rows + else: + rows = aux_rows + + fid_dist[proc][min_axis] = rows_sum + fid_dist[proc][max_axis] = rows_sum + rows + + # Last element + if len_to_split == 0: + fid_dist[proc][max_axis] = None + + rows_sum += rows + + axis_limits = fid_dist[self.rank] + + return axis_limits + + def _create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. + """ + + # Create time dimension + netcdf.createDimension("time", None) + + # Create time_nv (number of vertices) dimension + full_time_bnds = self.get_full_time_bnds() + if full_time_bnds is not None: + netcdf.createDimension("time_nv", 2) + + # Create lev, lon and lat dimensions + netcdf.createDimension("lev", len(self.lev["data"])) + + # Create string length dimension + if self.strlen is not None: + netcdf.createDimension("strlen", self.strlen) + + return None + + def _create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. + """ + + self._create_dimension_variables_64(netcdf) + + return None + + def _create_dimension_variables_32(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + # TIMES + full_time = self.get_full_times() + full_time_bnds = self.get_full_time_bnds() + time_var = netcdf.createVariable("time", float32, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "{0} since {1}".format(self._time_resolution, full_time[0].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if full_time_bnds is not None: + if self._climatology: + time_var.climatology = self._climatology_var_name + else: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(full_time[:], time_var.units, time_var.calendar) + + # TIME BOUNDS + if full_time_bnds is not None: + if self._climatology: + time_bnds_var = netcdf.createVariable(self._climatology_var_name, float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + else: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(full_time_bnds, time_var.units, calendar="standard") + + # LEVELS + full_lev = self.get_full_levels() + lev = netcdf.createVariable("lev", float32, ("lev",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if "units" in full_lev.keys(): + lev.units = full_lev["units"] + else: + lev.units = "" + if "positive" in full_lev.keys(): + lev.positive = full_lev["positive"] + + if self.size > 1: + lev.set_collective(True) + lev[:] = array(full_lev["data"], dtype=float32) + + # LATITUDES + full_lat = self.get_full_latitudes() + full_lat_bnds = self.get_full_latitudes_boundaries() + lat = netcdf.createVariable("lat", float32, self._lat_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if full_lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = array(full_lat["data"], dtype=float32) + + # LATITUDES BOUNDS + if full_lat_bnds is not None: + lat_bnds_var = netcdf.createVariable("lat_bnds", float32, + self._lat_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lat_bnds_var.set_collective(True) + lat_bnds_var[:] = array(full_lat_bnds["data"], dtype=float32) + + # LONGITUDES + full_lon = self.get_full_longitudes() + full_lon_bnds = self.get_full_longitudes_boundaries() + lon = netcdf.createVariable("lon", float32, self._lon_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if full_lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = array(full_lon["data"], dtype=float32) + + # LONGITUDES BOUNDS + if full_lon_bnds is not None: + lon_bnds_var = netcdf.createVariable("lon_bnds", float32, + self._lon_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lon_bnds_var.set_collective(True) + lon_bnds_var[:] = array(full_lon_bnds["data"], dtype=float32) + + return None + + def _create_dimension_variables_64(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + # TIMES + full_time = self.get_full_times() + full_time_bnds = self.get_full_time_bnds() + time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "{0} since {1}".format(self._time_resolution, full_time[0].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if full_time_bnds is not None: + if self._climatology: + time_var.climatology = self._climatology_var_name + else: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(full_time[:], time_var.units, time_var.calendar) + + # TIME BOUNDS + if full_time_bnds is not None: + if self._climatology: + time_bnds_var = netcdf.createVariable(self._climatology_var_name, float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + else: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(full_time_bnds, time_var.units, calendar="standard") + + # LEVELS + full_lev = self.get_full_levels() + lev = netcdf.createVariable("lev", full_lev["data"].dtype, ("lev",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if "units" in full_lev.keys(): + lev.units = full_lev["units"] + else: + lev.units = "" + if "positive" in full_lev.keys(): + lev.positive = full_lev["positive"] + + if self.size > 1: + lev.set_collective(True) + lev[:] = full_lev["data"] + + # LATITUDES + full_lat = self.get_full_latitudes() + full_lat_bnds = self.get_full_latitudes_boundaries() + lat = netcdf.createVariable("lat", full_lat["data"].dtype, self._lat_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if full_lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = full_lat["data"] + + # LATITUDES BOUNDS + if full_lat_bnds is not None: + lat_bnds_var = netcdf.createVariable("lat_bnds", full_lat_bnds["data"].dtype, + self._lat_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lat_bnds_var.set_collective(True) + lat_bnds_var[:] = full_lat_bnds["data"] + + # LONGITUDES + full_lon = self.get_full_longitudes() + full_lon_bnds = self.get_full_longitudes_boundaries() + lon = netcdf.createVariable("lon", full_lon["data"].dtype, self._lon_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if full_lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = full_lon["data"] + + # LONGITUDES BOUNDS + if full_lon_bnds is not None: + lon_bnds_var = netcdf.createVariable("lon_bnds", full_lon_bnds["data"].dtype, + self._lon_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lon_bnds_var.set_collective(True) + lon_bnds_var[:] = full_lon_bnds["data"] + + return None + + def _create_cell_measures(self, netcdf): + + # CELL AREA + if "cell_area" in self.cell_measures.keys(): + cell_area = netcdf.createVariable("cell_area", self.cell_measures["cell_area"]["data"].dtype, self._var_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + cell_area.set_collective(True) + 
cell_area[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] = \ + self.cell_measures["cell_area"]["data"] + + cell_area.long_name = "area of grid cell" + cell_area.standard_name = "cell_area" + cell_area.units = "m2" + + for var_name in self.variables.keys(): + self.variables[var_name]["cell_measures"] = "area: cell_area" + + if self.info: + print("Rank {0:03d}: Cell measures done".format(self.rank)) + return None + + def _str2char(self, data): + + if self.strlen is None: + msg = "String data could not be converted into chars while writing." + msg += " Please, set the maximum string length (set_strlen) before writing." + raise RuntimeError(msg) + + # Get final shape by adding strlen at the end + data_new_shape = data.shape + (self.strlen, ) + + # nD (2D, 3D, 4D) data as 1D string array + data = data.flatten() + + # Split strings into chars (S1) + data_aux = stringtochar(array([v.encode("ascii", "ignore") for v in data]).astype("S" + str(self.strlen))) + data_aux = data_aux.reshape(data_new_shape) + + return data_aux + + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + for i, (var_name, var_dict) in enumerate(self.variables.items()): + if isinstance(var_dict["data"], int) and var_dict["data"] == 0: + var_dims = ("time", "lev",) + self._var_dim + var_dtype = float32 + else: + # Get dimensions + if (var_dict["data"] is None) or (len(var_dict["data"].shape) == 4): + var_dims = ("time", "lev",) + self._var_dim + else: + var_dims = self._var_dim + + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception as e: # TODO: Detect exception + print(e) + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. 
Define dtype explicitly as dictionary key 'dtype'") + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.balanced: + raise NotImplementedError("A balanced data cannot be chunked.") + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl, + chunksizes=chunk_size) + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if att_value is not None: + if self.info: + print("Rank {0:03d}: Filling {1}".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if isinstance(att_value, int) and att_value == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(att_value.shape) == 5: + if "strlen" in var_dims: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :] = att_value + else: + raise NotImplementedError("It is not possible to write 5D variables.") + + elif len(att_value.shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + + elif len(att_value.shape) == 3: + if "strlen" in var_dims: + var[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :] = att_value + else: + raise NotImplementedError("It is not possible to write 3D variables.") + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format( + 
self.rank, var_name, i + 1, len(self.variables))) + + return None + + def append_time_step_data(self, i_time, out_format="DEFAULT"): + """ + Fill the netCDF data for the indicated index time. + + Parameters + ---------- + i_time : int + index of the time step to write + out_format : str + Indicates the output format type to change the units (if needed) + """ + if self.serial_nc is not None: + try: + data = self._gather_data(self.variables) + except KeyError: + # Key Error means string data + data = self.__gather_data_py_object(self.variables) + if self.master: + self.serial_nc.variables = data + self.serial_nc.append_time_step_data(i_time, out_format=out_format) + self.comm.Barrier() + else: + if out_format == "MONARCH": + self.variables = to_monarch_units(self) + elif out_format == "CMAQ": + self.variables = to_cmaq_units(self) + elif out_format == "WRF_CHEM": + self.variables = to_wrf_chem_units(self) + for i, (var_name, var_dict) in enumerate(self.variables.items()): + for att_name, att_value in var_dict.items(): + if att_name == "data": + + if att_value is not None: + if self.info: + print("Rank {0:03d}: Filling {1}".format(self.rank, var_name)) + var = self.dataset.variables[var_name] + if isinstance(att_value, int) and att_value == 0: + var[i_time, + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + elif len(att_value.shape) == 4: + var[i_time, + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + + elif len(att_value.shape) == 3: + raise NotImplementedError("It is not possible to write 3D variables.") + else: + raise NotImplementedError("SHAPE APPEND ERROR: {0}".format(att_value.shape)) + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + else: + raise ValueError("Cannot append None Data for {0}".format(var_name)) + else: + # Metadata already writen + pass + + return None + + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + + Must be implemented on inner classes + + Returns + ---------- + centre_lat : dict + Dictionary with data of centre latitudes in 1D + centre_lon : dict + Dictionary with data of centre longitudes in 1D + """ + + return None + + def _create_metadata(self, netcdf): + """ + Must be implemented on inner class. + """ + + return None + + @staticmethod + def _set_var_crs(var): + """ + Must be implemented on inner class. + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + return None + + def __to_netcdf_py(self, path, chunking=False, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + path : str + Path to the output netCDF file. + chunking: bool + Indicates if you want to chunk the output netCDF. 
+        keep_open : bool
+            Indicates if you want to keep the NetCDF open to fill the data by time step.
+        """
+
+        # Open NetCDF
+        if self.info:
+            print("Rank {0:03d}: Creating {1}".format(self.rank, path))
+        if self.size > 1:
+            netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info())
+        else:
+            netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False)
+        if self.info:
+            print("Rank {0:03d}: NetCDF ready to write".format(self.rank))
+
+        # Create dimensions
+        self._create_dimensions(netcdf)
+
+        # Create dimension variables
+        self._create_dimension_variables(netcdf)
+        if self.info:
+            print("Rank {0:03d}: Dimensions done".format(self.rank))
+
+        # Create cell measures
+        self._create_cell_measures(netcdf)
+
+        # Create variables
+        self._create_variables(netcdf, chunking=chunking)
+
+        # Create metadata
+        self._create_metadata(netcdf)
+
+        # Close NetCDF
+        if self.global_attrs is not None:
+            for att_name, att_value in self.global_attrs.items():
+                netcdf.setncattr(att_name, att_value)
+        netcdf.setncattr("Conventions", "CF-1.7")
+
+        if keep_open:
+            self.dataset = netcdf
+        else:
+            netcdf.close()
+
+        return None
+
+    def __to_netcdf_cams_ra(self, path):
+        return to_netcdf_cams_ra(self, path)
+
+    def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES",
+                  keep_open=False):
+        """
+        Write the netCDF output file.
+
+        Parameters
+        ----------
+        path : str
+            Path to the output netCDF file.
+        compression_level : int
+            Level of compression (0 to 9). Default: 0 (no compression).
+        serial : bool
+            Indicates if you want to write in serial or not. Default: False.
+        info : bool
+            Indicates if you want to print the information of each writing step by stdout. Default: False.
+        chunking : bool
+            Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False.
+        nc_type : str
+            Type of NetCDF to write. Accepted values: "NES" (or "DEFAULT"), "CAMS_RA", "MONARCH", "CMAQ",
+            "WRF_CHEM".
+        keep_open : bool
+            Indicates if you want to keep the NetCDF open to fill the data by time step.
+        """
+        old_info = self.info
+        self.info = info
+        self.serial_nc = None
+        self.zip_lvl = compression_level
+
+        # if serial:
+        if serial and self.size > 1:
+            try:
+                data = self._gather_data(self.variables)
+            except KeyError:
+                data = self.__gather_data_py_object(self.variables)
+            try:
+                c_measures = self._gather_data(self.cell_measures)
+            except KeyError:
+                c_measures = self.__gather_data_py_object(self.cell_measures)
+            if self.master:
+                new_nc = self.copy(copy_vars=False)
+                new_nc.set_communicator(MPI.COMM_SELF)
+                new_nc.variables = data
+                new_nc.cell_measures = c_measures
+                if nc_type in ["NES", "DEFAULT"]:
+                    new_nc.__to_netcdf_py(path, keep_open=keep_open)
+                elif nc_type == "CAMS_RA":
+                    new_nc.__to_netcdf_cams_ra(path)
+                elif nc_type == "MONARCH":
+                    to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open)
+                elif nc_type == "CMAQ":
+                    to_netcdf_cmaq(new_nc, path, keep_open=keep_open)
+                elif nc_type == "WRF_CHEM":
+                    to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open)
+                else:
+                    msg = f"Unknown NetCDF type '{nc_type}'. "
+                    msg += "Use NES (or DEFAULT), CAMS_RA, MONARCH, CMAQ or WRF_CHEM"
+                    raise ValueError(msg)
+                self.serial_nc = new_nc
+            else:
+                self.serial_nc = True
+        else:
+            if nc_type in ["NES", "DEFAULT"]:
+                self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open)
+            elif nc_type == "CAMS_RA":
+                self.__to_netcdf_cams_ra(path)
+            elif nc_type == "MONARCH":
+                to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open)
+            elif nc_type == "CMAQ":
+                to_netcdf_cmaq(self, path, keep_open=keep_open)
+            elif nc_type == "WRF_CHEM":
+                to_netcdf_wrf_chem(self, path, keep_open=keep_open)
+            else:
+                msg = f"Unknown NetCDF type '{nc_type}'. "
+                msg += "Use NES (or DEFAULT), CAMS_RA, MONARCH, CMAQ or WRF_CHEM"
+                raise ValueError(msg)
+
+        self.info = old_info
+
+        return None
+
+    def __to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False):
+        """
+        Private method to write output file with grib2 format.
+
+        Parameters
+        ----------
+        path : str
+            Path to the output file.
+        grib_keys : dict
+            Dictionary with the grib2 keys.
+        grib_template_path : str
+            Path to the grib2 file to use as template.
+        lat_flip : bool
+            Indicates if the latitude values (and data) have to be flipped.
+        info : bool
+            Indicates if you want to print extra information during the process.
+        """
+
+        from eccodes import codes_grib_new_from_file
+        from eccodes import codes_keys_iterator_new
+        from eccodes import codes_keys_iterator_next
+        from eccodes import codes_keys_iterator_get_name
+        from eccodes import codes_get_string
+        from eccodes import codes_keys_iterator_delete
+        from eccodes import codes_clone
+        from eccodes import codes_set
+        from eccodes import codes_set_values
+        from eccodes import codes_write
+        from eccodes import codes_release
+
+        fout = open(path, "wb")
+
+        # Read template
+        fin = open(grib_template_path, "rb")
+
+        gid = codes_grib_new_from_file(fin)
+        if gid is None:
+            sys.exit(1)
+
+        iterid = codes_keys_iterator_new(gid, "ls")
+        while codes_keys_iterator_next(iterid):
+            keyname = codes_keys_iterator_get_name(iterid)
+            keyval = codes_get_string(gid, keyname)
+            if info:
+                print("%s = %s" % (keyname, keyval))
+
+        codes_keys_iterator_delete(iterid)
+        for var_name, var_info in self.variables.items():
+            for i_time, time in enumerate(self.time):
+                for i_lev, lev in enumerate(self.lev["data"]):
+                    clone_id = codes_clone(gid)
+
+                    # Adding grib2 keys to file
+                    for key, value in grib_keys.items():
+                        if value not in ["", "None", None, nan]:
+                            try:
+                                codes_set(clone_id, key, value)
+                            except Exception as e:
+                                print(f"Something went wrong while writing the Grib key '{key}': {value}")
+                                raise e
+
+                    # Time dependent keys
+                    if "dataTime" in grib_keys.keys() and grib_keys["dataTime"] in ["", "None", None, nan]:
+                        codes_set(clone_id, "dataTime", int(i_time * 100))
+                    if "stepRange" in grib_keys.keys() and grib_keys["stepRange"] in ["", "None", None, nan]:
+                        n_secs = (time - self.get_full_times()[0]).total_seconds()
+                        codes_set(clone_id, "stepRange", int(n_secs // 3600))
+                    if "forecastTime" in grib_keys.keys() and grib_keys["forecastTime"] in ["", "None", None, nan]:
+                        n_secs = (time - self.get_full_times()[0]).total_seconds()
+                        codes_set(clone_id, "forecastTime", int(n_secs))
+
+                    # Level dependent keys
+                    if "typeOfFirstFixedSurface" in grib_keys.keys() and \
+                            grib_keys["typeOfFirstFixedSurface"] in ["", "None", None, nan]:
+                        if float(lev) == 0:
+                            codes_set(clone_id, "typeOfFirstFixedSurface", 1)
+                            # grib_keys["typeOfFirstFixedSurface"] = 1
+                        else:
+                            codes_set(clone_id, "typeOfFirstFixedSurface", 103)
+                            # grib_keys["typeOfFirstFixedSurface"] = 103
+                    if "level" in grib_keys.keys() and grib_keys["level"] in ["", "None", None, nan]:
+                        codes_set(clone_id, "level", float(lev))
+
+                    newval = var_info["data"][i_time, i_lev, :, :]
+                    if lat_flip:
+                        newval = flipud(newval)
+
+                    # TODO Check default NaN Value
+                    newval[isnan(newval)] = 0.
+
+                    codes_set_values(clone_id, array(newval.ravel(), dtype="float64"))
+                    codes_write(clone_id, fout)
+                    del newval
+        codes_release(gid)
+        fout.close()
+        fin.close()
+
+        return None
+
+    def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False):
+        """
+        Write output file with grib2 format.
+
+        Parameters
+        ----------
+        path : str
+            Path to the output file.
+        grib_keys : dict
+            Dictionary with the grib2 keys.
+        grib_template_path : str
+            Path to the grib2 file to use as template.
+        lat_flip : bool
+            Indicates if the latitude values (and data) have to be flipped.
+        info : bool
+            Indicates if you want to print extra information during the process.
+        """
+
+        # if serial:
+        if self.parallel_method in ["X", "Y"] and self.size > 1:
+            try:
+                data = self._gather_data(self.variables)
+            except KeyError:
+                data = self.__gather_data_py_object(self.variables)
+            try:
+                c_measures = self._gather_data(self.cell_measures)
+            except KeyError:
+                c_measures = self.__gather_data_py_object(self.cell_measures)
+            if self.master:
+                new_nc = self.copy(copy_vars=False)
+                new_nc.set_communicator(MPI.COMM_SELF)
+                new_nc.variables = data
+                new_nc.cell_measures = c_measures
+                new_nc.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info)
+        else:
+            self.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info)
+
+        return None
+
+    def create_shapefile(self):
+        """
+        Create spatial GeoDataFrame (shapefile).
+
+        Returns
+        -------
+        shapefile : GeoPandasDataFrame
+            Shapefile dataframe.
+        """
+
+        if self.shapefile is None:
+
+            if self.lat_bnds is None or self.lon_bnds is None:
+                self.create_spatial_bounds()
+
+            # Reshape arrays to create geometry
+            aux_shape = (self.lat_bnds["data"].shape[0], self.lon_bnds["data"].shape[0], 4)
+            lon_bnds_aux = empty(aux_shape)
+            lon_bnds_aux[:, :, 0] = self.lon_bnds["data"][newaxis, :, 0]
+            lon_bnds_aux[:, :, 1] = self.lon_bnds["data"][newaxis, :, 1]
+            lon_bnds_aux[:, :, 2] = self.lon_bnds["data"][newaxis, :, 1]
+            lon_bnds_aux[:, :, 3] = self.lon_bnds["data"][newaxis, :, 0]
+
+            lon_bnds = lon_bnds_aux
+            del lon_bnds_aux
+
+            lat_bnds_aux = empty(aux_shape)
+            lat_bnds_aux[:, :, 0] = self.lat_bnds["data"][:, newaxis, 0]
+            lat_bnds_aux[:, :, 1] = self.lat_bnds["data"][:, newaxis, 0]
+            lat_bnds_aux[:, :, 2] = self.lat_bnds["data"][:, newaxis, 1]
+            lat_bnds_aux[:, :, 3] = self.lat_bnds["data"][:, newaxis, 1]
+
+            lat_bnds = lat_bnds_aux
+            del lat_bnds_aux
+
+            aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2]))
+            aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2]))
+
+            # Create dataframe containing all polygons
+            geometry = []
+            for i in range(aux_b_lons.shape[0]):
+                geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]),
+                                         (aux_b_lons[i, 1], aux_b_lats[i, 1]),
+                                         (aux_b_lons[i, 2], aux_b_lats[i, 2]),
+                                         (aux_b_lons[i, 3], aux_b_lats[i, 3]),
+                                         (aux_b_lons[i, 0], aux_b_lats[i, 0])]))
+
+            fids = self.get_fids()
+            gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326")
+            self.shapefile = gdf
+
+        else:
+            gdf = self.shapefile
+
+        return gdf
+
+    def write_shapefile(self, path):
+        """
+        Save spatial GeoDataFrame (shapefile).
+
+        Parameters
+        ----------
+        path : str
+            Path to the output file.
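+            e.g. nessy.write_shapefile("grid_shp"), where "nessy" is a hypothetical Nes
+            object on which create_shapefile() has already been called.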
+ """ + + if self.shapefile is None: + raise ValueError("Shapefile was not created.") + + if self.size == 1: + # In serial, avoid gather + self.shapefile.to_file(path) + else: + # In parallel + data = self.comm.gather(self.shapefile, root=0) + if self.master: + data = concat(data) + data.to_file(path) + + return None + + def to_shapefile(self, path, time=None, lev=None, var_list=None, info=True): + """ + Create shapefile from NES data. + + 1. Create grid shapefile. + 2. Add variables to shapefile (as independent function). + 3. Write shapefile. + + Parameters + ---------- + path : str + Path to the output file. + time : datetime + Time stamp to select. + lev : int + Vertical level to select. + var_list : List, str, None + List (or single string) of the variables to be loaded and saved in the shapefile. + info: bool + Flag to allow/suppress warnings when the 'time' or 'lev' parameters are None. Default is True. + """ + + # If list is not defined, get all variables + if var_list is None: + var_list = list(self.variables.keys()) + else: + if isinstance(var_list, str): + var_list = [var_list] + + # Add warning for unloaded variables + unloaded_vars = [] + for var_name in var_list: + if self.variables[var_name]["data"] is None: + unloaded_vars.append(var_name) + if len(unloaded_vars) > 0: + raise ValueError("The variables {0} need to be loaded/created before using to_shapefile.".format( + unloaded_vars)) + + # Select first vertical level (if needed) + if lev is None: + if info: + msg = "No vertical level has been specified. The first one will be selected." + warn(msg) + sys.stderr.flush() + idx_lev = 0 + else: + if lev not in self.lev["data"]: + raise ValueError("Level {} is not available. Choose from {}".format(lev, self.lev["data"])) + idx_lev = lev + + # Select first time (if needed) + if time is None: + if info: + msg = "No time has been specified. The first one will be selected." + warn(msg) + sys.stderr.flush() + idx_time = 0 + else: + if time not in self.time: + raise ValueError("Time {} is not available. Choose from {}".format(time, self.time)) + idx_time = self.time.index(time) + + # Create shapefile + self.create_shapefile() + + # Load variables from original file and get data for selected time / level + self.add_variables_to_shapefile(var_list, idx_lev, idx_time) + + # Write shapefile + self.write_shapefile(path) + + return None + + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : List or str + Variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. + idx_time : int + Index of time for which the data will be saved in the shapefile. + """ + + for var_name in var_list: + self.shapefile[var_name] = self.variables[var_name]["data"][idx_time, idx_lev, :].ravel() + + return None + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, len(self.lat["data"])): + for lon_ind in range(0, len(self.lon["data"])): + centroids.append(Point(self.lon["data"][lon_ind], + self.lat["data"][lat_ind])) + + # Create dataframe containing all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf + + def __gather_data_py_object(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_list: dict + Variables dictionary with all the data from all the ranks. + """ + + data_list = deepcopy(data_to_gather) + for var_name in data_list.keys(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + add_dimension = False # to Add a dimension + if self.parallel_method == "Y": + if shp_len == 2: + # if is a 2D concatenate over first axis + axis = 0 + elif shp_len == 3: + # if is a 3D concatenate over second axis + axis = 1 + else: + # if is a 4D concatenate over third axis + axis = 2 + elif self.parallel_method == "X": + if shp_len == 2: + # if is a 2D concatenate over second axis + axis = 1 + elif shp_len == 3: + # if is a 3D concatenate over third axis + axis = 2 + else: + # if is a 4D concatenate over forth axis + axis = 3 + elif self.parallel_method == "T": + if shp_len == 2: + # if is a 2D add dimension + add_dimension = True + axis = None # Not used + elif shp_len == 3: + # if is a 3D concatenate over first axis + axis = 0 + else: + # if is a 4D concatenate over second axis + axis = 0 + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + if add_dimension: + data_list[var_name]["data"] = stack(data_aux) + else: + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. 
+ """ + + data_list = deepcopy(data_to_gather) + for var_name in data_list.keys(): + if self.info and self.master: + print("Gathering {0}".format(var_name)) + if data_list[var_name]["data"] is None: + data_list[var_name]["data"] = None + elif isinstance(data_list[var_name]["data"], int) and data_list[var_name]["data"] == 0: + data_list[var_name]["data"] = 0 + else: + shp_len = len(data_list[var_name]["data"].shape) + # Collect local array sizes using the gather communication pattern + rank_shapes = array(self.comm.gather(data_list[var_name]["data"].shape, root=0)) + sendbuf = data_list[var_name]["data"].flatten() + sendcounts = array(self.comm.gather(len(sendbuf), root=0)) + if self.master: + recvbuf = empty(sum(sendcounts), dtype=type(sendbuf.max())) + else: + recvbuf = None + self.comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendcounts), root=0) + if self.master: + recvbuf = split(recvbuf, cumsum(sendcounts)) + # TODO ask + # I don"t understand why it is giving one more split + if len(recvbuf) > len(sendcounts): + recvbuf = recvbuf[:-1] + for i, shape in enumerate(rank_shapes): + recvbuf[i] = recvbuf[i].reshape(shape) + add_dimension = False # to Add a dimension + if self.parallel_method == "Y": + if shp_len == 2: + # if is a 2D concatenate over first axis + axis = 0 + elif shp_len == 3: + # if is a 3D concatenate over second axis + axis = 1 + else: + # if is a 4D concatenate over third axis + axis = 2 + elif self.parallel_method == "X": + if shp_len == 2: + # if is a 2D concatenate over second axis + axis = 1 + elif shp_len == 3: + # if is a 3D concatenate over third axis + axis = 2 + else: + # if is a 4D concatenate over forth axis + axis = 3 + elif self.parallel_method == "T": + if shp_len == 2: + # if is a 2D add dimension + add_dimension = True + axis = None # Not used + elif shp_len == 3: + # if is a 3D concatenate over first axis + axis = 0 + else: + # if is a 4D concatenate over second axis + axis = 0 + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + if add_dimension: + data_list[var_name]["data"] = stack(recvbuf) + else: + data_list[var_name]["data"] = concatenate(recvbuf, axis=axis) + + return data_list + + # ================================================================================================================== + # Extra Methods + # ================================================================================================================== + @staticmethod + def lon_lat_to_cartesian_ecef(lon, lat): + """ + # Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, + # Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. + # ECEF coordinates represent positions (in meters) as X, Y, Z coordinates, approximating the earth surface + # as an ellipsoid of revolution. + # This conversion is for the subsequent calculation of Euclidean distances of the model grid cell centres + # from each observational station. + # Defining the distance between two points on the earth's surface as simply the Euclidean distance + # between the two lat/lon pairs could lead to inaccurate results depending on the distance + # between two points (i.e. 1 deg. of longitude varies with latitude). + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. 
+ """ + + lla = Proj(proj="latlong", ellps="WGS84", datum="WGS84") + ecef = Proj(proj="geocent", ellps="WGS84", datum="WGS84") + # x, y, z = pyproj.transform(lla, ecef, lon, lat, zeros(lon.shape), radians=False) + # Deprecated: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1 + transformer = Transformer.from_proj(lla, ecef) + x, y, z = transformer.transform(lon, lat, zeros(lon.shape), radians=False) + return column_stack([x, y, z]) + + def add_4d_vertical_info(self, info_to_add): + """ + To add the vertical information from other source. + + Parameters + ---------- + info_to_add : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. + """ + + return vertical_interpolation.add_4d_vertical_info(self, info_to_add) + + def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", extrapolate=None, info=None, + overwrite=False): + """ + Vertical interpolation function. + + Parameters + ---------- + self : Nes + Source Nes object. + new_levels : List + A List of new vertical levels. + new_src_vertical : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. + kind : str + Vertical methods type. + extrapolate : bool or tuple or None or number or NaN + If bool: + - If True, both extrapolation options are set to "extrapolate". + - If False, extrapolation options are set to ("bottom", "top"). + If tuple: + - The first element represents the extrapolation option for the lower bound. + - The second element represents the extrapolation option for the upper bound. + - If any element is bool: + - If True, it represents "extrapolate". + - If False: + - If it"s the first element, it represents "bottom". + - If it"s the second element, it represents "top". + - If any element is None, it is replaced with numpy.nan. + - Other numeric values are kept as they are. + - If any element is NaN, it is kept as NaN. + If None: + - Both extrapolation options are set to (NaN, NaN). + If number: + - Both extrapolation options are set to the provided number. + If NaN: + - Both extrapolation options are set to NaN. + info: None, bool + Indicates if you want to print extra information. + overwrite: bool + Indicates if you want to compute the vertical interpolation in the same object or not. + """ + + return vertical_interpolation.interpolate_vertical( + self, new_levels, new_src_vertical=new_src_vertical, kind=kind, extrapolate_options=extrapolate, info=info, + overwrite=overwrite) + + def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): + """ + Horizontal methods from the current grid to another one. + + Parameters + ---------- + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal methods. choices = ["NearestNeighbour", "Conservative"]. + n_neighbours: int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + info: bool + Indicates if you want to print extra info during the methods process. + to_providentia : bool + Indicates if we want the interpolated grid in Providentia format. + only_create_wm : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. 
+ flux : bool
+ Indicates if you want to calculate the weight matrix for flux variables.
+ """
+
+ return horizontal_interpolation.interpolate_horizontal(
+ self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info,
+ to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux)
+
+ def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True):
+ """
+ Compute overlay intersection of two GeoPandasDataFrames.
+
+ Parameters
+ ----------
+ ext_shp : GeoPandasDataFrame or str
+ GeoDataFrame, or path to the file containing it, with the geometries to intersect.
+ method : str
+ Overlay method. Accepted values: ["nearest", "intersection", "centroid"].
+ var_list : List or None
+ Variables that will be included in the resulting shapefile.
+ info : bool
+ Indicates if you want to print the process info.
+ apply_bbox : bool
+ Indicates if you want to reduce the external shapefile to the grid's bounding box.
+ """
+
+ return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info,
+ apply_bbox=apply_bbox)
+
+ def calculate_grid_area(self, overwrite=True):
+ """
+ Get coordinate bounds and calculate the area of each cell of the grid.
+
+ Parameters
+ ----------
+ self : nes.Nes
+ Source projection Nes Object.
+ overwrite : bool
+ Indicates if we want to overwrite the grid area.
+ """
+
+ if ("cell_area" not in self.cell_measures.keys()) or overwrite:
+ grid_area = cell_measures.calculate_grid_area(self)
+ grid_area = grid_area.reshape([self.lat["data"].shape[0], self.lon["data"].shape[-1]])
+ self.cell_measures["cell_area"] = {"data": grid_area}
+ else:
+ grid_area = self.cell_measures["cell_area"]["data"]
+
+ return grid_area
+
+ @staticmethod
+ def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142,
+ earth_radius_major_axis=6378137.0):
+ """
+ Get coordinate bounds and calculate the area of each polygon in a set of geometries.
+
+ Parameters
+ ----------
+ geometry_list : List
+ A List with polygon geometries.
+ earth_radius_minor_axis : float
+ Radius of the minor axis of the Earth.
+ earth_radius_major_axis : float
+ Radius of the major axis of the Earth.
+ """
+
+ return cell_measures.calculate_geometry_area(geometry_list, earth_radius_minor_axis=earth_radius_minor_axis,
+ earth_radius_major_axis=earth_radius_major_axis)
+
+ @staticmethod
+ def get_earth_radius(ellps):
+ """
+ Get the minor and major axes of the Earth.
+
+ Parameters
+ ----------
+ ellps : str
+ Spatial reference system.
+ """
+
+ # WGS84 with radius defined in Cartopy source code
+ earth_radius_dict = {"WGS84": [6356752.3142, 6378137.0]}
+
+ return earth_radius_dict[ellps]
+
+ def create_providentia_exp_centre_coordinates(self):
+ """
+ Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays.
+
+ Returns
+ ----------
+ model_centre_lat : dict
+ Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
+ model_centre_lon : dict
+ Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
+ """
+
+ raise NotImplementedError("create_providentia_exp_centre_coordinates function is not implemented by default")
+
+ # noinspection DuplicatedCode
+ def create_providentia_exp_grid_edge_coordinates(self):
+ """
+ Calculate grid edge latitudes and longitudes and get model grid outline.
+
+ Returns
+ ----------
+ grid_edge_lat : dict
+ Dictionary with data of grid edge latitudes.
+ grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + raise NotImplementedError("create_providentia_exp_grid_edge_coordinates function is not implemented by default") diff --git a/build/lib/nes/nc_projections/latlon_nes.py b/build/lib/nes/nc_projections/latlon_nes.py new file mode 100644 index 0000000..35d68c8 --- /dev/null +++ b/build/lib/nes/nc_projections/latlon_nes.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python + +from numpy import float64, linspace, meshgrid, mean, diff, append, flip, repeat, concatenate, vstack +from pyproj import Proj +from .default_nes import Nes + + +class LatLonNes(Nes): + """ + + Attributes + ---------- + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("lat", "lon") for a regular latitude-longitude projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("lat", ) for a regular latitude-longitude projection. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("lon", ) for a regular latitude-longitude projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the LatLonNes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, balanced=balanced, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("lat", "lon") + self._lat_dim = ("lat",) + self._lon_dim = ("lon",) + + self.free_vars("crs") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. 
+ dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + @staticmethod + def _get_pyproj_projection(): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. + """ + + projection = Proj(proj="latlong", ellps="WGS84",) + + return projection + + # noinspection DuplicatedCode + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + + Returns + ------- + Dict[str, Any] + A dictionary containing projection data with the following keys: + - "grid_mapping_name" : str + Type of grid mapping (e.g., "latitude_longitude"). + - "semi_major_axis" : float + Semi-major axis of the Earth's ellipsoid. + - "inverse_flattening" : int + Inverse flattening parameter. + - "inc_lat" : float + Increment in latitude. + - "inc_lon" : float + Increment in longitude. + - "lat_orig" : float + Origin latitude of the grid. + - "lon_orig" : float + Origin longitude of the grid. + - "n_lat" : int + Number of grid points along latitude. + - "n_lon" : int + Number of grid points along longitude. + + Notes + ----- + Depending on the `create_nes` flag and input `kwargs`, the method constructs + or retrieves projection data. If `create_nes` is True, the method initializes + projection details based on provided arguments such as increments (`inc_lat`, `inc_lon`), + and if additional keyword arguments (`lat_orig`, `lon_orig`, `n_lat`, `n_lon`) are not provided, + defaults for the global domain are used. If `create_nes` is False, the method checks for + an existing "crs" variable in `self.variables` and retrieves its data, freeing the "crs" variable + afterward to optimize memory usage. 
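+
+ For example (an illustrative sketch, not a prescribed call), creating a grid with
+ only inc_lat=0.5 and inc_lon=0.5 yields the global defaults lat_orig=-90,
+ lon_orig=-180, n_lat=360 and n_lon=720, while a regional domain must also
+ provide lat_orig, lon_orig, n_lat and n_lon.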
+
+ """
+ if create_nes:
+ projection_data = {"grid_mapping_name": "latitude_longitude",
+ "semi_major_axis": self.earth_radius[1],
+ "inverse_flattening": 0,
+ "inc_lat": kwargs["inc_lat"],
+ "inc_lon": kwargs["inc_lon"],
+ }
+ # Global domain
+ if len(kwargs) == 2:
+ projection_data["lat_orig"] = -90
+ projection_data["lon_orig"] = -180
+ projection_data["n_lat"] = int(180 // float64(projection_data["inc_lat"]))
+ projection_data["n_lon"] = int(360 // float64(projection_data["inc_lon"]))
+ # Other domains
+ else:
+ projection_data["lat_orig"] = kwargs["lat_orig"]
+ projection_data["lon_orig"] = kwargs["lon_orig"]
+ projection_data["n_lat"] = kwargs["n_lat"]
+ projection_data["n_lon"] = kwargs["n_lon"]
+ else:
+ if "crs" in self.variables.keys():
+ projection_data = self.variables["crs"]
+ self.free_vars("crs")
+ else:
+ projection_data = {"grid_mapping_name": "latitude_longitude",
+ "semi_major_axis": self.earth_radius[1],
+ "inverse_flattening": 0,
+ }
+
+ if "dtype" in projection_data.keys():
+ del projection_data["dtype"]
+
+ if "data" in projection_data.keys():
+ del projection_data["data"]
+
+ if "dimensions" in projection_data.keys():
+ del projection_data["dimensions"]
+
+ return projection_data
+
+ def _create_dimensions(self, netcdf):
+ """
+ Create the "lon", "lat" and "spatial_nv" dimensions and the super dimensions "lev", "time" and "time_nv".
+
+ Parameters
+ ----------
+ netcdf : Dataset
+ NetCDF object.
+ """
+
+ super(LatLonNes, self)._create_dimensions(netcdf)
+
+ netcdf.createDimension("lon", len(self.get_full_longitudes()["data"]))
+ netcdf.createDimension("lat", len(self.get_full_latitudes()["data"]))
+
+ # Create spatial_nv (number of vertices) dimension
+ if (self.lat_bnds is not None) and (self.lon_bnds is not None):
+ netcdf.createDimension("spatial_nv", 2)
+
+ return None
+
+ def _create_centre_coordinates(self, **kwargs):
+ """
+ Calculate centre latitudes and longitudes from grid details.
+
+ Returns
+ ----------
+ centre_lat : dict
+ Dictionary with data of centre latitudes in 1D.
+ centre_lon : dict
+ Dictionary with data of centre longitudes in 1D.
+ """
+
+ # Get grid resolution
+ inc_lat = float64(self.projection_data["inc_lat"])
+ inc_lon = float64(self.projection_data["inc_lon"])
+
+ # Get coordinates origin
+ lat_orig = float64(self.projection_data["lat_orig"])
+ lon_orig = float64(self.projection_data["lon_orig"])
+
+ # Get number of coordinates
+ n_lat = int(self.projection_data["n_lat"])
+ n_lon = int(self.projection_data["n_lon"])
+
+ # Calculate centre latitudes
+ lat_c_orig = lat_orig + (inc_lat / 2)
+ centre_lat = linspace(lat_c_orig, lat_c_orig + (inc_lat * (n_lat - 1)), n_lat, dtype=float64)
+
+ # Calculate centre longitudes
+ lon_c_orig = lon_orig + (inc_lon / 2)
+ centre_lon = linspace(lon_c_orig, lon_c_orig + (inc_lon * (n_lon - 1)), n_lon, dtype=float64)
+
+ return {"data": centre_lat}, {"data": centre_lon}
+
+ def create_providentia_exp_centre_coordinates(self):
+ """
+ Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays.
+
+ Returns
+ ----------
+ model_centre_lat : dict
+ Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
+ model_centre_lon : dict
+ Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
+ """ + + model_centre_lon_data, model_centre_lat_data = meshgrid(self.lon["data"], self.lat["data"]) + + # Calculate centre latitudes + model_centre_lat = {"data": model_centre_lat_data} + + # Calculate centre longitudes + model_centre_lon = {"data": model_centre_lon_data} + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + + # Get grid resolution + inc_lon = abs(mean(diff(self.lon["data"]))) + inc_lat = abs(mean(diff(self.lat["data"]))) + + # Get bounds + lat_bounds = self._create_single_spatial_bounds(self.lat["data"], inc_lat) + lon_bounds = self._create_single_spatial_bounds(self.lon["data"], inc_lon) + + # Get latitudes for grid edge + left_edge_lat = append(lat_bounds.flatten()[::2], lat_bounds.flatten()[-1]) + right_edge_lat = flip(left_edge_lat, 0) + top_edge_lat = repeat(lat_bounds[-1][-1], len(self.lon["data"]) - 1) + bottom_edge_lat = repeat(lat_bounds[0][0], len(self.lon["data"])) + lat_grid_edge = concatenate((left_edge_lat, top_edge_lat, right_edge_lat, bottom_edge_lat)) + + # Get longitudes for grid edge + left_edge_lon = repeat(lon_bounds[0][0], len(self.lat["data"]) + 1) + top_edge_lon = lon_bounds.flatten()[1:-1:2] + right_edge_lon = repeat(lon_bounds[-1][-1], len(self.lat["data"]) + 1) + bottom_edge_lon = flip(lon_bounds.flatten()[:-1:2], 0) + lon_grid_edge = concatenate((left_edge_lon, top_edge_lon, right_edge_lon, bottom_edge_lon)) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((lon_grid_edge, lat_grid_edge)).T + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "crs". + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + var.grid_mapping = "crs" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the rotated latitude longitude grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset. + """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("crs", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.semi_major_axis = self.projection_data["semi_major_axis"] + mapping.inverse_flattening = self.projection_data["inverse_flattening"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if the latitudes have to be flipped + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. 
+ """ + + return super(LatLonNes, self).to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) diff --git a/build/lib/nes/nc_projections/lcc_nes.py b/build/lib/nes/nc_projections/lcc_nes.py new file mode 100644 index 0000000..f9eda6e --- /dev/null +++ b/build/lib/nes/nc_projections/lcc_nes.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python + +from numpy import float64, linspace, array, mean, diff, append, flip, repeat, concatenate, vstack +from geopandas import GeoDataFrame +from pandas import Index +from pyproj import Proj +from copy import deepcopy +from typing import Dict, Any +from shapely.geometry import Polygon, Point +from .default_nes import Nes + + +class LCCNes(Nes): + """ + + Attributes + ---------- + _full_y : dict + Y coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + _full_x : dict + X coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + y : dict + Y coordinates dictionary with the portion of "data" corresponding to the rank values. + x : dict + X coordinates dictionary with the portion of "data" corresponding to the rank values. + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("y", "x", ) for an LCC projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("y", "x", ) for an LCC projection. + _lon_dim : tuple + ATuple with the name of the dimensions of the Longitude values. + ("y", "x") for an LCC projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the LCCNes class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. 
+ """ + self._full_y = None + self._full_x = None + + super(LCCNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, balanced=balanced, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + else: + # Complete dimensions + self._full_y = self._get_coordinate_dimension("y") + self._full_x = self._get_coordinate_dimension("x") + + # Dimensions screening + self.y = self._get_coordinate_values(self.get_full_y(), "Y") + self.x = self._get_coordinate_values(self.get_full_x(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("y", "x") + self._lat_dim = ("y", "x") + self._lon_dim = ("y", "x") + + self.free_vars("crs") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def get_full_y(self) -> Dict[str, Any]: + """ + Retrieve the complete Y information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_y) + + return data + + def get_full_x(self) -> Dict[str, Any]: + """ + Retrieve the complete X information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_x) + return data + + def set_full_y(self, data: Dict[str, Any]) -> None: + """ + Set the complete Y information. 
+
+ Parameters
+ ----------
+ data : Dict[str, Any]
+ A dictionary containing the complete Y coordinate data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of Y coordinate values.
+ attr_name: attr_value, # Y coordinate attributes.
+ ...
+ }
+ """
+ if self.master:
+ self._full_y = data
+ return None
+
+ def set_full_x(self, data: Dict[str, Any]) -> None:
+ """
+ Set the complete X information.
+
+ Parameters
+ ----------
+ data : Dict[str, Any]
+ A dictionary containing the complete X coordinate data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of X coordinate values.
+ attr_name: attr_value, # X coordinate attributes.
+ ...
+ }
+ """
+ if self.master:
+ self._full_x = data
+ return None
+
+ # noinspection DuplicatedCode
+ def _filter_coordinates_selection(self):
+ """
+ Use the selection limits to filter y, x, time, lev, lat, lon, lon_bnds and lat_bnds.
+ """
+
+ idx = self._get_idx_intervals()
+
+ self.y = self._get_coordinate_values(self.get_full_y(), "Y")
+ self.x = self._get_coordinate_values(self.get_full_x(), "X")
+
+ self.set_full_y({"data": self.y["data"][idx["idx_y_min"]:idx["idx_y_max"]]})
+ self.set_full_x({"data": self.x["data"][idx["idx_x_min"]:idx["idx_x_max"]]})
+
+ super(LCCNes, self)._filter_coordinates_selection()
+
+ return None
+
+ def _get_pyproj_projection(self):
+ """
+ Get projection data as in Pyproj library.
+
+ Returns
+ ----------
+ projection : pyproj.Proj
+ Grid projection.
+ """
+
+ projection = Proj(proj="lcc",
+ ellps="WGS84",
+ R=self.earth_radius[0],
+ lat_1=float64(self.projection_data["standard_parallel"][0]),
+ lat_2=float64(self.projection_data["standard_parallel"][1]),
+ lon_0=float64(self.projection_data["longitude_of_central_meridian"]),
+ lat_0=float64(self.projection_data["latitude_of_projection_origin"]),
+ to_meter=1,
+ x_0=0,
+ y_0=0,
+ a=self.earth_radius[1],
+ k_0=1.0,
+ )
+
+ return projection
+
+ def _get_projection_data(self, create_nes, **kwargs):
+ """
+ Retrieves projection data based on grid details.
+
+ Parameters
+ ----------
+ create_nes : bool
+ Flag indicating whether to create new object (True) or use existing (False).
+ **kwargs : dict
+ Additional keyword arguments for specifying projection details.
+ """
+ if create_nes:
+ projection_data = {"grid_mapping_name": "lambert_conformal_conic",
+ "standard_parallel": [kwargs["lat_1"], kwargs["lat_2"]],
+ "longitude_of_central_meridian": kwargs["lon_0"],
+ "latitude_of_projection_origin": kwargs["lat_0"],
+ "x_0": kwargs["x_0"], "y_0": kwargs["y_0"],
+ "inc_x": kwargs["inc_x"], "inc_y": kwargs["inc_y"],
+ "nx": kwargs["nx"], "ny": kwargs["ny"],
+ }
+ else:
+ if "Lambert_Conformal" in self.variables.keys():
+ projection_data = self.variables["Lambert_Conformal"]
+ self.free_vars("Lambert_Conformal")
+ elif "Lambert_conformal" in self.variables.keys():
+ projection_data = self.variables["Lambert_conformal"]
+ self.free_vars("Lambert_conformal")
+ else:
+ # This branch should be unreachable: __is_lcc in load_nes only detects LCC grids
+ # that contain a Lambert_Conformal (or Lambert_conformal) variable
+ msg = "There is no variable called Lambert_Conformal, projection has not been defined."
+ raise RuntimeError(msg)
+
+ if "dtype" in projection_data.keys():
+ del projection_data["dtype"]
+
+ if "data" in projection_data.keys():
+ del projection_data["data"]
+
+ if "dimensions" in projection_data.keys():
+ del projection_data["dimensions"]
+
+ if isinstance(projection_data["standard_parallel"], str):
+ projection_data["standard_parallel"] = [projection_data["standard_parallel"].split(", ")[0],
+ projection_data["standard_parallel"].split(", ")[1]]
+
+ return projection_data
+
+ # noinspection DuplicatedCode
+ def _create_dimensions(self, netcdf):
+ """
+ Create the "y", "x" and "spatial_nv" dimensions and the super dimensions "lev", "time" and "time_nv".
+
+ Parameters
+ ----------
+ netcdf : Dataset
+ NetCDF object.
+ """
+
+ super(LCCNes, self)._create_dimensions(netcdf)
+
+ # Create y and x dimensions
+ netcdf.createDimension("y", len(self.get_full_y()["data"]))
+ netcdf.createDimension("x", len(self.get_full_x()["data"]))
+
+ # Create spatial_nv (number of vertices) dimension
+ if (self.lat_bnds is not None) and (self.lon_bnds is not None):
+ netcdf.createDimension("spatial_nv", 4)
+
+ return None
+
+ # noinspection DuplicatedCode
+ def _create_dimension_variables(self, netcdf):
+ """
+ Create the "y" and "x" variables.
+
+ Parameters
+ ----------
+ netcdf : Dataset
+ NetCDF object.
+ """
+
+ super(LCCNes, self)._create_dimension_variables(netcdf)
+
+ # LCC Y COORDINATES
+ full_y = self.get_full_y()
+ y = netcdf.createVariable("y", full_y["data"].dtype, ("y",))
+ y.long_name = "y coordinate of projection"
+ if "units" in full_y.keys():
+ y.units = full_y["units"]
+ else:
+ y.units = "m"
+ y.standard_name = "projection_y_coordinate"
+ if self.size > 1:
+ y.set_collective(True)
+ y[:] = full_y["data"]
+
+ # LCC X COORDINATES
+ full_x = self.get_full_x()
+ x = netcdf.createVariable("x", full_x["data"].dtype, ("x",))
+ x.long_name = "x coordinate of projection"
+ if "units" in full_x.keys():
+ x.units = full_x["units"]
+ else:
+ x.units = "m"
+ x.standard_name = "projection_x_coordinate"
+ if self.size > 1:
+ x.set_collective(True)
+ x[:] = full_x["data"]
+
+ return None
+
+ # noinspection DuplicatedCode
+ def _create_centre_coordinates(self, **kwargs):
+ """
+ Calculate centre latitudes and longitudes from grid details.
+
+ Returns
+ ----------
+ centre_lat : dict
+ Dictionary with data of centre latitudes in 2D. None on non-master ranks.
+ centre_lon : dict
+ Dictionary with data of centre longitudes in 2D. None on non-master ranks.
+ """
+ if self.master:
+ # Get projection details on x
+ x_0 = float64(self.projection_data["x_0"])
+ inc_x = float64(self.projection_data["inc_x"])
+ nx = int(self.projection_data["nx"])
+
+ # Get projection details on y
+ y_0 = float64(self.projection_data["y_0"])
+ inc_y = float64(self.projection_data["inc_y"])
+ ny = int(self.projection_data["ny"])
+
+ # Create a regular grid in metres (1D)
+ self._full_x = {"data": linspace(x_0 + (inc_x / 2), x_0 + (inc_x / 2) + (inc_x * (nx - 1)), nx,
+ dtype=float64)}
+ self._full_y = {"data": linspace(y_0 + (inc_y / 2), y_0 + (inc_y / 2) + (inc_y * (ny - 1)), ny,
+ dtype=float64)}
+
+ # Create a regular grid in metres (1D to 2D)
+ x = array([self._full_x["data"]] * len(self._full_y["data"]))
+ y = array([self._full_y["data"]] * len(self._full_x["data"])).T
+
+ # Calculate centre latitudes and longitudes (LCC metres to geographic)
+ centre_lon, centre_lat = self.projection(x, y, inverse=True)
+
+ return {"data": centre_lat}, {"data": centre_lon}
+ else:
+ return None, None
+
+ def create_providentia_exp_centre_coordinates(self):
+ """
+ Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays.
+
+ Returns
+ ----------
+ model_centre_lat : dict
+ Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
+ model_centre_lon : dict
+ Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
+ """
+
+ # Get centre latitudes
+ model_centre_lat = self.lat
+
+ # Get centre longitudes
+ model_centre_lon = self.lon
+
+ return model_centre_lat, model_centre_lon
+
+ # noinspection DuplicatedCode
+ def create_providentia_exp_grid_edge_coordinates(self):
+ """
+ Calculate grid edge latitudes and longitudes and get model grid outline.
+
+ Returns
+ ----------
+ grid_edge_lat : dict
+ Dictionary with data of grid edge latitudes.
+ grid_edge_lon : dict
+ Dictionary with data of grid edge longitudes.
+ """
+ # Get grid resolution
+ inc_x = abs(mean(diff(self.x["data"])))
+ inc_y = abs(mean(diff(self.y["data"])))
+
+ # Get bounds for the projected coordinates
+ y_bnds = self._create_single_spatial_bounds(self.y["data"], inc_y)
+ x_bnds = self._create_single_spatial_bounds(self.x["data"], inc_x)
+
+ # Get projected y coordinates for grid edge
+ left_edge_y = append(y_bnds.flatten()[::2], y_bnds.flatten()[-1])
+ right_edge_y = flip(left_edge_y, 0)
+ top_edge_y = repeat(y_bnds[-1][-1], len(self.x["data"]) - 1)
+ bottom_edge_y = repeat(y_bnds[0][0], len(self.x["data"]))
+ y_grid_edge = concatenate((left_edge_y, top_edge_y, right_edge_y, bottom_edge_y))
+
+ # Get projected x coordinates for grid edge
+ left_edge_x = repeat(x_bnds[0][0], len(self.y["data"]) + 1)
+ top_edge_x = x_bnds.flatten()[1:-1:2]
+ right_edge_x = repeat(x_bnds[-1][-1], len(self.y["data"]) + 1)
+ bottom_edge_x = flip(x_bnds.flatten()[:-1:2], 0)
+ x_grid_edge = concatenate((left_edge_x, top_edge_x, right_edge_x, bottom_edge_x))
+
+ # Get edges for regular coordinates
+ grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True)
+
+ # Create grid outline by stacking the edges in both coordinates
+ model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T
+ grid_edge_lat = {"data": model_grid_outline[:, 1]}
+ grid_edge_lon = {"data": model_grid_outline[:, 0]}
+
+ return grid_edge_lat, grid_edge_lon
+
+ # noinspection DuplicatedCode
+ def create_spatial_bounds(self):
+ """
+ Calculate longitude and latitude bounds and set them.
+ """
+
+ # Calculate LCC coordinates bounds
+ full_x = self.get_full_x()
+ full_y = self.get_full_y()
+ inc_x = abs(mean(diff(full_x["data"])))
+ x_bnds = self._create_single_spatial_bounds(array([full_x["data"]] * len(full_y["data"])),
+ inc_x, spatial_nv=4)
+
+ inc_y = abs(mean(diff(full_y["data"])))
+ y_bnds = self._create_single_spatial_bounds(array([full_y["data"]] * len(full_x["data"])).T,
+ inc_y, spatial_nv=4, inverse=True)
+
+ # Transform LCC bounds to regular bounds
+ lon_bnds, lat_bnds = self.projection(x_bnds, y_bnds, inverse=True)
+
+ # Obtain regular coordinates bounds
+ self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)})
+ self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
+ self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"],
+ :]}
+
+ self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)})
+ self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
+ self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"],
+ :]}
+
+ return None
+
+ @staticmethod
+ def _set_var_crs(var):
+ """
+ Set the grid_mapping to "Lambert_Conformal".
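+ The "coordinates" attribute is also set to "lat lon".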
+
+ Parameters
+ ----------
+ var : Variable
+ netCDF4-python variable object.
+ """
+
+ var.grid_mapping = "Lambert_Conformal"
+ var.coordinates = "lat lon"
+
+ return None
+
+ def _create_metadata(self, netcdf):
+ """
+ Create the "Lambert_Conformal" variable for the lambert conformal grid_mapping.
+
+ Parameters
+ ----------
+ netcdf : Dataset
+ netcdf4-python Dataset.
+ """
+
+ if self.projection_data is not None:
+ mapping = netcdf.createVariable("Lambert_Conformal", "i")
+ mapping.grid_mapping_name = self.projection_data["grid_mapping_name"]
+ mapping.standard_parallel = self.projection_data["standard_parallel"]
+ mapping.longitude_of_central_meridian = self.projection_data["longitude_of_central_meridian"]
+ mapping.latitude_of_projection_origin = self.projection_data["latitude_of_projection_origin"]
+
+ return None
+
+ def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False):
+ """
+ Write output file with grib2 format.
+
+ Parameters
+ ----------
+ lat_flip : bool
+ Indicates if the latitudes need to be flipped Up-Down or Down-Up. Default False.
+ path : str
+ Path to the output file.
+ grib_keys : dict
+ Dictionary with the grib2 keys.
+ grib_template_path : str
+ Path to the grib2 file to use as template.
+ info : bool
+ Indicates if you want to print extra information during the process.
+ """
+
+ raise NotImplementedError("Grib2 format cannot be written in a Lambert Conformal Conic projection.")
+
+ # noinspection DuplicatedCode
+ def create_shapefile(self):
+ """
+ Create spatial GeoDataFrame (shapefile).
+
+ Returns
+ -------
+ shapefile : GeoPandasDataFrame
+ Shapefile dataframe.
+ """
+
+ if self.shapefile is None:
+
+ # Get latitude and longitude cell boundaries
+ if self.lat_bnds is None or self.lon_bnds is None:
+ self.create_spatial_bounds()
+
+ # Reshape arrays to create geometry
+ aux_b_lat = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1],
+ self.lat_bnds["data"].shape[2]))
+ aux_b_lon = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1],
+ self.lon_bnds["data"].shape[2]))
+
+ # Get polygons from bounds
+ geometry = []
+ for i in range(aux_b_lon.shape[0]):
+ geometry.append(Polygon([(aux_b_lon[i, 0], aux_b_lat[i, 0]),
+ (aux_b_lon[i, 1], aux_b_lat[i, 1]),
+ (aux_b_lon[i, 2], aux_b_lat[i, 2]),
+ (aux_b_lon[i, 3], aux_b_lat[i, 3]),
+ (aux_b_lon[i, 0], aux_b_lat[i, 0])]))
+
+ # Create dataframe containing all polygons
+ fids = self.get_fids()
+ gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326")
+ self.shapefile = gdf
+
+ else:
+ gdf = self.shapefile
+
+ return gdf
+
+ # noinspection DuplicatedCode
+ def get_centroids_from_coordinates(self):
+ """
+ Get centroids from geographical coordinates.
+
+ Returns
+ -------
+ centroids_gdf: GeoPandasDataFrame
+ Centroids dataframe.
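+
+ Notes
+ -----
+ The centroids are generated row-major over the 2D coordinate arrays, so their
+ order matches the flattened FIDs used as index (see get_fids()).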
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon["data"].shape[0]): + for lon_ind in range(0, self.lon["data"].shape[1]): + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + self.lat["data"][lat_ind, lon_ind])) + + # Create dataframe containing all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf diff --git a/build/lib/nes/nc_projections/mercator_nes.py b/build/lib/nes/nc_projections/mercator_nes.py new file mode 100644 index 0000000..520f9bb --- /dev/null +++ b/build/lib/nes/nc_projections/mercator_nes.py @@ -0,0 +1,610 @@ +#!/usr/bin/env python + +from numpy import float64, linspace, array, mean, diff, append, flip, repeat, concatenate, vstack +from geopandas import GeoDataFrame +from pandas import Index +from pyproj import Proj +from copy import deepcopy +from typing import Dict, Any +from shapely.geometry import Polygon, Point +from nes.nc_projections.default_nes import Nes + + +class MercatorNes(Nes): + """ + + Attributes + ---------- + _full_y : dict + Y coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + _full_x : dict + X coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + y : dict + Y coordinates dictionary with the portion of "data" corresponding to the rank values. + x : dict + X coordinates dictionary with the portion of "data" corresponding to the rank values. + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("y", "x") for a Mercator projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("y", "x") for a Mercator projection. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("y", "x") for a Mercator projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the MercatorNes class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + times : list, None + List of times to substitute the current ones while creation. 
+
+ """
+ self._full_y = None
+ self._full_x = None
+
+ super(MercatorNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset,
+ parallel_method=parallel_method, balanced=balanced,
+ avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
+ first_level=first_level, last_level=last_level, create_nes=create_nes,
+ times=times, **kwargs)
+
+ if create_nes:
+ # Dimensions screening
+ self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y")
+ self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X")
+ else:
+ # Complete dimensions
+ self._full_y = self._get_coordinate_dimension("y")
+ self._full_x = self._get_coordinate_dimension("x")
+
+ # Dimensions screening
+ self.y = self._get_coordinate_values(self.get_full_y(), "Y")
+ self.x = self._get_coordinate_values(self.get_full_x(), "X")
+
+ # Set axis limits for parallel writing
+ self.write_axis_limits = self._get_write_axis_limits()
+
+ self._var_dim = ("y", "x")
+ self._lat_dim = ("y", "x")
+ self._lon_dim = ("y", "x")
+
+ self.free_vars("crs")
+
+ @staticmethod
+ def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y",
+ avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False,
+ balanced=False, times=None, **kwargs):
+ """
+ Initialize the Nes class.
+
+ Parameters
+ ----------
+ comm: MPI.COMM
+ MPI Communicator.
+ path: str
+ Path to the NetCDF to initialize the object.
+ info: bool
+ Indicates if you want to get reading/writing info.
+ dataset: Dataset
+ NetCDF4-python Dataset to initialize the class.
+ parallel_method : str
+ Indicates the parallelization method that you want. Default: "Y".
+ Accepted values: ["X", "Y", "T"].
+ avoid_first_hours : int
+ Number of hours to remove from first time steps.
+ avoid_last_hours : int
+ Number of hours to remove from last time steps.
+ first_level : int
+ Index of the first level to use.
+ last_level : int, None
+ Index of the last level to use. None if it is the last.
+ create_nes : bool
+ Indicates if you want to create the object from scratch (True) or through an existing file.
+ balanced : bool
+ Indicates if you want a balanced parallelization or not.
+ Balanced dataset cannot be written in chunking mode.
+ times : list, None
+ List of times to substitute the current ones while creation.
+ """
+
+ new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset,
+ parallel_method=parallel_method, avoid_first_hours=avoid_first_hours,
+ avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level,
+ create_nes=create_nes, balanced=balanced, times=times, **kwargs)
+
+ return new
+
+ def get_full_y(self) -> Dict[str, Any]:
+ """
+ Retrieve the complete Y information.
+
+ Returns
+ -------
+ Dict[str, Any]
+ A dictionary containing the complete Y coordinate data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of Y coordinate values.
+ attr_name: attr_value, # Y coordinate attributes.
+ ...
+ }
+ """
+ data = self.comm.bcast(self._full_y)
+
+ return data
+
+ def get_full_x(self) -> Dict[str, Any]:
+ """
+ Retrieve the complete X information.
+
+ Returns
+ -------
+ Dict[str, Any]
+ A dictionary containing the complete X coordinate data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of X coordinate values.
+ attr_name: attr_value, # X coordinate attributes.
+ ...
+ }
+ """
+ data = self.comm.bcast(self._full_x)
+ return data
+
+ def set_full_y(self, data: Dict[str, Any]) -> None:
+ """
+ Set the complete Y information.
+
+ Parameters
+ ----------
+ data : Dict[str, Any]
+ A dictionary containing the complete Y coordinate data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of Y coordinate values.
+ attr_name: attr_value, # Y coordinate attributes.
+ ...
+ }
+ """
+ if self.master:
+ self._full_y = data
+ return None
+
+ def set_full_x(self, data: Dict[str, Any]) -> None:
+ """
+ Set the complete X information.
+
+ Parameters
+ ----------
+ data : Dict[str, Any]
+ A dictionary containing the complete X coordinate data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of X coordinate values.
+ attr_name: attr_value, # X coordinate attributes.
+ ...
+ }
+ """
+ if self.master:
+ self._full_x = data
+ return None
+
+ # noinspection DuplicatedCode
+ def _filter_coordinates_selection(self):
+ """
+ Use the selection limits to filter y, x, time, lev, lat, lon, lon_bnds and lat_bnds.
+ """
+
+ idx = self._get_idx_intervals()
+
+ self.y = self._get_coordinate_values(self.get_full_y(), "Y")
+ self.x = self._get_coordinate_values(self.get_full_x(), "X")
+
+ self.set_full_y({"data": self.y["data"][idx["idx_y_min"]:idx["idx_y_max"]]})
+ self.set_full_x({"data": self.x["data"][idx["idx_x_min"]:idx["idx_x_max"]]})
+
+ super(MercatorNes, self)._filter_coordinates_selection()
+
+ return None
+
+ def _get_pyproj_projection(self):
+ """
+ Get projection data as in Pyproj library.
+
+ Returns
+ ----------
+ projection : pyproj.Proj
+ Grid projection.
+ """
+
+ projection = Proj(proj="merc",
+ a=self.earth_radius[1],
+ b=self.earth_radius[0],
+ lat_ts=float64(self.projection_data["standard_parallel"]),
+ lon_0=float64(self.projection_data["longitude_of_projection_origin"]),)
+
+ return projection
+
+ # noinspection DuplicatedCode
+ def _get_projection_data(self, create_nes, **kwargs):
+ """
+ Retrieves projection data based on grid details.
+
+ Parameters
+ ----------
+ create_nes : bool
+ Flag indicating whether to create new object (True) or use existing (False).
+ **kwargs : dict
+ Additional keyword arguments for specifying projection details.
+ """
+ if create_nes:
+ projection_data = {"grid_mapping_name": "mercator",
+ "standard_parallel": kwargs["lat_ts"],
+ "longitude_of_projection_origin": kwargs["lon_0"],
+ "x_0": kwargs["x_0"], "y_0": kwargs["y_0"],
+ "inc_x": kwargs["inc_x"], "inc_y": kwargs["inc_y"],
+ "nx": kwargs["nx"], "ny": kwargs["ny"],
+ }
+ else:
+ if "mercator" in self.variables.keys():
+ projection_data = self.variables["mercator"]
+ self.free_vars("mercator")
+ else:
+ msg = "There is no variable called mercator, projection has not been defined."
+ raise RuntimeError(msg)
+
+ if "dtype" in projection_data.keys():
+ del projection_data["dtype"]
+
+ if "data" in projection_data.keys():
+ del projection_data["data"]
+
+ if "dimensions" in projection_data.keys():
+ del projection_data["dimensions"]
+
+ return projection_data
+
+ # noinspection DuplicatedCode
+ def _create_dimensions(self, netcdf):
+ """
+ Create the "y", "x" and "spatial_nv" dimensions and the super dimensions "lev", "time" and "time_nv".
+
+ Parameters
+ ----------
+ netcdf : Dataset
+ NetCDF object.
+ """ + + super(MercatorNes, self)._create_dimensions(netcdf) + + # Create y and x dimensions + netcdf.createDimension("y", len(self.get_full_y()["data"])) + netcdf.createDimension("x", len(self.get_full_x()["data"])) + + # Create spatial_nv (number of vertices) dimension + if (self.lat_bnds is not None) and (self.lon_bnds is not None): + netcdf.createDimension("spatial_nv", 4) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "y" and "x" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(MercatorNes, self)._create_dimension_variables(netcdf) + + # MERCATOR Y COORDINATES + full_y = self.get_full_y() + y = netcdf.createVariable("y", full_y["data"].dtype, ("y",)) + y.long_name = "y coordinate of projection" + if "units" in full_y.keys(): + y.units = full_y["units"] + else: + y.units = "m" + y.standard_name = "projection_y_coordinate" + if self.size > 1: + y.set_collective(True) + y[:] = full_y["data"] + + # MERCATOR X COORDINATES + full_x = self.get_full_x() + x = netcdf.createVariable("x", full_x["data"].dtype, ("x",)) + x.long_name = "x coordinate of projection" + if "units" in full_x.keys(): + x.units = full_x["units"] + else: + x.units = "m" + x.standard_name = "projection_x_coordinate" + if self.size > 1: + x.set_collective(True) + x[:] = full_x["data"] + + return None + + # noinspection DuplicatedCode + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + """ + if self.master: + # Get projection details on x + x_0 = float64(self.projection_data["x_0"]) + inc_x = float64(self.projection_data["inc_x"]) + nx = int(self.projection_data["nx"]) + + # Get projection details on y + y_0 = float64(self.projection_data["y_0"]) + inc_y = float64(self.projection_data["inc_y"]) + ny = int(self.projection_data["ny"]) + + # Create a regular grid in metres (1D) + self._full_x = {"data": linspace(x_0 + (inc_x / 2), x_0 + (inc_x / 2) + (inc_x * (nx - 1)), nx, + dtype=float64)} + self._full_y = {"data": linspace(y_0 + (inc_y / 2), y_0 + (inc_y / 2) + (inc_y * (ny - 1)), ny, + dtype=float64)} + + # Create a regular grid in metres (1D to 2D) + x = array([self._full_x["data"]] * len(self._full_y["data"])) + y = array([self._full_y["data"]] * len(self._full_x["data"])).T + + # Calculate centre latitudes and longitudes (UTM to Mercator) + centre_lon, centre_lat = self.projection(x, y, inverse=True) + + return {"data": centre_lat}, {"data": centre_lon} + else: + return None, None + + def create_providentia_exp_centre_coordinates(self): + """ + Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. + + Returns + ---------- + model_centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + model_centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). + """ + + # Get centre latitudes + model_centre_lat = self.lat + + # Get centre longitudes + model_centre_lon = self.lon + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. 
+ """ + + # Get grid resolution + inc_x = abs(mean(diff(self.x["data"]))) + inc_y = abs(mean(diff(self.y["data"]))) + + # Get bounds for rotated coordinates + y_bounds = self._create_single_spatial_bounds(self.y["data"], inc_y) + x_bounds = self._create_single_spatial_bounds(self.x["data"], inc_x) + + # Get rotated latitudes for grid edge + left_edge_y = append(y_bounds.flatten()[::2], y_bounds.flatten()[-1]) + right_edge_y = flip(left_edge_y, 0) + top_edge_y = repeat(y_bounds[-1][-1], len(self.x["data"]) - 1) + bottom_edge_y = repeat(y_bounds[0][0], len(self.x["data"])) + y_grid_edge = concatenate((left_edge_y, top_edge_y, right_edge_y, bottom_edge_y)) + + # Get rotated longitudes for grid edge + left_edge_x = repeat(x_bounds[0][0], len(self.y["data"]) + 1) + top_edge_x = x_bounds.flatten()[1:-1:2] + right_edge_x = repeat(x_bounds[-1][-1], len(self.y["data"]) + 1) + bottom_edge_x = flip(x_bounds.flatten()[:-1:2], 0) + x_grid_edge = concatenate((left_edge_x, top_edge_x, right_edge_x, bottom_edge_x)) + + # Get edges for regular coordinates + grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + # noinspection DuplicatedCode + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + + # Calculate Mercator coordinates bounds + full_x = self.get_full_x() + full_y = self.get_full_y() + inc_x = abs(mean(diff(full_x["data"]))) + x_bnds = self._create_single_spatial_bounds(array([full_x["data"]] * len(full_y["data"])), + inc_x, spatial_nv=4) + + inc_y = abs(mean(diff(full_y["data"]))) + y_bnds = self._create_single_spatial_bounds(array([full_y["data"]] * len(full_x["data"])).T, + inc_y, spatial_nv=4, inverse=True) + + # Transform Mercator bounds to regular bounds + lon_bnds, lat_bnds = self.projection(x_bnds, y_bnds, inverse=True) + + # Obtain regular coordinates bounds + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + return None + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "mercator". + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + var.grid_mapping = "mercator" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the Mercator grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset. 
+ """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("mercator", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.standard_parallel = self.projection_data["standard_parallel"] + mapping.longitude_of_projection_origin = self.projection_data["longitude_of_projection_origin"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if you want to flip latitudes Up-Down + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written in a Mercator projection.") + + # noinspection DuplicatedCode + def create_shapefile(self): + """ + Create spatial GeoDataFrame (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lat = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + self.lat_bnds["data"].shape[2])) + aux_b_lon = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + self.lon_bnds["data"].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lon.shape[0]): + geometry.append(Polygon([(aux_b_lon[i, 0], aux_b_lat[i, 0]), + (aux_b_lon[i, 1], aux_b_lat[i, 1]), + (aux_b_lon[i, 2], aux_b_lat[i, 2]), + (aux_b_lon[i, 3], aux_b_lat[i, 3]), + (aux_b_lon[i, 0], aux_b_lat[i, 0])])) + + # Create dataframe containing all polygons + fids = self.get_fids() + gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + # noinspection DuplicatedCode + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon["data"].shape[0]): + for lon_ind in range(0, self.lon["data"].shape[1]): + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + self.lat["data"][lat_ind, lon_ind])) + + # Create dataframe containing all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf diff --git a/build/lib/nes/nc_projections/points_nes.py b/build/lib/nes/nc_projections/points_nes.py new file mode 100644 index 0000000..29022b5 --- /dev/null +++ b/build/lib/nes/nc_projections/points_nes.py @@ -0,0 +1,755 @@ +#!/usr/bin/env python + +import sys +from warnings import warn +from numpy import float64, arange, array, ndarray, generic, issubdtype, character, concatenate +from pandas import Index +from geopandas import GeoDataFrame, points_from_xy +from pyproj import Proj +from copy import deepcopy +from netCDF4 import date2num +from .default_nes import Nes + + +class PointsNes(Nes): + """ + + Attributes + ---------- + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("lat", "lon", ) for a points grid. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("lat", ) for a points grid. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("lon", ) for a points grid. + _station : tuple + A Tuple with the name of the dimensions of the station values. + ("station", ) for a points grid. + """ + + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the PointsNes class. + + Parameters + ---------- + comm: MPI.Comm + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset or None + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + accepted values: ["X", "T"]. + strlen: int + Maximum length of strings in NetCDF. Default: 75. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. 
+ """ + + super(PointsNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, balanced=balanced, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "X") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + + # Complete dimensions + self._station = {"data": arange(len(self.get_full_longitudes()["data"]))} + + # Dimensions screening + self.station = self._get_coordinate_values(self._station, "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("station",) + self._lat_dim = ("station",) + self._lon_dim = ("station",) + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + create_nes=False, balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + accepted values: ["X", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + @staticmethod + def _get_pyproj_projection(): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. + """ + + projection = Proj(proj="latlong", ellps="WGS84",) + + return projection + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + + return None + + def _create_dimensions(self, netcdf): + """ + Create "time", "time_nv", "station" and "strlen" dimensions. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
+ """ + + # Create time dimension + netcdf.createDimension("time", None) + + # Create time_nv (number of vertices) dimension + if self.time_bnds is not None: + netcdf.createDimension("time_nv", 2) + + # Create station dimension + # The number of longitudes is equal to the number of stations + netcdf.createDimension("station", len(self.get_full_longitudes()["data"])) + + # Create string length dimension + if self.strlen is not None: + netcdf.createDimension("strlen", self.strlen) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "station", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + # TIMES + time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "hours since {0}".format( + self.get_full_times()[self._get_time_id(self.hours_start, first=True)].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if self.time_bnds is not None: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): + self._get_time_id(self.hours_end, first=False)], + time_var.units, time_var.calendar) + + # TIME BOUNDS + if self.time_bnds is not None: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), zlib=self.zip_lvl, + complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(self.get_full_time_bnds(), time_var.units, calendar="standard") + + # STATIONS + stations = netcdf.createVariable("station", float64, ("station",), zlib=self.zip_lvl > 0, + complevel=self.zip_lvl) + stations.units = "" + stations.axis = "X" + stations.long_name = "" + stations.standard_name = "station" + if self.size > 1: + stations.set_collective(True) + stations[:] = self._station["data"] + + # LATITUDES + lat = netcdf.createVariable("lat", float64, self._lat_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if self.lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = self.get_full_latitudes()["data"] + + # LONGITUDES + lon = netcdf.createVariable("lon", float64, self._lon_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if self.lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = self.get_full_longitudes()["data"] + + return None + + # noinspection DuplicatedCode + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + return values + + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 1 or 2 dimensions + if len(var_dims) < 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 2: + if "strlen" in var_dims: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], :] + data = array(["".join(i.tobytes().decode("ascii").replace("\x00", "")) for i in data], dtype=object) + else: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("Error with {0}. Only can be read netCDF with 2 dimensions or less".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + # noinspection DuplicatedCode + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open Dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + if self.variables is not None: + for i, (var_name, var_dict) in enumerate(self.variables.items()): + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception: # TODO: Detect exception + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") + + # Get dimensions when reading datasets + if "dimensions" in var_dict.keys(): + var_dims = var_dict["dimensions"] + # Get dimensions when creating new datasets + else: + if len(var_dict["data"].shape) == 1: + # For data that depends only on station (e.g. station_code) + var_dims = self._var_dim + else: + # For data that is dependent on time and station (e.g. 
PM10) + var_dims = ("time",) + self._var_dim + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.balanced: + raise NotImplementedError("A balanced data cannot be chunked.") + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl, + chunksizes=chunk_size) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if len(att_value.shape) == 1: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 2: + if "strlen" in var_dims: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + except ValueError: + raise ValueError( + "Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]: + self.write_axis_limits["x_max"]].shape, + att_value.shape)) + else: + try: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + out_shape, att_value.shape)) + except ValueError: + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape + raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( + out_shape, att_value.shape)) + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + return None + + # noinspection DuplicatedCode + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. + """ + data_list = deepcopy(data_to_gather) + for var_name, var_info in data_list.items(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + if self.parallel_method == "X": + # concatenate over station + if shp_len == 1: + # dimensions = (station) + axis = 0 + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = 0 + else: + # dimensions = (time, station) + axis = 1 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + elif self.parallel_method == "T": + # concatenate over time + if shp_len == 1: + # dimensions = (station) + axis = None + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = None + else: + # dimensions = (time, station) + axis = 0 + else: + msg = "The points NetCDF must only have surface values (without levels)." + raise NotImplementedError(msg) + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "T"])) + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from points. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + # Calculate centre latitudes + centre_lat = kwargs["lat"] + + # Calculate centre longitudes + centre_lon = kwargs["lon"] + + return {"data": centre_lat}, {"data": centre_lon} + + def _create_metadata(self, netcdf): + """ + Create metadata variables + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + return None + + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. 
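+
+        Notes
+        -----
+        Point datasets have no cell geometry, so calling this method always
+        raises ``NotImplementedError``.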
+ """ + + raise NotImplementedError("Spatial bounds cannot be created for points datasets.") + + def to_providentia(self, model_centre_lon, model_centre_lat, grid_edge_lon, grid_edge_lat): + """ + Transform a PointsNes into a PointsNesProvidentia object + + Returns + ---------- + points_nes_providentia : nes.Nes + Points Nes Providentia Object + """ + + from .points_nes_providentia import PointsNesProvidentia + + points_nes_providentia = PointsNesProvidentia(comm=self.comm, + info=self.info, + balanced=self.balanced, + parallel_method=self.parallel_method, + avoid_first_hours=self.hours_start, + avoid_last_hours=self.hours_end, + first_level=self.first_level, + last_level=self.last_level, + create_nes=True, + times=self.time, + model_centre_lon=model_centre_lon, + model_centre_lat=model_centre_lat, + grid_edge_lon=grid_edge_lon, + grid_edge_lat=grid_edge_lat, + lat=self.lat["data"], + lon=self.lon["data"] + ) + + # Convert dimensions (time, lev, lat, lon) to (station, time) for interpolated variables and reshape data + variables = {} + interpolated_variables = deepcopy(self.variables) + for var_name, var_info in interpolated_variables.items(): + variables[var_name] = {} + # ("time", "lev", "lat", "lon") or ("time", "lat", "lon") to ("station", "time") + if len(var_info["dimensions"]) != len(var_info["data"].shape): + variables[var_name]["data"] = var_info["data"].T + variables[var_name]["dimensions"] = ("station", "time") + else: + variables[var_name]["data"] = var_info["data"] + variables[var_name]["dimensions"] = var_info["dimensions"] + + # Set variables + points_nes_providentia.variables = variables + + return points_nes_providentia + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if you want to flip the latitude direction. + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written with point data.") + + def create_shapefile(self): + """ + Create spatial GeoDataFrame (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + # Create dataframe containing all points + gdf = self.get_centroids_from_coordinates() + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. + """ + + # Get centroids from coordinates + centroids = points_from_xy(self.lon["data"], self.lat["data"]) + + # Create dataframe containing all points + fids = arange(len(self.get_full_longitudes()["data"])) + fids = fids[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf + + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : list, str + List (or single string) of the variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. 
+        idx_time : int
+            Index of time for which the data will be saved in the shapefile.
+        """
+
+        if idx_lev != 0:
+            msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev)
+            raise ValueError(msg)
+
+        for var_name in var_list:
+            # station as dimension
+            if len(self.variables[var_name]["dimensions"]) == 1:
+                self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel()
+            # station and time as dimensions
+            else:
+                self.shapefile[var_name] = self.variables[var_name]["data"][idx_time, :].ravel()
+
+        return None
+
+    @staticmethod
+    def _get_axis_index_(axis):
+        if axis == "T":
+            value = 0
+        elif axis == "X":
+            value = 1
+        else:
+            raise ValueError("Unknown axis: {0}".format(axis))
+        return value
+
+    @staticmethod
+    def _set_var_crs(var):
+        """
+        Set the grid_mapping.
+
+        Parameters
+        ----------
+        var : Variable
+            netCDF4-python variable object.
+        """
+        var.coordinates = "lat lon"
+
+        return None
diff --git a/build/lib/nes/nc_projections/points_nes_ghost.py b/build/lib/nes/nc_projections/points_nes_ghost.py
new file mode 100644
index 0000000..0df1c75
--- /dev/null
+++ b/build/lib/nes/nc_projections/points_nes_ghost.py
@@ -0,0 +1,818 @@
+#!/usr/bin/env python
+
+import sys
+from warnings import warn
+from numpy import float64, empty, ndarray, generic, array, issubdtype, character, concatenate, int64
+from netCDF4 import date2num
+from copy import deepcopy
+from .points_nes import PointsNes
+
+
+class PointsNesGHOST(PointsNes):
+    """
+
+    Attributes
+    ----------
+    _qa : dict
+        Quality flags (GHOST checks) dictionary with the complete "data" key for all the values and the rest of the
+        attributes.
+    _flag : dict
+        Data flags (given by data provider) dictionary with the complete "data" key for all the values and the rest of
+        the attributes.
+    qa : dict
+        Quality flags (GHOST checks) dictionary with the portion of "data" corresponding to the rank values.
+    flag : dict
+        Data flags (given by data provider) dictionary with the portion of "data" corresponding to the rank values.
+    """
+
+    def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X",
+                 avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False,
+                 balanced=False, times=None, **kwargs):
+        """
+        Initialize the PointsNesGHOST class.
+
+        Parameters
+        ----------
+        comm: MPI.COMM
+            MPI Communicator.
+        path: str
+            Path to the NetCDF to initialize the object.
+        info: bool
+            Indicates if you want to get reading/writing info.
+        dataset: Dataset
+            NetCDF4-python Dataset to initialize the class.
+        parallel_method : str
+            Indicates the parallelization method that you want. Default: "X".
+            Accepted values: ["X"].
+        avoid_first_hours : int
+            Number of hours to remove from first time steps.
+        avoid_last_hours : int
+            Number of hours to remove from last time steps.
+        first_level : int
+            Index of the first level to use.
+        last_level : int, None
+            Index of the last level to use. None if it is the last.
+        create_nes : bool
+            Indicates if you want to create the object from scratch (True) or through an existing file.
+        balanced : bool
+            Indicates if you want a balanced parallelization or not.
+            Balanced dataset cannot be written in chunking mode.
+        times : list, None
+            List of times to substitute the current ones while creation.
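+
+        Examples
+        --------
+        A minimal sketch; ``ghost_obs.nc`` is an illustrative path to a GHOST
+        points file:
+
+        >>> ghost = PointsNesGHOST(path="ghost_obs.nc", parallel_method="X")
+        >>> plain = ghost.to_points()  # drop GHOST metadata, keep measurements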
+ """ + + super(PointsNesGHOST, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, balanced=balanced, **kwargs) + + # Complete dimensions + self._flag = self._get_coordinate_dimension(["flag"]) + self._qa = self._get_coordinate_dimension(["qa"]) + + # Dimensions screening + self.flag = self._get_coordinate_values(self._flag, "X") + self.qa = self._get_coordinate_values(self._qa, "X") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the PointsNesGHOST class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def _create_dimensions(self, netcdf): + """ + Create "N_flag_codes" and "N_qa_codes" dimensions and the super dimensions + "time", "time_nv", "station", and "strlen". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(PointsNesGHOST, self)._create_dimensions(netcdf) + + # Create N_flag_codes and N_qa_codes dimensions + netcdf.createDimension("N_flag_codes", self._flag["data"].shape[2]) + netcdf.createDimension("N_qa_codes", self._qa["data"].shape[2]) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "station", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
+ """ + + # TIMES + time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "hours since {0}".format( + self.get_full_times()[self._get_time_id(self.hours_start, first=True)].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if self.time_bnds is not None: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): + self._get_time_id(self.hours_end, first=False)], + time_var.units, time_var.calendar) + + # TIME BOUNDS + if self.time_bnds is not None: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), zlib=self.zip_lvl, + complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(self.get_full_time_bnds(), time_var.units, calendar="standard") + + # STATIONS + stations = netcdf.createVariable("station", float64, ("station",), zlib=self.zip_lvl > 0, + complevel=self.zip_lvl) + stations.units = "" + stations.axis = "X" + stations.long_name = "" + stations.standard_name = "station" + if self.size > 1: + stations.set_collective(True) + stations[:] = self._station["data"] + + # LATITUDES + lat = netcdf.createVariable("latitude", float64, self._lat_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if self.lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = self.get_full_latitudes()["data"] + + # LONGITUDES + lon = netcdf.createVariable("longitude", float64, self._lon_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if self.lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = self.get_full_longitudes()["data"] + + def erase_flags(self): + + first_time_idx = self._get_time_id(self.hours_start, first=True) + last_time_idx = self._get_time_id(self.hours_end, first=False) + t_len = last_time_idx - first_time_idx + + self._qa["data"] = empty((len(self.get_full_longitudes()["data"]), t_len, 0)) + self._flag["data"] = empty((len(self.get_full_longitudes()["data"]), t_len, 0)) + + return None + + # noinspection DuplicatedCode + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif coordinate_len == 3: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], :] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + return values + + # noinspection DuplicatedCode + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 1 or 2 dimensions + if len(var_dims) < 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif len(var_dims) == 3: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + :] + else: + raise NotImplementedError("Error with {0}. Only can be read netCDF with 3 dimensions or less".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + # noinspection DuplicatedCode + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open Dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + if self.variables is not None: + for i, (var_name, var_dict) in enumerate(self.variables.items()): + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception: + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") + + # Get dimensions when reading datasets + if "dimensions" in var_dict.keys(): + var_dims = var_dict["dimensions"] + # Get dimensions when creating new datasets + else: + if len(var_dict["data"].shape) == 1: + # For data that depends only on station (e.g. station_code) + var_dims = self._var_dim + else: + # For data that is dependent on time and station (e.g. 
PM10) + var_dims = self._var_dim + ("time",) + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, + complevel=self.zip_lvl, chunksizes=chunk_size) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if len(att_value.shape) == 1: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 2: + if "strlen" in var_dims: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + else: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, + att_value.shape)) + except ValueError: + out_shape = var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape + raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( + out_shape, att_value.shape)) + elif len(att_value.shape) == 3: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + return None + + # noinspection DuplicatedCode + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. + """ + + data_list = deepcopy(data_to_gather) + for var_name, var_info in data_list.items(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + # concatenate over station + if self.parallel_method == "X": + if shp_len == 1: + # dimensions = (station) + axis = 0 + elif shp_len == 2: + # dimensions = (station, strlen) or + # dimensions = (station, time) + axis = 0 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + elif self.parallel_method == "T": + # concatenate over time + if shp_len == 1: + # dimensions = (station) + axis = None + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = None + else: + # dimensions = (station, time) + axis = 1 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "T"])) + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def _create_metadata(self, netcdf): + """ + Create metadata variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
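+
+        Notes
+        -----
+        Writes the GHOST ``flag`` and ``qa`` integer variables, with dimensions
+        ("station", "time", "N_flag_codes") and ("station", "time", "N_qa_codes")
+        respectively, filled from the complete ``_flag`` and ``_qa`` dictionaries.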
+ """ + + # N FLAG CODES + flag = netcdf.createVariable("flag", int64, ("station", "time", "N_flag_codes",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + flag.units = "" + flag.axis = "" + flag.long_name = "" + flag.standard_name = "flag" + if self.size > 1: + flag.set_collective(True) + flag[:] = self._flag["data"] + + # N QA CODES + qa = netcdf.createVariable("qa", int64, ("station", "time", "N_qa_codes",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + qa.units = "" + qa.axis = "" + qa.long_name = "" + qa.standard_name = "N_qa_codes" + if self.size > 1: + qa.set_collective(True) + qa[:] = self._qa["data"] + + return None + + def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES", + keep_open=False): + """ + Write the netCDF output file. + + Parameters + ---------- + keep_open : bool + nc_type : str + path : str + Path to the output netCDF file. + compression_level : int + Level of compression (0 to 9) Default: 0 (no compression). + serial : bool + Indicates if you want to write in serial or not. Default: False. + info : bool + Indicates if you want to print the information of each writing step by stdout Default: False. + chunking : bool + Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False. + """ + + if (not serial) and (self.size > 1): + msg = "WARNING!!! " + msg += "GHOST datasets cannot be written in parallel yet. " + msg += "Changing to serial mode." + warn(msg) + sys.stderr.flush() + + super(PointsNesGHOST, self).to_netcdf(path, compression_level=compression_level, + serial=True, info=info, chunking=chunking) + + return None + + def to_points(self): + """ + Transform a PointsNesGHOST into a PointsNes object + + Returns + ---------- + points_nes : nes.Nes + Points Nes Object (without GHOST metadata variables) + """ + + points_nes = PointsNes(comm=self.comm, + info=self.info, + balanced=self.balanced, + parallel_method=self.parallel_method, + avoid_first_hours=self.hours_start, + avoid_last_hours=self.hours_end, + first_level=self.first_level, + last_level=self.last_level, + create_nes=True, + lat=self.lat["data"], + lon=self.lon["data"], + times=self.time + ) + + # The version attribute in GHOST files prior to 1.3.3 is called data_version, after it is version + if "version" in self.global_attrs: + ghost_version = self.global_attrs["version"] + elif "data_version" in self.global_attrs: + ghost_version = self.global_attrs["data_version"] + else: + ghost_version = "0.0.0" + metadata_variables = self.get_standard_metadata(ghost_version) + self.free_vars(metadata_variables) + self.free_vars("station") + points_nes.variables = deepcopy(self.variables) + + return points_nes + + @staticmethod + def get_standard_metadata(ghost_version): + """ + Get all possible GHOST variables for each version. + + Parameters + ---------- + ghost_version : str + Version of GHOST file. 
+ + Returns + ---------- + metadata_variables[GHOST_version] : list + A List of metadata variables for a certain GHOST version + """ + + # This metadata variables are + metadata_variables = {"1.4": ["GHOST_version", "station_reference", "station_timezone", "latitude", "longitude", + "altitude", "sampling_height", "measurement_altitude", "ellipsoid", + "horizontal_datum", "vertical_datum", "projection", "distance_to_building", + "distance_to_kerb", "distance_to_junction", "distance_to_source", "street_width", + "street_type", "daytime_traffic_speed", "daily_passing_vehicles", "data_level", + "climatology", "station_name", "city", "country", + "administrative_country_division_1", "administrative_country_division_2", + "population", "representative_radius", "network", "associated_networks", + "area_classification", "station_classification", "main_emission_source", + "land_use", "terrain", "measurement_scale", + "ESDAC_Iwahashi_landform_classification", + "ESDAC_modal_Iwahashi_landform_classification_5km", + "ESDAC_modal_Iwahashi_landform_classification_25km", + "ESDAC_Meybeck_landform_classification", + "ESDAC_modal_Meybeck_landform_classification_5km", + "ESDAC_modal_Meybeck_landform_classification_25km", + "GHSL_settlement_model_classification", + "GHSL_modal_settlement_model_classification_5km", + "GHSL_modal_settlement_model_classification_25km", + "Joly-Peuch_classification_code", "Koppen-Geiger_classification", + "Koppen-Geiger_modal_classification_5km", + "Koppen-Geiger_modal_classification_25km", + "MODIS_MCD12C1_v6_IGBP_land_use", "MODIS_MCD12C1_v6_modal_IGBP_land_use_5km", + "MODIS_MCD12C1_v6_modal_IGBP_land_use_25km", "MODIS_MCD12C1_v6_UMD_land_use", + "MODIS_MCD12C1_v6_modal_UMD_land_use_5km", + "MODIS_MCD12C1_v6_modal_UMD_land_use_25km", "MODIS_MCD12C1_v6_LAI", + "MODIS_MCD12C1_v6_modal_LAI_5km", "MODIS_MCD12C1_v6_modal_LAI_25km", + "WMO_region", "WWF_TEOW_terrestrial_ecoregion", "WWF_TEOW_biogeographical_realm", + "WWF_TEOW_biome", "UMBC_anthrome_classification", + "UMBC_modal_anthrome_classification_5km", + "UMBC_modal_anthrome_classification_25km", + "EDGAR_v4.3.2_annual_average_BC_emissions", + "EDGAR_v4.3.2_annual_average_CO_emissions", + "EDGAR_v4.3.2_annual_average_NH3_emissions", + "EDGAR_v4.3.2_annual_average_NMVOC_emissions", + "EDGAR_v4.3.2_annual_average_NOx_emissions", + "EDGAR_v4.3.2_annual_average_OC_emissions", + "EDGAR_v4.3.2_annual_average_PM10_emissions", + "EDGAR_v4.3.2_annual_average_biogenic_PM2.5_emissions", + "EDGAR_v4.3.2_annual_average_fossilfuel_PM2.5_emissions", + "EDGAR_v4.3.2_annual_average_SO2_emissions", "ASTER_v3_altitude", + "ETOPO1_altitude", "ETOPO1_max_altitude_difference_5km", + "GHSL_built_up_area_density", "GHSL_average_built_up_area_density_5km", + "GHSL_average_built_up_area_density_25km", "GHSL_max_built_up_area_density_5km", + "GHSL_max_built_up_area_density_25km", "GHSL_population_density", + "GHSL_average_population_density_5km", "GHSL_average_population_density_25km", + "GHSL_max_population_density_5km", "GHSL_max_population_density_25km", + "GPW_population_density", "GPW_average_population_density_5km", + "GPW_average_population_density_25km", "GPW_max_population_density_5km", + "GPW_max_population_density_25km", + "NOAA-DMSP-OLS_v4_nighttime_stable_lights", + "NOAA-DMSP-OLS_v4_average_nighttime_stable_lights_5km", + "NOAA-DMSP-OLS_v4_average_nighttime_stable_lights_25km", + "NOAA-DMSP-OLS_v4_max_nighttime_stable_lights_5km", + "NOAA-DMSP-OLS_v4_max_nighttime_stable_lights_25km", + "OMI_level3_column_annual_average_NO2", + 
"OMI_level3_column_cloud_screened_annual_average_NO2", + "OMI_level3_tropospheric_column_annual_average_NO2", + "OMI_level3_tropospheric_column_cloud_screened_annual_average_NO2", + "GSFC_coastline_proximity", "primary_sampling_type", + "primary_sampling_instrument_name", + "primary_sampling_instrument_documented_flow_rate", + "primary_sampling_instrument_reported_flow_rate", + "primary_sampling_process_details", "primary_sampling_instrument_manual_name", + "primary_sampling_further_details", "sample_preparation_types", + "sample_preparation_techniques", "sample_preparation_process_details", + "sample_preparation_further_details", "measurement_methodology", + "measuring_instrument_name", "measuring_instrument_sampling_type", + "measuring_instrument_documented_flow_rate", + "measuring_instrument_reported_flow_rate", "measuring_instrument_process_details", + "measuring_instrument_process_details", "measuring_instrument_manual_name", + "measuring_instrument_further_details", "measuring_instrument_reported_units", + "measuring_instrument_reported_lower_limit_of_detection", + "measuring_instrument_documented_lower_limit_of_detection", + "measuring_instrument_reported_upper_limit_of_detection", + "measuring_instrument_documented_upper_limit_of_detection", + "measuring_instrument_reported_uncertainty", + "measuring_instrument_documented_uncertainty", + "measuring_instrument_reported_accuracy", + "measuring_instrument_documented_accuracy", + "measuring_instrument_reported_precision", + "measuring_instrument_documented_precision", + "measuring_instrument_reported_zero_drift", + "measuring_instrument_documented_zero_drift", + "measuring_instrument_reported_span_drift", + "measuring_instrument_documented_span_drift", + "measuring_instrument_reported_zonal_drift", + "measuring_instrument_documented_zonal_drift", + "measuring_instrument_reported_measurement_resolution", + "measuring_instrument_documented_measurement_resolution", + "measuring_instrument_reported_absorption_cross_section", + "measuring_instrument_documented_absorption_cross_section", + "measuring_instrument_inlet_information", + "measuring_instrument_calibration_scale", + "network_provided_volume_standard_temperature", + "network_provided_volume_standard_pressure", "retrieval_algorithm", + "principal_investigator_name", "principal_investigator_institution", + "principal_investigator_email_address", "contact_name", + "contact_institution", "contact_email_address", "meta_update_stamp", + "data_download_stamp", "data_revision_stamp", "network_sampling_details", + "network_uncertainty_details", "network_maintenance_details", + "network_qa_details", "network_miscellaneous_details", "data_licence", + "process_warnings", "temporal_resolution", + "reported_lower_limit_of_detection_per_measurement", + "reported_upper_limit_of_detection_per_measurement", + "reported_uncertainty_per_measurement", "derived_uncertainty_per_measurement", + "day_night_code", "weekday_weekend_code", "season_code", + "hourly_native_representativity_percent", "hourly_native_max_gap_percent", + "daily_native_representativity_percent", "daily_representativity_percent", + "daily_native_max_gap_percent", "daily_max_gap_percent", + "monthly_native_representativity_percent", "monthly_representativity_percent", + "monthly_native_max_gap_percent", "monthly_max_gap_percent", + "annual_native_representativity_percent", "annual_native_max_gap_percent", + "all_representativity_percent", "all_max_gap_percent"], + } + + return metadata_variables[ghost_version] + + # noinspection 
DuplicatedCode + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : list, str + List (or single string) of the variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. + idx_time : int + Index of time for which the data will be saved in the shapefile. + """ + + if idx_lev != 0: + msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev) + raise ValueError(msg) + + for var_name in var_list: + # station as dimension + if len(self.variables[var_name]["dimensions"]) == 1: + self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel() + # station and time as dimensions + else: + self.shapefile[var_name] = self.variables[var_name]["data"][:, idx_time].ravel() + + return None + + @staticmethod + def _get_axis_index_(axis): + if axis == "T": + value = 1 + elif axis == "X": + value = 0 + else: + raise ValueError("Unknown axis: {0}".format(axis)) + return value + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + return None diff --git a/build/lib/nes/nc_projections/points_nes_providentia.py b/build/lib/nes/nc_projections/points_nes_providentia.py new file mode 100644 index 0000000..ad3fc56 --- /dev/null +++ b/build/lib/nes/nc_projections/points_nes_providentia.py @@ -0,0 +1,650 @@ +#!/usr/bin/env python + +import sys +from warnings import warn +from copy import deepcopy +from numpy import ndarray, generic, array, issubdtype, character, concatenate +from .points_nes import PointsNes + + +class PointsNesProvidentia(PointsNes): + """ + + Attributes + ---------- + _model_centre_lon : dict + Model centre longitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + _model_centre_lat : dict + Model centre latitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + _grid_edge_lon : dict + Grid edge longitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + _grid_edge_lat : dict + Grid edge latitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + model_centre_lon : dict + Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. + model_centre_lat : dict + Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lon : dict + Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lat : dict + Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, + grid_edge_lat=None, + **kwargs): + """ + Initialize the PointsNesProvidentia class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". 
+ Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + model_centre_lon : dict + Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. + model_centre_lat : dict + Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lon : dict + Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lat : dict + Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. + """ + + super(PointsNesProvidentia, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, + create_nes=create_nes, times=times, balanced=balanced, **kwargs) + + if create_nes: + # Complete dimensions + self._model_centre_lon = model_centre_lon + self._model_centre_lat = model_centre_lat + self._grid_edge_lon = grid_edge_lon + self._grid_edge_lat = grid_edge_lat + else: + # Complete dimensions + self._model_centre_lon = self._get_coordinate_dimension(["model_centre_longitude"]) + self._model_centre_lat = self._get_coordinate_dimension(["model_centre_latitude"]) + self._grid_edge_lon = self._get_coordinate_dimension(["grid_edge_longitude"]) + self._grid_edge_lat = self._get_coordinate_dimension(["grid_edge_latitude"]) + + # Dimensions screening + self.model_centre_lon = self._get_coordinate_values(self._model_centre_lon, "") + self.model_centre_lat = self._get_coordinate_values(self._model_centre_lat, "") + self.grid_edge_lon = self._get_coordinate_values(self._grid_edge_lon, "") + self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, "") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + create_nes=False, balanced=False, times=None, + model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, + **kwargs): + """ + Initialize the PointsNesProvidentia class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use + last_level : int, None + Index of the last level to use. None if it is the last. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. 
+ times : list, None + List of times to substitute the current ones while creation. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + model_centre_lon : dict + Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. + model_centre_lat : dict + Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lon : dict + Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lat : dict + Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. + """ + + new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, + model_centre_lon=model_centre_lon, model_centre_lat=model_centre_lat, + grid_edge_lon=grid_edge_lon, grid_edge_lat=grid_edge_lat, **kwargs) + + return new + + def _create_dimensions(self, netcdf): + """ + Create "grid_edge", "model_latitude" and "model_longitude" dimensions and the super dimensions + "time", "time_nv", "station", and "strlen". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(PointsNesProvidentia, self)._create_dimensions(netcdf) + + # Create grid_edge, model_latitude and model_longitude dimensions + netcdf.createDimension("grid_edge", len(self._grid_edge_lon["data"])) + netcdf.createDimension("model_latitude", self._model_centre_lon["data"].shape[0]) + netcdf.createDimension("model_longitude", self._model_centre_lon["data"].shape[1]) + + return None + + def _create_dimension_variables(self, netcdf): + """ + Create the "model_centre_lon", model_centre_lat", "grid_edge_lon" and "grid_edge_lat" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
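+
+        Notes
+        -----
+        On top of the variables created by ``PointsNes``, this writes the
+        ``model_centre_longitude``, ``model_centre_latitude``,
+        ``grid_edge_longitude`` and ``grid_edge_latitude`` variables.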
+ """ + + super(PointsNesProvidentia, self)._create_dimension_variables(netcdf) + + # MODEL CENTRE LONGITUDES + model_centre_lon = netcdf.createVariable("model_centre_longitude", "f8", + ("model_latitude", "model_longitude",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + model_centre_lon.units = "degrees_east" + model_centre_lon.axis = "X" + model_centre_lon.long_name = "model centre longitude" + model_centre_lon.standard_name = "model centre longitude" + if self.size > 1: + model_centre_lon.set_collective(True) + msg = "2D meshed grid centre longitudes with " + msg += "{} longitudes in {} bands of latitude".format(self._model_centre_lon["data"].shape[1], + self._model_centre_lat["data"].shape[0]) + model_centre_lon.description = msg + model_centre_lon[:] = self._model_centre_lon["data"] + + # MODEL CENTRE LATITUDES + model_centre_lat = netcdf.createVariable("model_centre_latitude", "f8", + ("model_latitude", "model_longitude",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + model_centre_lat.units = "degrees_north" + model_centre_lat.axis = "Y" + model_centre_lat.long_name = "model centre latitude" + model_centre_lat.standard_name = "model centre latitude" + if self.size > 1: + model_centre_lat.set_collective(True) + msg = "2D meshed grid centre longitudes with " + msg += "{} longitudes in {} bands of latitude".format(self._model_centre_lon["data"].shape[1], + self._model_centre_lat["data"].shape[0]) + model_centre_lat[:] = self._model_centre_lat["data"] + + # GRID EDGE DOMAIN LONGITUDES + grid_edge_lon = netcdf.createVariable("grid_edge_longitude", "f8", "grid_edge") + grid_edge_lon.units = "degrees_east" + grid_edge_lon.axis = "X" + grid_edge_lon.long_name = "grid edge longitude" + grid_edge_lon.standard_name = "grid edge longitude" + if self.size > 1: + grid_edge_lon.set_collective(True) + msg = "Longitude coordinate along edge of grid domain " + msg += "(going clockwise around grid boundary from bottom-left corner)." + grid_edge_lon.description = msg + grid_edge_lon[:] = self._grid_edge_lon["data"] + + # GRID EDGE DOMAIN LATITUDES + grid_edge_lat = netcdf.createVariable("grid_edge_latitude", "f8", "grid_edge") + grid_edge_lat.units = "degrees_north" + grid_edge_lat.axis = "Y" + grid_edge_lat.long_name = "grid edge latitude" + grid_edge_lat.standard_name = "grid edge latitude" + if self.size > 1: + grid_edge_lat.set_collective(True) + msg = "Latitude coordinate along edge of grid domain " + msg += "(going clockwise around grid boundary from bottom-left corner)." + grid_edge_lat.description = msg + grid_edge_lat[:] = self._grid_edge_lat["data"] + + self.free_vars(["model_centre_longitude", "model_centre_latitude", "grid_edge_longitude", "grid_edge_latitude"]) + + # noinspection DuplicatedCode + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif coordinate_len == 3: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], :] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + elif coordinate_axis == "": + # pass for "model_centre_lon", "model_centre_lat", "grid_edge_lon" and "grid_edge_lat" + pass + + return values + + # noinspection DuplicatedCode + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 1, 2 or 3 dimensions + if len(var_dims) < 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif len(var_dims) == 3: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + :] + else: + raise NotImplementedError("Error with {0}. Only can be read netCDF with 3 dimensions or less".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + # noinspection DuplicatedCode + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open Dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + if self.variables is not None: + for i, (var_name, var_dict) in enumerate(self.variables.items()): + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, + var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception: # TODO: Detect exception + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") + + # Get dimensions when reading datasets + if "dimensions" in var_dict.keys(): + var_dims = var_dict["dimensions"] + # Get dimensions when creating new datasets + else: + if len(var_dict["data"].shape) == 1: + # For data that depends only on station (e.g. 
station_code) + var_dims = self._var_dim + else: + # For data that is dependent on time and station (e.g. PM10) + var_dims = self._var_dim + ("time",) + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, + complevel=self.zip_lvl, chunksizes=chunk_size) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if len(att_value.shape) == 1: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 2: + if "strlen" in var_dims: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + else: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 3: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + return None + + # noinspection DuplicatedCode + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. + """ + + data_list = deepcopy(data_to_gather) + for var_name, var_info in data_list.items(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + # concatenate over station + if self.parallel_method == "X": + if shp_len == 1: + # dimensions = (station) + axis = 0 + elif shp_len == 2: + # dimensions = (station, strlen) or + # dimensions = (station, time) + axis = 0 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + elif self.parallel_method == "T": + # concatenate over time + if shp_len == 1: + # dimensions = (station) + axis = None + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = None + else: + # dimensions = (station, time) + axis = 1 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "T"])) + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + # print(e, file=sys.stderr) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES", + keep_open=False): + """ + Write the netCDF output file. + + Parameters + ---------- + path : str + Path to the output netCDF file. + compression_level : int + Level of compression (0 to 9) Default: 0 (no compression). 
+        serial : bool
+            Indicates if you want to write in serial or not. Default: False.
+        info : bool
+            Indicates if you want to print the information of each writing step by stdout. Default: False.
+        chunking : bool
+            Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False.
+        nc_type : str
+            Type of NetCDF to write: "CAMS_RA" or "NES".
+        keep_open : bool
+            Indicates if you want to keep the NetCDF open to fill the data by time-step.
+        """
+
+        if (not serial) and (self.size > 1):
+            msg = "WARNING!!! "
+            msg += "Providentia datasets cannot be written in parallel yet. "
+            msg += "Changing to serial mode."
+            warn(msg)
+            sys.stderr.flush()
+
+        super(PointsNesProvidentia, self).to_netcdf(path, compression_level=compression_level,
+                                                    serial=True, info=info, chunking=chunking)
+
+        return None
+
+    # noinspection DuplicatedCode
+    def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0):
+        """
+        Add variables data to shapefile.
+
+        Parameters
+        ----------
+        var_list : list, str
+            List (or single string) of the variables to be loaded and saved in the shapefile.
+        idx_lev : int
+            Index of vertical level for which the data will be saved in the shapefile.
+        idx_time : int
+            Index of time for which the data will be saved in the shapefile.
+        """
+
+        if idx_lev != 0:
+            msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev)
+            raise ValueError(msg)
+
+        for var_name in var_list:
+            # station as dimension
+            if len(self.variables[var_name]["dimensions"]) == 1:
+                self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel()
+            # station and time as dimensions
+            else:
+                self.shapefile[var_name] = self.variables[var_name]["data"][:, idx_time].ravel()
+
+        return None
+
+    @staticmethod
+    def _get_axis_index_(axis):
+        if axis == "T":
+            value = 1
+        elif axis == "X":
+            value = 0
+        else:
+            raise ValueError("Unknown axis: {0}".format(axis))
+        return value
+
+    @staticmethod
+    def _set_var_crs(var):
+        """
+        Set the grid_mapping.
+
+        Parameters
+        ----------
+        var : Variable
+            netCDF4-python variable object.
+        """
+        return None
diff --git a/build/lib/nes/nc_projections/rotated_nes.py b/build/lib/nes/nc_projections/rotated_nes.py
new file mode 100644
index 0000000..c5c3794
--- /dev/null
+++ b/build/lib/nes/nc_projections/rotated_nes.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+
+from numpy import (float64, linspace, cos, sin, arcsin, arctan2, array, mean, diff, append, flip, repeat, concatenate,
+                   vstack)
+from math import pi
+from geopandas import GeoDataFrame
+from pandas import Index
+from pyproj import Proj
+from copy import deepcopy
+from typing import Dict, Any
+from shapely.geometry import Polygon, Point
+from .default_nes import Nes
+
+
+class RotatedNes(Nes):
+    """
+
+    Attributes
+    ----------
+    _full_rlat : dict
+        Rotated latitudes dictionary with the complete "data" key for all the values and the rest of the attributes.
+    _full_rlon : dict
+        Rotated longitudes dictionary with the complete "data" key for all the values and the rest of the attributes.
+    rlat : dict
+        Rotated latitudes dictionary with the portion of "data" corresponding to the rank values.
+    rlon : dict
+        Rotated longitudes dictionary with the portion of "data" corresponding to the rank values.
+    _var_dim : tuple
+        A Tuple with the name of the Y and X dimensions for the variables.
+        ("rlat", "rlon") for a rotated projection.
+    _lat_dim : tuple
+        A Tuple with the name of the dimensions of the Latitude values.
+        ("rlat", "rlon") for a rotated projection.
+ _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("rlat", "rlon") for a rotated projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the RotatedNes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + self._full_rlat = None + self._full_rlon = None + + super(RotatedNes, self).__init__(comm=comm, path=path, + info=info, dataset=dataset, balanced=balanced, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Complete dimensions + # self._full_rlat, self._full_rlon = self._create_rotated_coordinates() + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + else: + # Complete dimensions + self._full_rlat = self._get_coordinate_dimension("rlat") + self._full_rlon = self._get_coordinate_dimension("rlon") + + # Dimensions screening + self.rlat = self._get_coordinate_values(self.get_full_rlat(), "Y") + self.rlon = self._get_coordinate_values(self.get_full_rlon(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("rlat", "rlon") + self._lat_dim = ("rlat", "rlon") + self._lon_dim = ("rlat", "rlon") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default over Y axis + accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int or None + Index of the last level to use. None if it is the last. 
+ create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : List[datetime] or None + List of times to substitute the current ones while creation. + """ + + new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def get_full_rlat(self) -> Dict[str, Any]: + """ + Retrieve the complete rotated latitude information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_rlat) + + return data + + def get_full_rlon(self) -> Dict[str, Any]: + """ + Retrieve the complete rotated longitude information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_rlon) + return data + + def set_full_rlat(self, data: Dict[str, Any]) -> None: + """ + Set the complete rotated latitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + if self.master: + self._full_rlat = data + return None + + def set_full_rlon(self, data: Dict[str, Any]) -> None: + """ + Set the complete rotated longitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + if self.master: + self._full_rlon = data + return None + + # noinspection DuplicatedCode + def _filter_coordinates_selection(self): + """ + Use the selection limits to filter rlat, rlon, time, lev, lat, lon, lon_bnds and lat_bnds. + """ + + idx = self._get_idx_intervals() + + full_rlat = self.get_full_rlat() + full_rlon = self.get_full_rlon() + + self.rlat = self._get_coordinate_values(full_rlat, "Y") + self.rlon = self._get_coordinate_values(full_rlon, "X") + + if self.master: + self.set_full_rlat({'data': full_rlat["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) + self.set_full_rlon({'data': full_rlon["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) + + super(RotatedNes, self)._filter_coordinates_selection() + + return None + + def _get_pyproj_projection(self): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. 
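+
+        Notes
+        -----
+        A minimal standalone sketch of the equivalent construction (the pole
+        parameters and radius are hypothetical; the real values come from
+        ``self.projection_data`` and ``self.earth_radius``)::
+
+            from pyproj import Proj
+            projection = Proj(proj="ob_tran", o_proj="longlat", ellps="WGS84",
+                              R=6370000.0,
+                              o_lat_p=39.25,    # grid_north_pole_latitude
+                              o_lon_p=-162.0)   # grid_north_pole_longitude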
+ """ + + projection = Proj(proj="ob_tran", + o_proj="longlat", + ellps="WGS84", + R=self.earth_radius[0], + o_lat_p=float64(self.projection_data["grid_north_pole_latitude"]), + o_lon_p=float64(self.projection_data["grid_north_pole_longitude"]), + ) + + return projection + + # noinspection DuplicatedCode + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + if create_nes: + projection_data = {"grid_mapping_name": "rotated_latitude_longitude", + "grid_north_pole_latitude": 90 - kwargs["centre_lat"], + "grid_north_pole_longitude": -180 + kwargs["centre_lon"], + "inc_rlat": kwargs["inc_rlat"], + "inc_rlon": kwargs["inc_rlon"], + "south_boundary": kwargs["south_boundary"], + "west_boundary": kwargs["west_boundary"], + } + else: + if "rotated_pole" in self.variables.keys(): + projection_data = self.variables["rotated_pole"] + self.free_vars("rotated_pole") + else: + msg = "There is no variable called rotated_pole, projection has not been defined." + raise RuntimeError(msg) + + if "dtype" in projection_data.keys(): + del projection_data["dtype"] + + if "data" in projection_data.keys(): + del projection_data["data"] + + if "dimensions" in projection_data.keys(): + del projection_data["dimensions"] + + return projection_data + + def _create_dimensions(self, netcdf): + """ + Create "rlat", "rlon" and "spatial_nv" dimensions and the dimensions "lev", "time", "time_nv", "lon" and "lat". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(RotatedNes, self)._create_dimensions(netcdf) + + shape = self.get_full_shape() + # Create rlat and rlon dimensions + netcdf.createDimension("rlon", shape[1]) + netcdf.createDimension("rlat", shape[0]) + + # Create spatial_nv (number of vertices) dimension + if (self.lat_bnds is not None) and (self.lon_bnds is not None): + netcdf.createDimension("spatial_nv", 4) + pass + + return None + + def _create_dimension_variables(self, netcdf): + """ + Create the "rlat" and "rlon" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + super(RotatedNes, self)._create_dimension_variables(netcdf) + + # ROTATED LATITUDES + full_rlat = self.get_full_rlat() + rlat = netcdf.createVariable("rlat", full_rlat["data"].dtype, ("rlat",)) + rlat.long_name = "latitude in rotated pole grid" + if "units" in full_rlat.keys(): + rlat.units = full_rlat["units"] + else: + rlat.units = "degrees" + rlat.standard_name = "grid_latitude" + if self.size > 1: + rlat.set_collective(True) + rlat[:] = full_rlat["data"] + + # ROTATED LONGITUDES + full_rlon = self.get_full_rlon() + rlon = netcdf.createVariable("rlon", full_rlon["data"].dtype, ("rlon",)) + rlon.long_name = "longitude in rotated pole grid" + if "units" in full_rlon.keys(): + rlon.units = full_rlon["units"] + else: + rlon.units = "degrees" + rlon.standard_name = "grid_longitude" + if self.size > 1: + rlon.set_collective(True) + rlon[:] = full_rlon["data"] + + return None + + def _create_rotated_coordinates(self): + """ + Calculate rotated latitudes and longitudes from grid details. + + Returns + ---------- + _rlat : dict + Rotated latitudes dictionary with the "data" key for all the values and the rest of the attributes. 
+        _rlon : dict
+            Rotated longitudes dictionary with the "data" key for all the values and the rest of the attributes.
+        """
+        # Get grid resolution
+        inc_rlon = float64(self.projection_data["inc_rlon"])
+        inc_rlat = float64(self.projection_data["inc_rlat"])
+
+        # Get south and west boundaries
+        south_boundary = float64(self.projection_data["south_boundary"])
+        west_boundary = float64(self.projection_data["west_boundary"])
+
+        # Calculate rotated latitudes
+        n_lat = int((abs(south_boundary) / inc_rlat) * 2 + 1)
+        rlat = linspace(south_boundary, south_boundary + (inc_rlat * (n_lat - 1)), n_lat, dtype=float64)
+
+        # Calculate rotated longitudes
+        n_lon = int((abs(west_boundary) / inc_rlon) * 2 + 1)
+        rlon = linspace(west_boundary, west_boundary + (inc_rlon * (n_lon - 1)), n_lon, dtype=float64)
+
+        return {"data": rlat}, {"data": rlon}
+
+    def rotated2latlon(self, lon_deg, lat_deg, lon_min=-180):
+        """
+        Calculate the unrotated coordinates using the rotated ones.
+
+        Parameters
+        ----------
+        lon_deg : array
+            Rotated longitude coordinate.
+        lat_deg : array
+            Rotated latitude coordinate.
+        lon_min : float
+            Minimum value for the longitudes: -180 (-180 to 180) or 0 (0 to 360).
+
+        Returns
+        ----------
+        almd : array
+            Unrotated longitudes.
+        aphd : array
+            Unrotated latitudes.
+        """
+
+        # Get centre coordinates
+        centre_lat = 90 - float64(self.projection_data["grid_north_pole_latitude"])
+        centre_lon = float64(self.projection_data["grid_north_pole_longitude"]) + 180
+
+        # Convert to radians
+        degrees_to_radians = pi / 180.
+        tph0 = centre_lat * degrees_to_radians
+        tlm = lon_deg * degrees_to_radians
+        tph = lat_deg * degrees_to_radians
+
+        tlm0d = -180 + centre_lon
+        ctph0 = cos(tph0)
+        stph0 = sin(tph0)
+        stlm = sin(tlm)
+        ctlm = cos(tlm)
+        stph = sin(tph)
+        ctph = cos(tph)
+
+        # Calculate unrotated latitudes
+        sph = (ctph0 * stph) + (stph0 * ctph * ctlm)
+        sph[sph > 1.] = 1.
+        sph[sph < -1.] = -1.
+        aph = arcsin(sph)
+        aphd = aph / degrees_to_radians
+
+        # Calculate unrotated longitudes
+        anum = ctph * stlm
+        denom = (ctlm * ctph - stph0 * sph) / ctph0
+        relm = arctan2(anum, denom) - pi
+        almd = relm / degrees_to_radians + tlm0d
+        almd[almd > (lon_min + 360)] -= 360
+        almd[almd < lon_min] += 360
+
+        return almd, aphd
+
+    def _create_centre_coordinates(self, **kwargs):
+        """
+        Calculate centre latitudes and longitudes from grid details.
+
+        Returns
+        ----------
+        centre_lat : dict
+            Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
+        centre_lon : dict
+            Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
+        """
+        if self.master:
+            # Complete dimensions
+            self._full_rlat, self._full_rlon = self._create_rotated_coordinates()
+
+            # Calculate centre latitudes and longitudes (1D to 2D)
+            centre_lon, centre_lat = self.rotated2latlon(
+                array([self._full_rlon["data"]] * len(self._full_rlat["data"])),
+                array([self._full_rlat["data"]] * len(self._full_rlon["data"])).T)
+
+            return {"data": centre_lat}, {"data": centre_lon}
+        else:
+            return None, None
+
+    def create_providentia_exp_centre_coordinates(self):
+        """
+        Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays.
+
+        Returns
+        ----------
+        model_centre_lat : dict
+            Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
+        model_centre_lon : dict
+            Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
+ """ + + # Get centre latitudes + model_centre_lat = self.lat + + # Get centre longitudes + model_centre_lon = self.lon + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + + # Get grid resolution + inc_rlon = abs(mean(diff(self.rlon["data"]))) + inc_rlat = abs(mean(diff(self.rlat["data"]))) + + # Get bounds for rotated coordinates + rlat_bounds = self._create_single_spatial_bounds(self.rlat["data"], inc_rlat) + rlon_bounds = self._create_single_spatial_bounds(self.rlon["data"], inc_rlon) + + # Get rotated latitudes for grid edge + left_edge_rlat = append(rlat_bounds.flatten()[::2], rlat_bounds.flatten()[-1]) + right_edge_rlat = flip(left_edge_rlat, 0) + top_edge_rlat = repeat(rlat_bounds[-1][-1], len(self.rlon["data"]) - 1) + bottom_edge_rlat = repeat(rlat_bounds[0][0], len(self.rlon["data"])) + rlat_grid_edge = concatenate((left_edge_rlat, top_edge_rlat, right_edge_rlat, bottom_edge_rlat)) + + # Get rotated longitudes for grid edge + left_edge_rlon = repeat(rlon_bounds[0][0], len(self.rlat["data"]) + 1) + top_edge_rlon = rlon_bounds.flatten()[1:-1:2] + right_edge_rlon = repeat(rlon_bounds[-1][-1], len(self.rlat["data"]) + 1) + bottom_edge_rlon = flip(rlon_bounds.flatten()[:-1:2], 0) + rlon_grid_edge = concatenate((left_edge_rlon, top_edge_rlon, right_edge_rlon, bottom_edge_rlon)) + + # Get edges for regular coordinates + grid_edge_lon_data, grid_edge_lat_data = self.rotated2latlon(rlon_grid_edge, rlat_grid_edge) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T + + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + # noinspection DuplicatedCode + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + + # Calculate rotated coordinates bounds + full_rlat = self.get_full_rlat() + full_rlon = self.get_full_rlon() + inc_rlat = abs(mean(diff(full_rlat["data"]))) + rlat_bnds = self._create_single_spatial_bounds(array([full_rlat["data"]] * len(full_rlon["data"])).T, + inc_rlat, spatial_nv=4, inverse=True) + + inc_rlon = abs(mean(diff(full_rlon["data"]))) + rlon_bnds = self._create_single_spatial_bounds(array([full_rlon["data"]] * len(full_rlat["data"])), + inc_rlon, spatial_nv=4) + + # Transform rotated bounds to regular bounds + lon_bnds, lat_bnds = self.rotated2latlon(rlon_bnds, rlat_bnds) + + # Obtain regular coordinates bounds + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + return None + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "rotated_pole". + + Parameters + ---------- + var : Variable + netCDF4-python variable object. 
+ """ + + var.grid_mapping = "rotated_pole" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the rotated latitude longitude grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset. + """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("rotated_pole", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.grid_north_pole_latitude = self.projection_data["grid_north_pole_latitude"] + mapping.grid_north_pole_longitude = self.projection_data["grid_north_pole_longitude"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if you want to flip the latitude coordinates. + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written in a Rotated pole projection.") + + # noinspection DuplicatedCode + def create_shapefile(self): + """ + Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + self.lat_bnds["data"].shape[2])) + aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + self.lon_bnds["data"].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons + fids = self.get_fids() + gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + # noinspection DuplicatedCode + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon["data"].shape[0]): + for lon_ind in range(0, self.lon["data"].shape[1]): + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + self.lat["data"][lat_ind, lon_ind])) + + # Create dataframe cointaining all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf diff --git a/build/lib/nes/nc_projections/rotated_nested_nes.py b/build/lib/nes/nc_projections/rotated_nested_nes.py new file mode 100644 index 0000000..4517701 --- /dev/null +++ b/build/lib/nes/nc_projections/rotated_nested_nes.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python + +from numpy import linspace, float64 +from netCDF4 import Dataset +from .rotated_nes import RotatedNes + + +class RotatedNestedNes(RotatedNes): + + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the RotatedNestedNes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset or None + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + super(RotatedNestedNes, self).__init__(comm=comm, path=path, + info=info, dataset=dataset, balanced=balanced, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + @staticmethod + def _get_parent_attributes(projection_data): + """ + Get projection attributes from parent grid. + + Parameters + ---------- + projection_data : dict + Dictionary with the projection information. + + Returns + ------- + projection_data : dict + Dictionary with the projection information, including parameters from the parent grid. 
+ """ + + # Read variables from parent grid + netcdf = Dataset(projection_data["parent_grid_path"], mode="r") + rlat = netcdf.variables["rlat"][:] + rlon = netcdf.variables["rlon"][:] + rotated_pole = netcdf.variables["rotated_pole"] + + # j_parent_start starts at index 1, so we must subtract 1 + projection_data["inc_rlat"] = (rlat[1] - rlat[0]) / projection_data["parent_ratio"] + projection_data["1st_rlat"] = rlat[int(projection_data["j_parent_start"]) - 1] + + # i_parent_start starts at index 1, so we must subtract 1 + projection_data["inc_rlon"] = (rlon[1] - rlon[0]) / projection_data["parent_ratio"] + projection_data["1st_rlon"] = rlon[int(projection_data["i_parent_start"]) - 1] + + projection_data["grid_north_pole_longitude"] = rotated_pole.grid_north_pole_longitude + projection_data["grid_north_pole_latitude"] = rotated_pole.grid_north_pole_latitude + + netcdf.close() + + return projection_data + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + if create_nes: + projection_data = {"grid_mapping_name": "rotated_latitude_longitude", + "parent_grid_path": kwargs["parent_grid_path"], + "parent_ratio": kwargs["parent_ratio"], + "i_parent_start": kwargs["i_parent_start"], + "j_parent_start": kwargs["j_parent_start"], + "n_rlat": kwargs["n_rlat"], + "n_rlon": kwargs["n_rlon"] + } + projection_data = self._get_parent_attributes(projection_data) + else: + projection_data = super()._get_projection_data(create_nes, **kwargs) + + return projection_data + + def _create_rotated_coordinates(self): + """ + Calculate rotated latitudes and longitudes from grid details. + + Returns + ---------- + _rlat : dict + Rotated latitudes dictionary with the "data" key for all the values and the rest of the attributes. + _rlon : dict + Rotated longitudes dictionary with the "data" key for all the values and the rest of the attributes. 
+ """ + + # Get grid resolution + inc_rlon = self.projection_data["inc_rlon"] + inc_rlat = self.projection_data["inc_rlat"] + + # Get number of rotated coordinates + n_rlat = self.projection_data["n_rlat"] + n_rlon = self.projection_data["n_rlon"] + + # Get first coordinates + first_rlat = self.projection_data["1st_rlat"] + first_rlon = self.projection_data["1st_rlon"] + + # Calculate rotated latitudes + rlat = linspace(first_rlat, first_rlat + (inc_rlat * (n_rlat - 1)), n_rlat, dtype=float64) + + # Calculate rotated longitudes + rlon = linspace(first_rlon, first_rlon + (inc_rlon * (n_rlon - 1)), n_rlon, dtype=float64) + + return {"data": rlat}, {"data": rlon} + \ No newline at end of file diff --git a/build/lib/nes/nes_formats/__init__.py b/build/lib/nes/nes_formats/__init__.py new file mode 100644 index 0000000..39aaf30 --- /dev/null +++ b/build/lib/nes/nes_formats/__init__.py @@ -0,0 +1,9 @@ +from .cams_ra_format import to_netcdf_cams_ra +from .monarch_format import to_netcdf_monarch, to_monarch_units +from .cmaq_format import to_netcdf_cmaq, to_cmaq_units +from .wrf_chem_format import to_netcdf_wrf_chem, to_wrf_chem_units + +__all__ = [ + 'to_netcdf_cams_ra', 'to_netcdf_monarch', 'to_monarch_units', 'to_netcdf_cmaq', 'to_cmaq_units', + 'to_netcdf_wrf_chem', 'to_wrf_chem_units' +] diff --git a/build/lib/nes/nes_formats/cams_ra_format.py b/build/lib/nes/nes_formats/cams_ra_format.py new file mode 100644 index 0000000..480becc --- /dev/null +++ b/build/lib/nes/nes_formats/cams_ra_format.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +import sys +import nes +from numpy import float64, float32, int32, array +from warnings import warn +from netCDF4 import Dataset +from mpi4py import MPI +from copy import copy + + +# noinspection DuplicatedCode +def to_netcdf_cams_ra(self, path): + """ + Horizontal methods from one grid to another one. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + """ + + if not isinstance(self, nes.LatLonNes): + raise TypeError("CAMS Re-Analysis format must have Regular Lat-Lon projection") + if "" not in path: + raise ValueError(f"AMS Re-Analysis path must contain '' as pattern; current: '{path}'") + + orig_path = copy(path) + + for i_lev, level in enumerate(self.lev["data"]): + path = orig_path.replace("", "l{0}".format(i_lev)) + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + self.to_dtype(data_type=float32) + + # Create dimensions + create_dimensions(self, netcdf) + + # Create variables + create_variables(self, netcdf, i_lev) + + # Create dimension variables + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Close NetCDF + if self.global_attrs is not None: + for att_name, att_value in self.global_attrs.items(): + netcdf.setncattr(att_name, att_value) + + netcdf.close() + + return None + + +def create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + # Create time dimension + netcdf.createDimension("time", None) + + # Create lev, lon and lat dimensions + netcdf.createDimension("lat", len(self.get_full_latitudes()["data"])) + netcdf.createDimension("lon", len(self.get_full_longitudes()["data"])) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + # LATITUDES + lat = netcdf.createVariable("lat", float64, ("lat",)) + lat.standard_name = "latitude" + lat.long_name = "latitude" + lat.units = "degrees_north" + lat.axis = "Y" + + if self.size > 1: + lat.set_collective(True) + lat[:] = self.get_full_latitudes()["data"] + + # LONGITUDES + lon = netcdf.createVariable("lon", float64, ("lon",)) + lon.long_name = "longitude" + lon.standard_name = "longitude" + lon.units = "degrees_east" + lon.axis = "X" + if self.size > 1: + lon.set_collective(True) + lon[:] = self.get_full_longitudes()["data"] + + # TIMES + time_var = netcdf.createVariable("time", float64, ("time",)) + time_var.standard_name = "time" + time_var.units = "day as %Y%m%d.%f" + time_var.calendar = "proleptic_gregorian" + time_var.axis = "T" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = __date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): + self._get_time_id(self.hours_end, first=False)]) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf, i_lev): + """ + Create and write variables to a netCDF file. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + i_lev : int + The specific level index to write data for. + """ + + for i, (var_name, var_dict) in enumerate(self.variables.items()): + if var_dict["data"] is not None: + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(self.variables))) + try: + var = netcdf.createVariable(var_name, float32, ("time", "lat", "lon",), + zlib=True, complevel=7, least_significant_digit=3) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_dict["data"][:, i_lev, :, :] + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + var.long_name = var_dict["long_name"] + var.units = var_dict["units"] + var.number_of_significant_digits = int32(3) + + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + except Exception as e: + print(f"**ERROR** an error has occurred while writing the '{var_name}' variable") + raise e + else: + msg = "WARNING!!! " + msg += "Variable {0} was not loaded. 
It will not be written.".format(var_name) + warn(msg) + sys.stderr.flush() + + return None + + +def __date2num(time_array): + """ + Convert an array of datetime objects to numerical values. + + Parameters + ---------- + time_array : List[datetime.datetime] + List of datetime objects to be converted. + + Returns + ------- + numpy.ndarray + Array of numerical time values, with each date represented as a float. + + Notes + ----- + The conversion represents each datetime as a float in the format YYYYMMDD.HH/24. + """ + + time_res = [] + for aux_time in time_array: + time_res.append(float(aux_time.strftime("%Y%m%d")) + (float(aux_time.strftime("%H")) / 24)) + time_res = array(time_res, dtype=float64) + + return time_res diff --git a/build/lib/nes/nes_formats/cmaq_format.py b/build/lib/nes/nes_formats/cmaq_format.py new file mode 100644 index 0000000..30a5cea --- /dev/null +++ b/build/lib/nes/nes_formats/cmaq_format.py @@ -0,0 +1,355 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, array, ndarray, empty, int32, float64 +from netCDF4 import Dataset +from mpi4py import MPI +from copy import deepcopy +from datetime import datetime + +GLOBAL_ATTRIBUTES_ORDER = [ + "IOAPI_VERSION", "EXEC_ID", "FTYPE", "CDATE", "CTIME", "WDATE", "WTIME", "SDATE", "STIME", "TSTEP", "NTHIK", + "NCOLS", "NROWS", "NLAYS", "NVARS", "GDTYP", "P_ALP", "P_BET", "P_GAM", "XCENT", "YCENT", "XORIG", "YORIG", + "XCELL", "YCELL", "VGTYP", "VGTOP", "VGLVLS", "GDNAM", "UPNAM", "FILEDESC", "HISTORY", "VAR-LIST"] + + +# noinspection DuplicatedCode +def to_netcdf_cmaq(self, path, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + keep_open : bool + Indicates if you want to keep open the NetCDH to fill the data by time-step. + """ + + self.to_dtype(float32) + + set_global_attributes(self) + change_variable_attributes(self) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + create_dimensions(self, netcdf) + + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create variables + create_variables(self, netcdf) + + for att_name in GLOBAL_ATTRIBUTES_ORDER: + netcdf.setncattr(att_name, self.global_attrs[att_name]) + + # Close NetCDF + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +def change_variable_attributes(self): + """ + Modify the emission list to be consistent to use the output as input for CMAQ model. + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
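+
+    Notes
+    -----
+    CMAQ/IOAPI expects fixed-width character attributes. A sketch of the
+    padding applied below (the species name and description are hypothetical)::
+
+        units = "{:<16}".format("mole/s")   # 16 characters, left-justified
+        long_name = "{:<16}".format("NO2")
+        var_desc = "{:<80}".format("Nitrogen dioxide emissions")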
+ """ + + for var_name in self.variables.keys(): + + if self.variables[var_name]["units"] == "mol.s-1": + self.variables[var_name]["units"] = "{:<16}".format("mole/s") + self.variables[var_name]["var_desc"] = "{:<80}".format(self.variables[var_name]["long_name"]) + self.variables[var_name]["long_name"] = "{:<16}".format(var_name) + elif self.variables[var_name]["units"] == "g.s-1": + self.variables[var_name]["units"] = "{:<16}".format("g/s") + self.variables[var_name]["var_desc"] = "{:<80}".format(self.variables[var_name]["long_name"]) + self.variables[var_name]["long_name"] = "{:<16}".format(var_name) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.s-1' or 'g.s-1'") + + return None + + +def to_cmaq_units(self): + """ + Change the data values according to the CMAQ conventions + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + self.calculate_grid_area(overwrite=False) + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol.s-1": + # Kmol.m-2.s-1 to mol.s-1 + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 1000 * self.cell_measures["cell_area"]["data"], dtype=float32) + elif self.variables[var_name]["units"] == "g.s-1": + # Kg.m-2.s-1 to g.s-1 + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 1000 * self.cell_measures["cell_area"]["data"], dtype=float32) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.s-1' or 'g.s-1'") + self.variables[var_name]["dtype"] = float32 + + return self.variables + + +def create_tflag(self): + """ + Create the content of the CMAQ variable TFLAG. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + numpy.ndarray + Array with the content of TFLAG. + """ + + t_flag = empty((len(self.time), len(self.variables), 2)) + + for i_d, aux_date in enumerate(self.time): + y_d = int(aux_date.strftime("%Y%j")) + hms = int(aux_date.strftime("%H%M%S")) + for i_p in range(len(self.variables)): + t_flag[i_d, i_p, 0] = y_d + t_flag[i_d, i_p, 1] = hms + + return t_flag + + +def str_var_list(self): + """ + Transform the list of variable names to a string with the elements with 16 white spaces. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + str + List of variable names transformed on string. + """ + + str_var_list_aux = "" + for var in self.variables.keys(): + str_var_list_aux += "{:<16}".format(var) + + return str_var_list_aux + + +# noinspection DuplicatedCode +def set_global_attributes(self): + """ + Set the NetCDF global attributes. + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+ """ + + now = datetime.now() + if len(self.time) > 1: + tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 + else: + tstep = 1 * 10000 + + current_attributes = deepcopy(self.global_attrs) + del self.global_attrs + + self.global_attrs = {"IOAPI_VERSION": "None: made only with NetCDF libraries", + "EXEC_ID": "{:<80}".format("0.1alpha"), # Editable + "FTYPE": int32(1), # Editable + "CDATE": int32(now.strftime("%Y%j")), + "CTIME": int32(now.strftime("%H%M%S")), + "WDATE": int32(now.strftime("%Y%j")), + "WTIME": int32(now.strftime("%H%M%S")), + "SDATE": int32(self.time[0].strftime("%Y%j")), + "STIME": int32(self.time[0].strftime("%H%M%S")), + "TSTEP": int32(tstep), + "NTHIK": int32(1), # Editable + "NCOLS": None, # Projection dependent + "NROWS": None, # Projection dependent + "NLAYS": int32(len(self.lev["data"])), + "NVARS": None, # Projection dependent + "GDTYP": None, # Projection dependent + "P_ALP": None, # Projection dependent + "P_BET": None, # Projection dependent + "P_GAM": None, # Projection dependent + "XCENT": None, # Projection dependent + "YCENT": None, # Projection dependent + "XORIG": None, # Projection dependent + "YORIG": None, # Projection dependent + "XCELL": None, # Projection dependent + "YCELL": None, # Projection dependent + "VGTYP": int32(7), # Editable + "VGTOP": float32(5000.), # Editable + "VGLVLS": array([1., 0.], dtype=float32), # Editable + "GDNAM": "{:<16}".format(""), # Editable + "UPNAM": "{:<16}".format("HERMESv3"), + "FILEDESC": "", # Editable + "HISTORY": "", # Editable + "VAR-LIST": str_var_list(self)} + + # Editable attributes + for att_name, att_value in current_attributes.items(): + if att_name == "EXEC_ID": + self.global_attrs[att_name] = "{:<80}".format(att_value) # Editable + elif att_name == "FTYPE": + self.global_attrs[att_name] = int32(att_value) # Editable + elif att_name == "NTHIK": + self.global_attrs[att_name] = int32(att_value) # Editable + elif att_name == "VGTYP": + self.global_attrs[att_name] = int32(att_value) # Editable + elif att_name == "VGTOP": + self.global_attrs[att_name] = float32(att_value) # Editable + elif att_name == "VGLVLS": + self.global_attrs[att_name] = array(att_value.split(), dtype=float32) # Editable + elif att_name == "GDNAM": + self.global_attrs[att_name] = "{:<16}".format(att_value) # Editable + elif att_name == "FILEDESC": + self.global_attrs[att_name] = att_value # Editable + elif att_name == "HISTORY": + self.global_attrs[att_name] = att_value # Editable + + # Projection dependent attributes + if isinstance(self, nes.LCCNes): + self.global_attrs["NCOLS"] = int32(len(self._full_x["data"])) + self.global_attrs["NROWS"] = int32(len(self._full_y["data"])) + self.global_attrs["NVARS"] = int32(len(self.variables)) + self.global_attrs["GDTYP"] = int32(2) + + self.global_attrs["P_ALP"] = float64(self.projection_data["standard_parallel"][0]) + self.global_attrs["P_BET"] = float64(self.projection_data["standard_parallel"][1]) + self.global_attrs["P_GAM"] = float64(self.projection_data["longitude_of_central_meridian"]) + self.global_attrs["XCENT"] = float64(self.projection_data["longitude_of_central_meridian"]) + self.global_attrs["YCENT"] = float64(self.projection_data["latitude_of_projection_origin"]) + self.global_attrs["XORIG"] = float64( + self._full_x["data"][0]) - (float64(self._full_x["data"][1] - self._full_x["data"][0]) / 2) + self.global_attrs["YORIG"] = float64( + self._full_y["data"][0]) - (float64(self._full_y["data"][1] - self._full_y["data"][0]) / 2) + self.global_attrs["XCELL"] = 
float64(self._full_x["data"][1] - self._full_x["data"][0]) + self.global_attrs["YCELL"] = float64(self._full_y["data"][1] - self._full_y["data"][0]) + + return None + + +def create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + netcdf.createDimension("TSTEP", len(self.get_full_times())) + netcdf.createDimension("DATE-TIME", 2) + netcdf.createDimension("LAY", len(self.get_full_levels()["data"])) + netcdf.createDimension("VAR", len(self.variables)) + if isinstance(self, nes.LCCNes): + netcdf.createDimension("COL", len(self._full_x["data"])) + netcdf.createDimension("ROW", len(self._full_y["data"])) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "y" and "x" variables. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + netcdf : Dataset + NetCDF object. + """ + + tflag = netcdf.createVariable("TFLAG", "i", ("TSTEP", "VAR", "DATE-TIME",)) + tflag.setncatts({"units": "{:<16}".format(""), "long_name": "{:<16}".format("TFLAG"), + "var_desc": "{:<80}".format("Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS")}) + tflag[:] = create_tflag(self) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf): + """ + Create the netCDF file variables. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + for var_name, var_info in self.variables.items(): + var = netcdf.createVariable(var_name, "f", ("TSTEP", "LAY", "ROW", "COL",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + var.units = var_info["units"] + var.long_name = str(var_info["long_name"]) + var.var_desc = str(var_info["var_desc"]) + if var_info["data"] is not None: + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + + if isinstance(var_info["data"], int) and var_info["data"] == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(var_info["data"].shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"] + + return None diff --git a/build/lib/nes/nes_formats/monarch_format.py b/build/lib/nes/nes_formats/monarch_format.py new file mode 100644 index 0000000..0a50e75 --- /dev/null +++ b/build/lib/nes/nes_formats/monarch_format.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, array, ndarray +from netCDF4 import Dataset +from mpi4py import MPI + + +# noinspection DuplicatedCode +def to_netcdf_monarch(self, path, chunking=False, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + chunking: bool + Indicates if you want to chunk the output netCDF. + keep_open : bool + Indicates if you want to keep open the NetCDH to fill the data by time-step. 
+ """ + + self.to_dtype(float32) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + self._create_dimensions(netcdf) + + # Create dimension variables + if self.master: + self._full_lev["data"] = array(self._full_lev["data"], dtype=float32) + self._full_lat["data"] = array(self._full_lat["data"], dtype=float32) + self._full_lat_bnds["data"] = array(self._full_lat_bnds["data"], dtype=float32) + self._full_lon["data"] = array(self._full_lon["data"], dtype=float32) + self._full_lon_bnds["data"] = array(self._full_lon_bnds["data"], dtype=float32) + + if isinstance(self, nes.RotatedNes): + self._full_rlat["data"] = array(self._full_rlat["data"], dtype=float32) + self._full_rlon["data"] = array(self._full_rlon["data"], dtype=float32) + if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): + self._full_y["data"] = array(self._full_y["data"], dtype=float32) + self._full_x["data"] = array(self._full_x["data"], dtype=float32) + + self._create_dimension_variables(netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create cell measures + if "cell_area" in self.cell_measures.keys(): + self.cell_measures["cell_area"]["data"] = array(self.cell_measures["cell_area"]["data"], dtype=float32) + self._create_cell_measures(netcdf) + + # Create variables + self._create_variables(netcdf, chunking=chunking) + + # Create metadata + self._create_metadata(netcdf) + + # Close NetCDF + if self.global_attrs is not None: + for att_name, att_value in self.global_attrs.items(): + netcdf.setncattr(att_name, att_value) + netcdf.setncattr("Conventions", "CF-1.7") + + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +def to_monarch_units(self): + """ + Change the data values according to the MONARCH conventions. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol.s-1.m-2": + # Kmol to mol + self.variables[var_name]["data"] = array(self.variables[var_name]["data"] * 1000, dtype=float32) + elif self.variables[var_name]["units"] == "kg.s-1.m-2": + # No unit change needed + self.variables[var_name]["data"] = array(self.variables[var_name]["data"], dtype=float32) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. 
".format( + self.variables[var_name]["units"], var_name) + + "Should be 'mol.s-1.m-2' or 'kg.s-1.m-2'") + self.variables[var_name]["dtype"] = float32 + return self.variables diff --git a/build/lib/nes/nes_formats/wrf_chem_format.py b/build/lib/nes/nes_formats/wrf_chem_format.py new file mode 100644 index 0000000..6a06af4 --- /dev/null +++ b/build/lib/nes/nes_formats/wrf_chem_format.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, int32, ndarray, array, chararray +from netCDF4 import Dataset +from mpi4py import MPI +from copy import deepcopy + +GLOBAL_ATTRIBUTES_ORDER = [ + "TITLE", "START_DATE", "WEST-EAST_GRID_DIMENSION", "SOUTH-NORTH_GRID_DIMENSION", "BOTTOM-TOP_GRID_DIMENSION", "DX", + "DY", "GRIDTYPE", "DIFF_OPT", "KM_OPT", "DAMP_OPT", "DAMPCOEF", "KHDIF", "KVDIF", "MP_PHYSICS", "RA_LW_PHYSICS", + "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", + "SURFACE_INPUT_SOURCE", "SST_UPDATE", "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", + "SGFDDA_INTERVAL_M", "SGFDDA_END_H", "WEST-EAST_PATCH_START_UNSTAG", "WEST-EAST_PATCH_END_UNSTAG", + "WEST-EAST_PATCH_START_STAG", "WEST-EAST_PATCH_END_STAG", "SOUTH-NORTH_PATCH_START_UNSTAG", + "SOUTH-NORTH_PATCH_END_UNSTAG", "SOUTH-NORTH_PATCH_START_STAG", "SOUTH-NORTH_PATCH_END_STAG", + "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", "BOTTOM-TOP_PATCH_START_STAG", + "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", "PARENT_GRID_RATIO", "DT", + "CEN_LAT", "CEN_LON", "TRUELAT1", "TRUELAT2", "MOAD_CEN_LAT", "STAND_LON", "POLE_LAT", "POLE_LON", "GMT", "JULYR", + "JULDAY", "MAP_PROJ", "MMINLU", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] + + +# noinspection DuplicatedCode +def to_netcdf_wrf_chem(self, path, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + keep_open : bool + Indicates if you want to keep open the NetCDH to fill the data by time-step. + """ + + self.to_dtype(float32) + + set_global_attributes(self) + change_variable_attributes(self) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + create_dimensions(self, netcdf) + + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create variables + create_variables(self, netcdf) + + for att_name in GLOBAL_ATTRIBUTES_ORDER: + netcdf.setncattr(att_name, self.global_attrs[att_name]) + + # Close NetCDF + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +def change_variable_attributes(self): + """ + Modify the emission list to be consistent to use the output as input for WRF-CHEM model. + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+ """ + + for var_name in self.variables.keys(): + if self.variables[var_name]["units"] == "mol.h-1.km-2": + self.variables[var_name]["FieldType"] = int32(104) + self.variables[var_name]["MemoryOrder"] = "XYZ" + self.variables[var_name]["description"] = "EMISSIONS" + self.variables[var_name]["units"] = "mol km^-2 hr^-1" + self.variables[var_name]["stagger"] = "" + self.variables[var_name]["coordinates"] = "XLONG XLAT" + + elif self.variables[var_name]["units"] == "ug.s-1.m-2": + self.variables[var_name]["FieldType"] = int32(104) + self.variables[var_name]["MemoryOrder"] = "XYZ" + self.variables[var_name]["description"] = "EMISSIONS" + self.variables[var_name]["units"] = "ug/m3 m/s" + self.variables[var_name]["stagger"] = "" + self.variables[var_name]["coordinates"] = "XLONG XLAT" + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") + + if "long_name" in self.variables[var_name].keys(): + del self.variables[var_name]["long_name"] + + return None + + +def to_wrf_chem_units(self): + """ + Change the data values according to the WRF-CHEM conventions. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + self.calculate_grid_area(overwrite=False) + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol.h-1.km-2": + # 10**6 -> from m2 to km2 + # 10**3 -> from kmol to mol + # 3600 -> from s to h + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 10 ** 6 * 10 ** 3 * 3600, dtype=float32) + elif self.variables[var_name]["units"] == "ug.s-1.m-2": + # 10**9 -> from kg to ug + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 10 ** 9, dtype=float32) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") + self.variables[var_name]["dtype"] = float32 + + return self.variables + + +def create_times_var(self): + """ + Create the content of the WRF-CHEM variable times. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + numpy.ndarray + Array with the content of TFLAG. + """ + + aux_times = chararray((len(self.time), 19), itemsize=1) + + for i_d, aux_date in enumerate(self.time): + aux_times[i_d] = list(aux_date.strftime("%Y-%m-%d_%H:%M:%S")) + + return aux_times + + +# noinspection DuplicatedCode +def set_global_attributes(self): + """ + Set the NetCDF global attributes + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+ """ + + # now = datetime.now() + # if len(self.time) > 1: + # tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 + # else: + # tstep = 1 * 10000 + + current_attributes = deepcopy(self.global_attrs) + del self.global_attrs + + self.global_attrs = {"TITLE": None, + "START_DATE": self.time[0].strftime("%Y-%m-%d_%H:%M:%S"), + "WEST-EAST_GRID_DIMENSION": None, # Projection dependent attributes + "SOUTH-NORTH_GRID_DIMENSION": None, # Projection dependent attributes + "BOTTOM-TOP_GRID_DIMENSION": int32(45), + "DX": None, # Projection dependent attributes + "DY": None, # Projection dependent attributes + "GRIDTYPE": "C", + "DIFF_OPT": int32(1), + "KM_OPT": int32(4), + "DAMP_OPT": int32(3), + "DAMPCOEF": float32(0.2), + "KHDIF": float32(0.), + "KVDIF": float32(0.), + "MP_PHYSICS": int32(6), + "RA_LW_PHYSICS": int32(4), + "RA_SW_PHYSICS": int32(4), + "SF_SFCLAY_PHYSICS": int32(2), + "SF_SURFACE_PHYSICS": int32(2), + "BL_PBL_PHYSICS": int32(8), + "CU_PHYSICS": int32(0), + "SF_LAKE_PHYSICS": int32(0), + "SURFACE_INPUT_SOURCE": None, # Projection dependent attributes + "SST_UPDATE": int32(0), + "GRID_FDDA": int32(0), + "GFDDA_INTERVAL_M": int32(0), + "GFDDA_END_H": int32(0), + "GRID_SFDDA": int32(0), + "SGFDDA_INTERVAL_M": int32(0), + "SGFDDA_END_H": int32(0), + "WEST-EAST_PATCH_START_UNSTAG": None, # Projection dependent attributes + "WEST-EAST_PATCH_END_UNSTAG": None, # Projection dependent attributes + "WEST-EAST_PATCH_START_STAG": None, # Projection dependent attributes + "WEST-EAST_PATCH_END_STAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_START_UNSTAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_END_UNSTAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_START_STAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_END_STAG": None, # Projection dependent attributes + "BOTTOM-TOP_PATCH_START_UNSTAG": None, + "BOTTOM-TOP_PATCH_END_UNSTAG": None, + "BOTTOM-TOP_PATCH_START_STAG": None, + "BOTTOM-TOP_PATCH_END_STAG": None, + "GRID_ID": int32(1), + "PARENT_ID": int32(0), + "I_PARENT_START": int32(1), + "J_PARENT_START": int32(1), + "PARENT_GRID_RATIO": int32(1), + "DT": float32(18.), + "CEN_LAT": None, # Projection dependent attributes + "CEN_LON": None, # Projection dependent attributes + "TRUELAT1": None, # Projection dependent attributes + "TRUELAT2": None, # Projection dependent attributes + "MOAD_CEN_LAT": None, # Projection dependent attributes + "STAND_LON": None, # Projection dependent attributes + "POLE_LAT": None, # Projection dependent attributes + "POLE_LON": None, # Projection dependent attributes + "GMT": float32(self.time[0].hour), + "JULYR": int32(self.time[0].year), + "JULDAY": int32(self.time[0].strftime("%j")), + "MAP_PROJ": None, # Projection dependent attributes + "MMINLU": "MODIFIED_IGBP_MODIS_NOAH", + "NUM_LAND_CAT": int32(41), + "ISWATER": int32(17), + "ISLAKE": int32(-1), + "ISICE": int32(15), + "ISURBAN": int32(13), + "ISOILWATER": int32(14), + "HISTORY": "", # Editable + } + + # Editable attributes + float_atts = ["DAMPCOEF", "KHDIF", "KVDIF", "CEN_LAT", "CEN_LON", "DT"] + int_atts = ["BOTTOM-TOP_GRID_DIMENSION", "DIFF_OPT", "KM_OPT", "DAMP_OPT", + "MP_PHYSICS", "RA_LW_PHYSICS", "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", + "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", "SURFACE_INPUT_SOURCE", "SST_UPDATE", + "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", "SGFDDA_INTERVAL_M", "SGFDDA_END_H", + "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", 
"BOTTOM-TOP_PATCH_START_STAG", + "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", + "PARENT_GRID_RATIO", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] + str_atts = ["GRIDTYPE", "MMINLU", "HISTORY"] + for att_name, att_value in current_attributes.items(): + if att_name in int_atts: + self.global_attrs[att_name] = int32(att_value) + elif att_name in float_atts: + self.global_attrs[att_name] = float32(att_value) + elif att_name in str_atts: + self.global_attrs[att_name] = str(att_value) + + # Projection dependent attributes + if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): + self.global_attrs["WEST-EAST_GRID_DIMENSION"] = int32(len(self._full_x["data"]) + 1) + self.global_attrs["SOUTH-NORTH_GRID_DIMENSION"] = int32(len(self._full_y["data"]) + 1) + self.global_attrs["DX"] = float32(self._full_x["data"][1] - self._full_x["data"][0]) + self.global_attrs["DY"] = float32(self._full_y["data"][1] - self._full_y["data"][0]) + self.global_attrs["SURFACE_INPUT_SOURCE"] = int32(1) + self.global_attrs["WEST-EAST_PATCH_START_UNSTAG"] = int32(1) + self.global_attrs["WEST-EAST_PATCH_END_UNSTAG"] = int32(len(self._full_x["data"])) + self.global_attrs["WEST-EAST_PATCH_START_STAG"] = int32(1) + self.global_attrs["WEST-EAST_PATCH_END_STAG"] = int32(len(self._full_x["data"]) + 1) + self.global_attrs["SOUTH-NORTH_PATCH_START_UNSTAG"] = int32(1) + self.global_attrs["SOUTH-NORTH_PATCH_END_UNSTAG"] = int32(len(self._full_y["data"])) + self.global_attrs["SOUTH-NORTH_PATCH_START_STAG"] = int32(1) + self.global_attrs["SOUTH-NORTH_PATCH_END_STAG"] = int32(len(self._full_y["data"]) + 1) + + self.global_attrs["POLE_LAT"] = float32(90) + self.global_attrs["POLE_LON"] = float32(0) + + if isinstance(self, nes.LCCNes): + self.global_attrs["MAP_PROJ"] = int32(1) + self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"][0]) + self.global_attrs["TRUELAT2"] = float32(self.projection_data["standard_parallel"][1]) + self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) + self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) + self.global_attrs["CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) + self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) + elif isinstance(self, nes.MercatorNes): + self.global_attrs["MAP_PROJ"] = int32(3) + self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"]) + self.global_attrs["TRUELAT2"] = float32(0) + self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["standard_parallel"]) + self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) + self.global_attrs["CEN_LAT"] = float32(self.projection_data["standard_parallel"]) + self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) + + return None + + +def create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + netcdf.createDimension("Time", len(self.get_full_times())) + netcdf.createDimension("DateStrLen", 19) + netcdf.createDimension("emissions_zdim", len(self.get_full_levels()["data"])) + if isinstance(self, nes.LCCNes): + netcdf.createDimension("west_east", len(self._full_x["data"])) + netcdf.createDimension("south_north", len(self._full_y["data"])) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "y" and "x" variables. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + netcdf : Dataset + NetCDF object. + """ + + times = netcdf.createVariable("Times", "S1", ("Time", "DateStrLen", )) + times[:] = create_times_var(self) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf): + """ + Create the netCDF file variables. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + for var_name, var_info in self.variables.items(): + var = netcdf.createVariable(var_name, "f", ("Time", "emissions_zdim", "south_north", "west_east",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + var.FieldType = var_info["FieldType"] + var.MemoryOrder = var_info["MemoryOrder"] + var.description = var_info["description"] + var.units = var_info["units"] + var.stagger = var_info["stagger"] + var.coordinates = var_info["coordinates"] + + if var_info["data"] is not None: + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + + if isinstance(var_info["data"], int) and var_info["data"] == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(var_info["data"].shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"] + + return None diff --git a/build/lib/tests/1.1-test_read_write_projection.py b/build/lib/tests/1.1-test_read_write_projection.py new file mode 100644 index 0000000..5788b30 --- /dev/null +++ b/build/lib/tests/1.1-test_read_write_projection.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_1.1_read_write_projection_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'write'], + columns=['1.1.1.Regular', '1.1.2.Rotated', '1.1.3.Points', '1.1.4.Points_GHOST', + '1.1.5.LCC', '1.1.6.Mercator']) + +# ====================================================================================================================== +# ============================================= REGULAR ======================================================== +# ====================================================================================================================== + +test_name = '1.1.1.Regular' +if rank == 0: + print(test_name) +comm.Barrier() + +# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc +# Regular lat-lon grid from MONARCH +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' 
+ +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +variables = ['sconcno2'] +nessy.keep_vars(variables) +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= ROTATED ======================================================== +# ====================================================================================================================== + +test_name = '1.1.2.Rotated' +if rank == 0: + print(test_name) + +# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all/O3_all-000_2021080300.nc +# Rotated grid from MONARCH +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +variables = ['O3_all'] +nessy.keep_vars(variables) +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ============================================= LCC ============================================================ +# ====================================================================================================================== + +test_name = '1.1.5.LCC' +if rank == 0: + print(test_name) + +# Original path: /esarchive/exp/snes/a5g1/ip/daily_max/sconco3/sconco3_2022111500.nc +# LCC grid with a coverage over the Iberian Peninsula (4x4km) +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/sconco3_2022111500.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= MERCATOR ======================================================== +# ====================================================================================================================== + +test_name = '1.1.6.Mercator' +if rank == 0: + print(test_name) + +# Original path: None (generated with NES) +# 
Mercator grid +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/mercator_grid.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + +# ====================================================================================================================== +# ============================================= POINTS ========================================================= +# ====================================================================================================================== + +test_name = '1.1.3.Points' +if rank == 0: + print(test_name) + +# Original path: /esarchive/obs/nilu/ebas/daily/pm10/pm10_201507.nc +# Points grid from EBAS network +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/pm10_201507.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= POINTS GHOST =================================================== +# ====================================================================================================================== + +test_name = '1.1.4.Points_GHOST' +if rank == 0: + print(test_name) + +path = '/gpfs/projects/bsc32/AC_cache/obs/ghost/EBAS/1.4/hourly/sconco3/sconco3_201906.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/1.2-test_create_projection.py b/build/lib/tests/1.2-test_create_projection.py new file mode 100644 index 0000000..60c470a --- /dev/null +++ b/build/lib/tests/1.2-test_create_projection.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python + +import sys +from mpi4py import MPI +import pandas as pd +import timeit +from nes import create_nes + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_1.2_create_projection_{0}_{1:03d}.csv".format(parallel_method, size) +result = 
pd.DataFrame(index=['create', 'write'], + columns=['1.2.1.Regular', '1.2.2.Rotated', '1.2.3.LCC', '1.2.4.Mercator', '1.2.5.Global']) + +# ====================================================================================================================== +# ============================================= REGULAR ======================================================== +# ====================================================================================================================== + +test_name = '1.2.1.Regular' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.01 +inc_lon = 0.01 +n_lat = 100 +n_lon = 100 +nessy = create_nes(projection='regular', parallel_method=parallel_method, + lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon, n_lat=n_lat, n_lon=n_lon) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= ROTATED ======================================================== +# ====================================================================================================================== + +test_name = '1.2.2.Rotated' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(projection='rotated', parallel_method=parallel_method, + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= LCC ======================================================== +# ====================================================================================================================== + +test_name = '1.2.3.LCC' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(projection='lcc', parallel_method=parallel_method, + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + 
print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= MERCATOR ======================================================== +# ====================================================================================================================== + +test_name = '1.2.4.Mercator' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_ts = -1.5 +lon_0 = -18.0 +nx = 210 +ny = 236 +inc_x = 50000 +inc_y = 50000 +x_0 = -126017.5 +y_0 = -5407460.0 +nessy = create_nes(projection='mercator', parallel_method=parallel_method, + lat_ts=lat_ts, lon_0=lon_0, nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================== GLOBAL ======================================================== +# ====================================================================================================================== + +test_name = '1.2.5.Global' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +inc_lat = 0.1 +inc_lon = 0.1 +nessy = create_nes(projection='global', parallel_method=parallel_method, inc_lat=inc_lat, inc_lon=inc_lon) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/1.3-test_selecting.py b/build/lib/tests/1.3-test_selecting.py new file mode 100644 index 0000000..00bbb23 --- /dev/null +++ b/build/lib/tests/1.3-test_selecting.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf +from datetime import datetime + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' +serial_write = True + +result_path = "Times_test_1.3.Selecting_{0}_{1:03d}.csv".format(parallel_method, size) + +result = pd.DataFrame(index=['read', 'calcul', 'write'], + columns=['1.3.1.LatLon', '1.3.2.Level', '1.3.3.Time', '1.3.4.Time_min', '1.3.5.Time_max']) + +# NAMEE +src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc" +var_list = ['O3'] + +# ====================================================================================================================== +# ====================================== '1.3.1.LatLon' ===================================================== +# ====================================================================================================================== +test_name = '1.3.1.Selecting_LatLon' + +if rank == 0: + print(test_name) + +st_time = 
timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method, balanced=True) +nessy.keep_vars(var_list) +nessy.sel(lat_min=35, lat_max=45, lon_min=-9, lon_max=5) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== 1.3.2.Level ===================================================== +# ====================================================================================================================== +test_name = '1.3.2.Selecting_Level' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(lev_min=3, lev_max=5) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== 1.3.3.Time ===================================================== +# ====================================================================================================================== +test_name = '1.3.3.Selecting_Time' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(time_min=datetime(year=2022, month=11, day=16, hour=0), + time_max=datetime(year=2022, month=11, day=16, hour=0)) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== '1.3.4.Time_min' ===================================================== +# ====================================================================================================================== +test_name = '1.3.4.Selecting_Time_min' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(time_min=datetime(year=2022, month=11, day=16, hour=0)) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + 
"{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== '1.3.5.Time_max' ===================================================== +# ====================================================================================================================== +test_name = '1.3.5.Selecting_Time_max' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(time_max=datetime(year=2022, month=11, day=16, hour=0)) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.1-test_spatial_join.py b/build/lib/tests/2.1-test_spatial_join.py new file mode 100644 index 0000000..e24d443 --- /dev/null +++ b/build/lib/tests/2.1-test_spatial_join.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python + +import sys +from mpi4py import MPI +import pandas as pd +import timeit +from nes import open_netcdf, from_shapefile + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' +serial_write = False + +result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['2.1.1.Existing_file_centroid', '2.1.2.New_file_centroid', + '2.1.3.Existing_file_nearest', '2.1.4.New_file_nearest', + '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection']) + +# ===== PATH TO MASK ===== # +# Timezones +# shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' +# shapefile_var_list = ['tzid'] +# str_len = 32 +# Country ISO codes +shapefile_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp" +shapefile_var_list = ['ISO'] +str_len = 3 + +# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc +# Regular lat-lon grid from MONARCH +original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' +# CAMS_Global +# original_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" + +# ====================================================================================================================== +# =================================== CENTROID EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.1.Existing_file_centroid' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# 
SPATIAL JOIN +# Method can be centroid, nearest and intersection +st_time = timeit.default_timer() +nessy.spatial_join(shapefile_path, method='centroid', var_list=shapefile_var_list, info=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) +comm.Barrier() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +# REWRITE +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), serial=serial_write) + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== CENTROID FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.2.New_file_centroid' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== NEAREST EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.3.Existing_file_nearest' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} 
+nessy.create_shapefile() +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +st_time = timeit.default_timer() +nessy.spatial_join(shapefile_path, method='nearest', var_list=shapefile_var_list, info=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== NEAREST FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.4.New_file_nearest' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# =================================== INTERSECTION EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.5.Existing_file_intersection' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() +comm.Barrier() 
+result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +st_time = timeit.default_timer() +nessy.spatial_join(shapefile_path, method='intersection', var_list=shapefile_var_list, info=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.set_strlen(strlen=str_len) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== INTERSECTION FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.6.New_file_intersection' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='intersection', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.2-test_create_shapefile.py b/build/lib/tests/2.2-test_create_shapefile.py new file mode 100644 index 0000000..6d443a7 --- /dev/null +++ b/build/lib/tests/2.2-test_create_shapefile.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +import datetime +from nes import create_nes, open_netcdf + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = 
"Times_test_2.2_create_shapefile_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate'], + columns=['2.2.1.Existing', '2.2.2.New_Regular', + '2.2.3.New_Rotated', '2.2.4.New_LCC', '2.2.5.New_Mercator']) + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM EXISTING GRID ========================================== +# ====================================================================================================================== + +test_name = '2.2.1.Existing' +if rank == 0: + print(test_name) + +# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc +# Regular lat-lon grid from MONARCH +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.to_shapefile(path='regular_shp', + time=datetime.datetime(2019, 1, 1, 10, 0), + lev=0, var_list=['sconcno2']) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM EXISTING GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM NEW REGULAR GRID ======================================= +# ====================================================================================================================== + +test_name = '2.2.2.New_Regular' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.1 +inc_lon = 0.1 +n_lat = 50 +n_lon = 100 +nessy = create_nes(comm=None, info=False, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.create_shapefile() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM NEW REGULAR GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM NEW ROTATED GRID ======================================= +# ====================================================================================================================== + +test_name = '2.2.3.New_Rotated' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(comm=None, info=False, projection='rotated', + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) +comm.Barrier() 
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CREATE SHAPEFILE
+st_time = timeit.default_timer()
+nessy.create_shapefile()
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+print('FROM NEW ROTATED GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ===================================== CREATE SHAPEFILE FROM NEW LCC GRID ===========================================
+# ======================================================================================================================
+
+test_name = '2.2.4.New_LCC'
+if rank == 0:
+    print(test_name)
+
+# CREATE GRID
+st_time = timeit.default_timer()
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 100
+ny = 200
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+nessy = create_nes(comm=None, info=False, projection='lcc',
+                   lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                   nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CREATE SHAPEFILE
+st_time = timeit.default_timer()
+nessy.create_shapefile()
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+print('FROM NEW LCC GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ===================================== CREATE SHAPEFILE FROM NEW MERCATOR GRID ======================================
+# ======================================================================================================================
+
+test_name = '2.2.5.New_Mercator'
+if rank == 0:
+    print(test_name)
+
+# CREATE GRID
+st_time = timeit.default_timer()
+lat_ts = -1.5
+lon_0 = -18.0
+nx = 100
+ny = 50
+inc_x = 50000
+inc_y = 50000
+x_0 = -126017.5
+y_0 = -5407460.0
+nessy = create_nes(comm=None, info=False, projection='mercator',
+                   lat_ts=lat_ts, lon_0=lon_0, nx=nx, ny=ny,
+                   inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CREATE SHAPEFILE
+st_time = timeit.default_timer()
+nessy.create_shapefile()
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+print('FROM NEW MERCATOR GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/2.3-test_bounds.py b/build/lib/tests/2.3-test_bounds.py
new file mode 100644
index 0000000..a2a9c1c
--- /dev/null
+++ b/build/lib/tests/2.3-test_bounds.py
@@ -0,0 +1,275 @@
+#!/usr/bin/env python
+
+import sys
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf, create_nes
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_2.3_bounds_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['2.3.1.With_bounds', 
'2.3.2.Without_bounds', "2.3.3.Create_new", + "2.3.4.latlon_sel_create_bnds", "2.3.5.rotated_sel_create_bnds"]) + +# ====================================================================================================================== +# ===================================== FILE WITH EXISTING BOUNDS ==================================================== +# ====================================================================================================================== + +test_name = "2.3.1.With_bounds" +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +# Original path: /esarchive/exp/snes/a5s1/regional/3hourly/od550du/od550du-000_2021070612.nc +# Rotated grid for dust regional +path_1 = '/gpfs/projects/bsc32/models/NES_tutorial_data/od550du-000_2021070612.nc' +nessy_1 = open_netcdf(path=path_1, parallel_method=parallel_method, info=True) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +st_time = timeit.default_timer() +print('FILE WITH EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_1.lat_bnds) +print('FILE WITH EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_1.lon_bnds) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy_1.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_2 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_2.lat_bnds) +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_2.lon_bnds) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== FILE WITHOUT EXISTING BOUNDS =================================================== +# ====================================================================================================================== + +test_name = '2.3.2.Without_bounds' +if rank == 0: + print(test_name) + +# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all +# /O3_all-000_2021080300.nc Rotated grid from MONARCH +st_time = timeit.default_timer() +path_3 = "/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc" +nessy_3 = open_netcdf(path=path_3, parallel_method=parallel_method, info=True) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_3.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_3.lat_bnds) +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_3.lon_bnds) + +# WRITE +st_time = timeit.default_timer() +nessy_3.to_netcdf('/tmp/bounds_file_2.nc', info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_4 = open_netcdf('/tmp/bounds_file_2.nc', info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', 
nessy_4.lat_bnds) +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_4.lon_bnds) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ==================================== CREATE NES REGULAR LAT-LON ==================================================== +# ====================================================================================================================== + +test_name = "2.3.3.Create_new" +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 +nessy_5 = create_nes(comm=None, parallel_method=parallel_method, info=True, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_5.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FROM NEW GRID - Rank', rank, '-', 'Lat bounds', nessy_5.lat_bnds) +print('FROM NEW GRID - Rank', rank, '-', 'Lon bounds', nessy_5.lon_bnds) + +# WRITE +st_time = timeit.default_timer() +nessy_5.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_6 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_6.lat_bnds) +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_6.lon_bnds) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ================================ REGULAR LAT-LON SEL THEN CREATE BOUNDS ============================================= +# ====================================================================================================================== + +test_name = "2.3.4.latlon_sel_create_bnds" +if rank == 0: + print(test_name) + +# USE SAME GRID SETTING AS 2.3.3 +nessy_7 = create_nes(comm=None, parallel_method=parallel_method, info=True, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SEL +nessy_7.sel(lat_min=50, lat_max=60, lon_min=10, lon_max=20) + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_7.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FROM NEW GRID - Rank', rank, '-', 'Lat bounds', nessy_7.lat_bnds) +print('FROM NEW GRID - Rank', rank, '-', 'Lon bounds', nessy_7.lon_bnds) + +# Check lon_bnds +if nessy_7.lon_bnds['data'].shape != (52, 2): + raise Exception("Wrong lon_bnds.") + +# WRITE +st_time = timeit.default_timer() +nessy_7.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_8 = 
open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_8.lat_bnds) +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_8.lon_bnds) + +# Check lon_bnds +if nessy_8.lon_bnds['data'].shape != (52, 2): + raise Exception("Wrong lon_bnds.") + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ================================ ROTATED SEL THEN CREATE BOUNDS ============================================= +# ====================================================================================================================== + +test_name = "2.3.5.rotated_sel_create_bnds" +if rank == 0: + print(test_name) + +# USE FILE AS 2.3.2 + +# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all +# /O3_all-000_2021080300.nc Rotated grid from MONARCH +st_time = timeit.default_timer() +path_9 = "/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc" +nessy_9 = open_netcdf(path=path_9, parallel_method=parallel_method, info=True) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SEL +nessy_9.sel(lat_min=50, lat_max=60, lon_min=10, lon_max=15) + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_9.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_9.lat_bnds) +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_9.lon_bnds) + +# Check lon_bnds +if nessy_9.lon_bnds['data'].shape[0:2] != nessy_9.lon['data'].shape: + raise Exception("Wrong lon_bnds.") + +# WRITE +st_time = timeit.default_timer() +nessy_9.to_netcdf('/tmp/bounds_file_9.nc', info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_10 = open_netcdf('/tmp/bounds_file_9.nc', info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_10.lat_bnds) +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_10.lon_bnds) + +# Check lon_bnds +if nessy_10.lon_bnds['data'].shape[0:2] != nessy_10.lon['data'].shape: + raise Exception("Wrong lon_bnds.") + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.4-test_cell_area.py b/build/lib/tests/2.4-test_cell_area.py new file mode 100644 index 0000000..9db836f --- /dev/null +++ b/build/lib/tests/2.4-test_cell_area.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import create_nes, open_netcdf, calculate_geometry_area + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_2.4_cell_area_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['2.4.1.New_file_grid_area', '2.4.2.New_file_geometry_area', + '2.4.3.Existing_file_grid_area', '2.4.4.Existing_file_geometry_area']) + +# 
====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM NEW GRID =========================================== +# ====================================================================================================================== + +test_name = "2.4.1.New_file_grid_area" +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 20 +ny = 40 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(comm=None, info=False, projection='lcc', + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE AREA OF EACH CELL IN GRID +st_time = timeit.default_timer() +nessy.calculate_grid_area() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE GRID AREA +print('Rank {0:03d}: Calculate grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area'])) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) + +# EXPLORE GRID AREA +print('Rank {0:03d}: Write grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area'])) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +del nessy + +# ====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM GEOMETRIES ========================================= +# ====================================================================================================================== + +test_name = "2.4.2.New_file_geometry_area" +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 20 +ny = 40 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(comm=None, info=False, projection='lcc', + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE AREA OF EACH CELL POLYGON +st_time = timeit.default_timer() +nessy.create_shapefile() +geometry_list = nessy.shapefile['geometry'].values +geometry_area = calculate_geometry_area(geometry_list) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE GEOMETRIES AREA +print('Rank {0:03d}: Calculate geometry cell area: {1}'.format(rank, geometry_area)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM EXISTING GRID ====================================== +# ====================================================================================================================== + +test_name = '2.4.3.Existing_file_grid_area' +if rank == 0: + print(test_name) + +# Original 
path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# READ
+st_time = timeit.default_timer()
+nessy = open_netcdf(original_path, parallel_method=parallel_method)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CALCULATE AREA OF EACH CELL IN GRID
+st_time = timeit.default_timer()
+nessy.calculate_grid_area()
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# EXPLORE GRID AREA
+print('Rank {0:03d}: Calculate grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area']))
+
+# WRITE
+st_time = timeit.default_timer()
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+# REOPEN
+# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+del nessy
+
+# ======================================================================================================================
+# ===================================== CALCULATE CELLS AREA FROM GEOMETRIES FROM EXISTING GRID ======================
+# ======================================================================================================================
+
+test_name = '2.4.4.Existing_file_geometry_area'
+if rank == 0:
+    print(test_name)
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# READ
+st_time = timeit.default_timer()
+nessy = open_netcdf(original_path, parallel_method=parallel_method)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CALCULATE AREA OF EACH CELL POLYGON
+st_time = timeit.default_timer()
+nessy.create_shapefile()
+geometry_list = nessy.shapefile['geometry'].values
+geometry_area = calculate_geometry_area(geometry_list)
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# EXPLORE GEOMETRIES AREA
+print('Rank {0:03d}: Calculate geometry cell area: {1}'.format(rank, geometry_area))
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+del nessy
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/3.1-test_vertical_interp.py b/build/lib/tests/3.1-test_vertical_interp.py
new file mode 100644
index 0000000..9b78628
--- /dev/null
+++ b/build/lib/tests/3.1-test_vertical_interp.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+import sys
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'T'
+
+result_path = "Times_test_3.1_vertical_interp_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['3.1.1.Interp', '3.1.2.Extrap'])
+
+# ======================================================================================================================
+# =============================================== VERTICAL INTERPOLATION =============================================
+# ======================================================================================================================
+
+test_name = '3.1.1.Interp'
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+source_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# Read source data
+source_data = open_netcdf(path=source_path, info=True)
+
+# Select time and load variables
+source_data.keep_vars(['O3', 'mid_layer_height_agl'])
+source_data.load()
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# INTERPOLATE
+st_time = timeit.default_timer()
+source_data.vertical_var_name = 'mid_layer_height_agl'
+level_list = [0., 50., 100., 250., 500., 750., 1000., 2000., 3000., 5000.]
+interp_nes = source_data.interpolate_vertical(level_list, info=True, kind='linear', extrapolate=None)
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# =============================================== VERTICAL EXTRAPOLATION =============================================
+# ======================================================================================================================
+
+test_name = '3.1.2.Extrap'
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+source_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# Read source data
+source_data = open_netcdf(path=source_path, info=True)
+
+# Select time and load variables
+source_data.keep_vars(['O3', 'mid_layer_height_agl'])
+source_data.load()
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# INTERPOLATE
+st_time = timeit.default_timer()
+source_data.vertical_var_name = 'mid_layer_height_agl'
+level_list = [0., 50., 100., 250., 500., 750., 1000., 2000., 3000., 5000., 21000, 25000, 30000, 40000, 50000]
+interp_nes = source_data.interpolate_vertical(level_list, info=True, kind='linear', extrapolate=True)
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/3.2-test_horiz_interp_bilinear.py b/build/lib/tests/3.2-test_horiz_interp_bilinear.py
new file mode 100644
index 0000000..4366a8d
--- /dev/null
+++ b/build/lib/tests/3.2-test_horiz_interp_bilinear.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+import sys
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf, create_nes
+import os
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'T'
+
+result_path = "Times_test_3.2_horiz_interp_bilinear_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['3.2.1.NN_Only interp', '3.2.2.NN_Create_WM', "3.2.3.NN_Use_WM", "3.2.4.NN_Read_WM"])
+
+# NAMEE
+src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc"
+var_list = ['O3']
+
+# ======================================================================================================================
+# ====================================== Only interp =====================================================
+# ======================================================================================================================
+test_name = '3.2.1.NN_Only interp'
+if rank == 0:
+    print(test_name)
+    sys.stdout.flush()
+
+# READING
+st_time = timeit.default_timer()
+
+# Source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Destination Grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+st_time = timeit.default_timer()
+
+interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN')
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ====================================== Create_WM =====================================================
+# ======================================================================================================================
+test_name = '3.2.2.NN_Create_WM'
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Read source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Destination Grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# Cleaning WM
+if os.path.exists("NN_WM_NAMEE_to_IP.nc") and rank == 0:
+    os.remove("NN_WM_NAMEE_to_IP.nc")
+comm.Barrier()
+
+st_time = timeit.default_timer()
+
+wm_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', info=True,
+                                        weight_matrix_path="NN_WM_NAMEE_to_IP.nc", only_create_wm=True)
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    
print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Use_WM ===================================================== +# ====================================================================================================================== +test_name = "3.2.3.NN_Use_WM" +if rank == 0: + print(test_name) + +# READING +st_time = timeit.default_timer() + +# Source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Destination Grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 + +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', wm=wm_nes) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Read_WM ===================================================== +# ====================================================================================================================== +test_name = "3.2.4.NN_Read_WM" +if rank == 0: + print(test_name) + +# READING +st_time = timeit.default_timer() + +# Source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Destination Grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 + +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', + weight_matrix_path="NN_WM_NAMEE_to_IP.nc") +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/3.3-test_horiz_interp_conservative.py b/build/lib/tests/3.3-test_horiz_interp_conservative.py new file mode 100644 index 0000000..90aa72b --- /dev/null +++ b/build/lib/tests/3.3-test_horiz_interp_conservative.py @@ 
-0,0 +1,248 @@
+#!/usr/bin/env python
+import sys
+import os
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf, create_nes
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_3.3_horiz_interp_conservative_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['3.3.1.Only interp', '3.3.2.Create_WM', "3.3.3.Use_WM", "3.3.4.Read_WM"])
+
+src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc"
+src_type = 'NAMEE'
+var_list = ['O3']
+# src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc"
+# src_type = 'CAMS_glob_antv21'
+# var_list = ['nox_no']
+
+# ======================================================================================================================
+# ====================================== Only interp =====================================================
+# ======================================================================================================================
+
+test_name = '3.3.1.Only interp'
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Read source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Create destination grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+dst_type = "IP"
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+st_time = timeit.default_timer()
+
+# INTERPOLATE
+interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', info=False)
+# interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', weight_matrix_path='T_WM.nc')
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=True)
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ====================================== Create_WM =====================================================
+# ======================================================================================================================
+
+test_name = '3.3.2.Create_WM'
+if rank == 0:
+    print(test_name)
+
+# READING
+st_time = timeit.default_timer()
+
+# Read source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Create destination grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, 
x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +dst_type = "IP" + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# Cleaning WM +if os.path.exists("CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type)) and rank == 0: + os.remove("CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type)) +comm.Barrier() + +# INTERPOLATE +st_time = timeit.default_timer() +wm_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', info=True, + weight_matrix_path="CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type), + only_create_wm=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +# st_time = timeit.default_timer() +# interp_nes.to_netcdf(test_name.replace(' ', '_') + ".nc") +# comm.Barrier() +# result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Use_WM ===================================================== +# ====================================================================================================================== + +test_name = "3.3.3.Use_WM" +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# Read source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Create destination grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +dst_type = "IP" + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# INTERPOLATE +st_time = timeit.default_timer() +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', wm=wm_nes) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Read_WM ===================================================== +# ====================================================================================================================== + +test_name = "3.3.4.Read_WM" +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# Read source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Create destination grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, 
parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+dst_type = "IP"
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# INTERPOLATE
+st_time = timeit.default_timer()
+interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative',
+                                            weight_matrix_path="CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type))
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/4.1-test_stats.py b/build/lib/tests/4.1-test_stats.py
new file mode 100644
index 0000000..f11206c
--- /dev/null
+++ b/build/lib/tests/4.1-test_stats.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+import sys
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_4.1_daily_stats_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['4.1.1.Mean', '4.1.2.Rolling_Mean'])
+
+# ======================================================================================================================
+# ============================================== CALCULATE DAILY MEAN ================================================
+# ======================================================================================================================
+
+test_name = '4.1.1.Mean'
+if rank == 0:
+    print(test_name)
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# READ
+st_time = timeit.default_timer()
+nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# LOAD VARIABLES
+nessy.keep_vars('O3')
+nessy.load()
+
+# CALCULATE MEAN
+st_time = timeit.default_timer()
+nessy.daily_statistic(op="mean")
+print(nessy.variables['O3']['cell_methods'])
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ========================================== CALCULATE 8-HOUR ROLLING MEAN ===========================================
+# ======================================================================================================================
+
+test_name = '4.1.2.Rolling_Mean'
+if rank == 0:
+    print(test_name)
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# READ
+st_time = 
timeit.default_timer() +nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE MEAN +st_time = timeit.default_timer() +rolling_mean = nessy.rolling_mean(var_list='O3', hours=8) +print(rolling_mean.variables['O3']['data']) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/4.2-test_sum.py b/build/lib/tests/4.2-test_sum.py new file mode 100644 index 0000000..2f1a93c --- /dev/null +++ b/build/lib/tests/4.2-test_sum.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +import sys +from mpi4py import MPI +import pandas as pd +import timeit +import numpy as np +from nes import create_nes + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_4.2_sum_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['4.2.1.Sum']) + +# ====================================================================================================================== +# =================================== CENTROID FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '4.2.1.Sum' + +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 +nessy = create_nes(projection=projection, lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) + +# ADD VARIABLES +nessy.variables = {'var_aux': {'data': np.ones((len(nessy.time), len(nessy.lev['data']), + nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]))}} + +# CREATE GRID WITH COPY +nessy_2 = nessy.copy(copy_vars=True) + +# ADD VARIABLES +for var_name in nessy_2.variables.keys(): + nessy_2.variables[var_name]['data'] *= 2 + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SUM +st_time = timeit.default_timer() +nessy_3 = nessy + nessy_2 +print('Sum result', nessy_3.variables['var_aux']['data']) + +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy_3.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/4.3-test_write_timestep.py b/build/lib/tests/4.3-test_write_timestep.py new file mode 100644 index 0000000..b50c74b --- /dev/null +++ b/build/lib/tests/4.3-test_write_timestep.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python + +import sys +from mpi4py import MPI +import pandas as pd +import timeit +from datetime import datetime, timedelta +import numpy 
as np +from nes import create_nes + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_4.3_write_time_step_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['4.3.1.Parallel_Write', '4.3.2.Serial_Write']) + +# ====================================================================================================================== +# =================================== PARALLEL WRITE =================================================== +# ====================================================================================================================== + +test_name = '4.3.1.Parallel_Write' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() +# CREATE GRID +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(comm=None, info=False, projection='rotated', + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) + +# ADD VARIABLES +nessy.variables = {'var1': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}, + 'var2': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}} +time_list = [datetime(year=2023, month=1, day=1) + timedelta(hours=x) for x in range(24)] +nessy.set_time(time_list) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name + '.nc', keep_open=True, info=False) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# CALCULATE & APPEND +result.loc['calculate', test_name] = 0 + +for i_time, time_aux in enumerate(time_list): + # CALCULATE + st_time = timeit.default_timer() + + nessy.variables['var1']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time + nessy.variables['var2']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time + + comm.Barrier() + result.loc['calculate', test_name] += timeit.default_timer() - st_time + + # APPEND + st_time = timeit.default_timer() + nessy.append_time_step_data(i_time) + comm.Barrier() + if i_time == len(time_list) - 1: + nessy.close() + result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== SERIAL WRITE =================================================== +# ====================================================================================================================== + +test_name = '4.3.2.Serial_Write' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() +# CREATE GRID +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(comm=None, info=False, projection='rotated', + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) + +# ADD VARIABLES +nessy.variables = {'var1': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}, + 'var2': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}} +time_list = [datetime(year=2023, month=1, day=1) + timedelta(hours=x) 
for x in range(24)] +nessy.set_time(time_list) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name + '.nc', keep_open=True, info=False, serial=True) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# CALCULATE & APPEND +result.loc['calculate', test_name] = 0 + +for i_time, time_aux in enumerate(time_list): + # CALCULATEATE + st_time = timeit.default_timer() + + nessy.variables['var1']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time + nessy.variables['var2']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time + + comm.Barrier() + result.loc['calculate', test_name] += timeit.default_timer() - st_time + + # APPEND + st_time = timeit.default_timer() + nessy.append_time_step_data(i_time) + comm.Barrier() + if i_time == len(time_list) - 1: + nessy.close() + result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/__init__.py b/build/lib/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/build/lib/tests/unit/__init__.py b/build/lib/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/build/lib/tests/unit/test_imports.py b/build/lib/tests/unit/test_imports.py new file mode 100644 index 0000000..346ebad --- /dev/null +++ b/build/lib/tests/unit/test_imports.py @@ -0,0 +1,106 @@ +import unittest + + +class TestImports(unittest.TestCase): + def test_imports(self): + imports_to_test = [ + 'sys', 'os', 'time', 'timeit', 'math', 'calendar', 'datetime', + 'warnings', 'geopandas', 'pandas', 'numpy', 'shapely', + 'mpi4py', 'netCDF4', 'pyproj', 'configargparse', 'filelock', + 'eccodes'] + + for module_name in imports_to_test: + with self.subTest(module=module_name): + try: + __import__(module_name) + except ImportError as e: + self.fail(f"Failed to import {module_name}: {e}") + + def test_eccodes(self): + try: + import eccodes + from eccodes import codes_grib_new_from_file + from eccodes import codes_keys_iterator_new + from eccodes import codes_keys_iterator_next + from eccodes import codes_keys_iterator_get_name + from eccodes import codes_get_string + from eccodes import codes_keys_iterator_delete + from eccodes import codes_clone + from eccodes import codes_set + from eccodes import codes_set_values + from eccodes import codes_write + from eccodes import codes_release + from eccodes import codes_samples_path + import os + os.path.join(codes_samples_path(), 'GRIB2.tmpl') + + print("Eccodes: ", eccodes.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_geopandas(self): + try: + import geopandas + print("GeoPandas: ", geopandas.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_pandas(self): + try: + import pandas + print("Pandas: ", pandas.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_numpy(self): + try: + import numpy + print("NumPy: ", numpy.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_shapely(self): + try: + import shapely + print("Shapely: ", shapely.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_mpi(self): + try: + import mpi4py 
+ print("mpi4py: ", mpi4py.__version__) + from mpi4py import MPI + print("MPI Vendor: ", MPI.get_vendor()) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_netcdf4(self): + try: + import netCDF4 + print("netCDF4 version:", netCDF4.__version__) + print("HDF5 version:", netCDF4.__hdf5libversion__) + print("NetCDF library version:", netCDF4.__netcdf4libversion__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_netcdf4_parallel(self): + try: + from mpi4py import MPI + import numpy as np + from netCDF4 import Dataset + nc = Dataset('/tmp/parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD, info=MPI.Info()) + nc.close() + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_pyproj(self): + try: + import pyproj + print("pyproj: ", pyproj.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + +if __name__ == '__main__': + unittest.main() diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index b470eb8..3c7acda 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -4295,7 +4295,9 @@ class Nes(object): if self.variables[variable]["data"] is None: raise ValueError(f"All variables data must be loaded before using this function. Data for {variable} is not loaded.") - def convert_longitudes(self, path): + def convert_longitudes(self, + #path + ): """ Converts longitudes from the [0, 360] range to the [-180, 180] range. @@ -4347,5 +4349,5 @@ class Nes(object): reordered_data = take(data, sorted_indices, axis=3) self.variables[name]["data"] = reordered_data - self.to_netcdf(path) + #self.to_netcdf(path) return self diff --git a/tests/2.5-test_longitude_conversion.py b/tests/2.5-test_longitude_conversion.py new file mode 100644 index 0000000..b0d3f39 --- /dev/null +++ b/tests/2.5-test_longitude_conversion.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'N' + +result_path = "Times_test_2.5_longitudes_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['2.5.1.Longitude_conversion']) + +# ====================================================================================================================== +# ===================================== FILE WITH LONGITUDES in [0, 360] ============================================= +# ====================================================================================================================== + +test_name = "2.5.1.Longitude_conversion" +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# NC file with longitudes in [0, 360]. 
+path_1 = '/gpfs/scratch/bsc32/bsc124195/preprocessed_backup.nc' +nessy_1 = open_netcdf(path=path_1, parallel_method=parallel_method, info=True) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CONVERT LONGITUDES +st_time = timeit.default_timer() +print('Rank', rank, '-', 'Convert Longitudes', nessy_1.convert_longitudes()) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +path_2 = test_name.replace(' ', '_') + "_{0:03d}.nc".format(size) +st_time = timeit.default_timer() +nessy_1.to_netcdf(path_2, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") \ No newline at end of file diff --git a/tests/clean_output.sh b/tests/clean_output.sh index bac741a..d5d7810 100644 --- a/tests/clean_output.sh +++ b/tests/clean_output.sh @@ -8,6 +8,7 @@ rm 2.1.* rm 2.2.* rm 2.3.* rm 2.4.* +rm 2.5.* rm 3.1.* rm 3.2.* diff --git a/tests/run_scalability_tests_nord3v2.sh b/tests/run_scalability_tests_nord3v2.sh index 4c28785..3c2dd25 100644 --- a/tests/run_scalability_tests_nord3v2.sh +++ b/tests/run_scalability_tests_nord3v2.sh @@ -8,7 +8,7 @@ module load Python/3.7.4-GCCcore-8.3.0 module load NES/1.1.3-nord3-v2-foss-2019b-Python-3.7.4 -for EXE in "1.1-test_read_write_projection.py" "1.2-test_create_projection.py" "1.3-test_selecting.py" "2.1-test_spatial_join.py" "2.2-test_create_shapefile.py" "2.3-test_bounds.py" "2.4-test_cell_area.py" "3.1-test_vertical_interp.py" "3.2-test_horiz_interp_bilinear.py" "3.3-test_horiz_interp_conservative.py" "4.1-test_stats.py" "4.2-test_sum.py" "4.3-test_write_timestep.py" +for EXE in "1.1-test_read_write_projection.py" "1.2-test_create_projection.py" "1.3-test_selecting.py" "2.1-test_spatial_join.py" "2.2-test_create_shapefile.py" "2.3-test_bounds.py" "2.4-test_cell_area.py" "2.5-test_longitude_conversion.py" "3.1-test_vertical_interp.py" "3.2-test_horiz_interp_bilinear.py" "3.3-test_horiz_interp_conservative.py" "4.1-test_stats.py" "4.2-test_sum.py" "4.3-test_write_timestep.py" do for nprocs in 1 2 4 8 16 do diff --git a/tests/test_bash.mn4.sh b/tests/test_bash.mn4.sh index 5edea67..3f8dc9e 100644 --- a/tests/test_bash.mn4.sh +++ b/tests/test_bash.mn4.sh @@ -28,6 +28,7 @@ mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.1-test_spatial_join.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.2-test_create_shapefile.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.3-test_bounds.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.4-test_cell_area.py +mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.5-test_longitude_conversion.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.1-test_vertical_interp.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.2-test_horiz_interp_bilinear.py diff --git a/tests/test_bash.mn5.sh b/tests/test_bash.mn5.sh index 13e0a1b..fbcfb74 100644 --- a/tests/test_bash.mn5.sh +++ b/tests/test_bash.mn5.sh @@ -36,6 +36,7 @@ mpirun -np 4 python 2.1-test_spatial_join.py mpirun -np 4 python 2.2-test_create_shapefile.py mpirun -np 4 python 2.3-test_bounds.py mpirun -np 4 python 2.4-test_cell_area.py +mpirun -np 4 python 2.5-test_longitude_conversion.py mpirun -np 4 python 3.1-test_vertical_interp.py mpirun -np 4 python 3.2-test_horiz_interp_bilinear.py diff --git a/tests/test_bash.nord3v2.sh b/tests/test_bash.nord3v2.sh index ed58153..5cfe091 100644 --- 
a/tests/test_bash.nord3v2.sh +++ b/tests/test_bash.nord3v2.sh @@ -26,6 +26,7 @@ mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.1-test_spatial_join.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.2-test_create_shapefile.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.3-test_bounds.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.4-test_cell_area.py +mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.5-test_longitude_conversion.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.1-test_vertical_interp.py mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.2-test_horiz_interp_bilinear.py -- GitLab From 8c3d4bb97cb5aa2083b24c3bfe59206a094a64b7 Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Tue, 11 Feb 2025 15:04:35 +0100 Subject: [PATCH 17/33] entry point for reordering longitudes --- nes/nc_projections/default_nes.py | 7 ++----- nes/utilities/reorder_longitudes_cli.py | 23 +++++++++++++++++++++++ setup.py | 8 +++++++- 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 nes/utilities/reorder_longitudes_cli.py diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 3c7acda..d859431 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -4295,9 +4295,7 @@ class Nes(object): if self.variables[variable]["data"] is None: raise ValueError(f"All variables data must be loaded before using this function. Data for {variable} is not loaded.") - def convert_longitudes(self, - #path - ): + def convert_longitudes(self): """ Converts longitudes from the [0, 360] range to the [-180, 180] range. @@ -4349,5 +4347,4 @@ class Nes(object): reordered_data = take(data, sorted_indices, axis=3) self.variables[name]["data"] = reordered_data - #self.to_netcdf(path) - return self + return None diff --git a/nes/utilities/reorder_longitudes_cli.py b/nes/utilities/reorder_longitudes_cli.py new file mode 100644 index 0000000..6d510b3 --- /dev/null +++ b/nes/utilities/reorder_longitudes_cli.py @@ -0,0 +1,23 @@ +from load_nes import open_netcdf + + +def reorder_longitudes_cli(infile, outfile): + """ + Converts longitudes in a NetCDF file and saves the modified file. + + Args: + infile (str): Path to the input NetCDF file. + outfile (str): Path to save the output NetCDF file with converted longitudes. 
+ + Returns: + None + """ + # open + nc = open_netcdf(infile) + # load + nc.load() + # convert longitudes from default_projections + nc.convert_longitudes() + # save + nc.to_netcdf(outfile) + return None diff --git a/setup.py b/setup.py index a76cd35..0e62388 100755 --- a/setup.py +++ b/setup.py @@ -58,4 +58,10 @@ setup( setup_requires=REQUIREMENTS['setup'], install_requires=REQUIREMENTS['install'], python_requires=">=3.7", -) + + entry_points={ + "console_scripts": [ + "reorder_longitudes infile outfile=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli", + ] + } +) \ No newline at end of file -- GitLab From f90cb78ba32b935d58435c54ad96699d7b3f4295 Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Tue, 11 Feb 2025 15:42:50 +0100 Subject: [PATCH 18/33] fixes to the entry point feature --- nes/utilities/reorder_longitudes_cli.py | 23 ++++++++++++++++------- setup.py | 2 +- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/nes/utilities/reorder_longitudes_cli.py b/nes/utilities/reorder_longitudes_cli.py index 6d510b3..f6f55b4 100644 --- a/nes/utilities/reorder_longitudes_cli.py +++ b/nes/utilities/reorder_longitudes_cli.py @@ -1,17 +1,26 @@ -from load_nes import open_netcdf +from ..load_nes import open_netcdf +import argparse - -def reorder_longitudes_cli(infile, outfile): +def reorder_longitudes_cli(): """ Converts longitudes in a NetCDF file and saves the modified file. - Args: - infile (str): Path to the input NetCDF file. - outfile (str): Path to save the output NetCDF file with converted longitudes. - Returns: None """ + parser = argparse.ArgumentParser(description="Reorder longitudes in a NetCDF file.") + + # Define expected arguments + parser.add_argument("infile", help="Input NetCDF file path") + parser.add_argument("outfile", help="Output NetCDF file path") + + # Parse arguments + args = parser.parse_args() + + # Call your function with parsed arguments + infile = args.infile + outfile = args.outfile + # open nc = open_netcdf(infile) # load diff --git a/setup.py b/setup.py index 0e62388..f97ebbd 100755 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ setup( entry_points={ "console_scripts": [ - "reorder_longitudes infile outfile=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli", + "reorder_longitudes=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli", ] } ) \ No newline at end of file -- GitLab From e433953a348bcac3ed4a971c789eaa00d2fbdded Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Thu, 13 Feb 2025 10:01:45 +0100 Subject: [PATCH 19/33] change parallel method to Y --- tests/2.5-test_longitude_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/2.5-test_longitude_conversion.py b/tests/2.5-test_longitude_conversion.py index b0d3f39..fa201a9 100644 --- a/tests/2.5-test_longitude_conversion.py +++ b/tests/2.5-test_longitude_conversion.py @@ -10,7 +10,7 @@ comm = MPI.COMM_WORLD rank = comm.Get_rank() size = comm.Get_size() -parallel_method = 'N' +parallel_method = 'Y' result_path = "Times_test_2.5_longitudes_{0}_{1:03d}.csv".format(parallel_method, size) result = pd.DataFrame(index=['read', 'calculate', 'write'], -- GitLab From fc90ee060d0602cd86876e4054c13e27fc92dd1e Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Thu, 13 Feb 2025 10:55:57 +0100 Subject: [PATCH 20/33] add nes to entry point --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f97ebbd..71994b5 100755 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ setup( entry_points={ 
"console_scripts": [ - "reorder_longitudes=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli", + "nes_reorder_longitudes=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli", ] } ) \ No newline at end of file -- GitLab From 8209fab8eb35185abead41c61dd750b978862ace Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Fri, 14 Feb 2025 09:52:10 +0100 Subject: [PATCH 21/33] adjust test script to avoid permission errors --- tests/2.5-test_longitude_conversion.py | 1 + tests/test_bash.mn5.sh | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/2.5-test_longitude_conversion.py b/tests/2.5-test_longitude_conversion.py index fa201a9..0148783 100644 --- a/tests/2.5-test_longitude_conversion.py +++ b/tests/2.5-test_longitude_conversion.py @@ -36,6 +36,7 @@ result.loc['read', test_name] = timeit.default_timer() - st_time # CONVERT LONGITUDES st_time = timeit.default_timer() +nessy_1.load() print('Rank', rank, '-', 'Convert Longitudes', nessy_1.convert_longitudes()) comm.Barrier() result.loc['calculate', test_name] = timeit.default_timer() - st_time diff --git a/tests/test_bash.mn5.sh b/tests/test_bash.mn5.sh index fbcfb74..0966f3c 100644 --- a/tests/test_bash.mn5.sh +++ b/tests/test_bash.mn5.sh @@ -15,10 +15,11 @@ module purge module load anaconda source /apps/GPP/ANACONDA/2023.07/etc/profile.d/conda.sh conda deactivate -conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev -export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev/lib/python3.12/site-packages +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/PHENOMENA_v0.2.0_bsc124195 +#export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev/lib/python3.12/site-packages +export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/PHENOMENA_v0.2.0_${USER} export SLURM_CPU_BIND=none -export PYTHONPATH=/gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES:$PYTHONPATH +#export PYTHONPATH=/gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES:$PYTHONPATH #conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4 @@ -26,7 +27,7 @@ export PYTHONPATH=/gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES:$PYTHONPATH #export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4/lib/python3.12/site-packages #export SLURM_CPU_BIND=none -cd /gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES/tests || exit +cd /gpfs/scratch/bsc32/${USER}/AC_PostProcess/NES/tests || exit mpirun -np 4 python 1.1-test_read_write_projection.py mpirun -np 4 python 1.2-test_create_projection.py @@ -36,7 +37,7 @@ mpirun -np 4 python 2.1-test_spatial_join.py mpirun -np 4 python 2.2-test_create_shapefile.py mpirun -np 4 python 2.3-test_bounds.py mpirun -np 4 python 2.4-test_cell_area.py -mpirun -np 4 python 2.5-test_longitude_conversion.py +mpirun -np 1 python 2.5-test_longitude_conversion.py mpirun -np 4 python 3.1-test_vertical_interp.py mpirun -np 4 python 3.2-test_horiz_interp_bilinear.py -- GitLab From ad6b15b447c69a6b175c8d862d8f6379fea4b721 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Tue, 18 Feb 2025 17:40:07 +0100 Subject: [PATCH 22/33] Added MOCAGE writer with default units "Kmol/m-2.s-1" or "Kg/m-2.s-1" --- CHANGELOG.rst | 6 +- nes/__init__.py | 4 +- nes/nc_projections/default_nes.py | 26 +- nes/nes_formats/__init__.py | 4 +- nes/nes_formats/mocage_format.py | 387 ++++++++++++++++++++++++++++++ 5 files changed, 415 insertions(+), 12 deletions(-) create mode 100644 nes/nes_formats/mocage_format.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 
2ef86fa..ca46888 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,13 +4,15 @@ CHANGELOG .. start-here -1.1.x +1.1.9 ============ -* Release date: +* Release date: 2025/02/18 * Changes and new features: * Add additional names for the time variable + * Added MOCAGE format + 1.1.8 ============ diff --git a/nes/__init__.py b/nes/__init__.py index 1dcabe9..1f6c5f9 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -1,5 +1,5 @@ -__date__ = "2024-10-07" -__version__ = "1.1.8" +__date__ = "2025-02-18" +__version__ = "1.1.9" __all__ = [ 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index a5533e4..671964e 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -18,7 +18,7 @@ from typing import Union, List, Dict, Any from pyproj import Proj, Transformer from ..methods import vertical_interpolation, horizontal_interpolation, cell_measures, spatial_join from ..nes_formats import to_netcdf_cams_ra, to_netcdf_monarch, to_monarch_units, to_netcdf_cmaq, to_cmaq_units, \ - to_netcdf_wrf_chem, to_wrf_chem_units + to_netcdf_wrf_chem, to_wrf_chem_units, to_netcdf_mocage, to_mocage_units class Nes(object): @@ -3350,6 +3350,8 @@ class Nes(object): self.variables = to_cmaq_units(self) elif out_format == "WRF_CHEM": self.variables = to_wrf_chem_units(self) + elif out_format == "MOCAGE": + self.variables = to_mocage_units(self) for i, (var_name, var_dict) in enumerate(self.variables.items()): for att_name, att_value in var_dict.items(): if att_name == "data": @@ -3364,10 +3366,16 @@ class Nes(object): self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 elif len(att_value.shape) == 4: - var[i_time, - self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], - self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + if len(var.shape) == 3: + # No level info + var[i_time, + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + else: + var[i_time, + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value elif len(att_value.shape) == 3: raise NotImplementedError("It is not possible to write 3D variables.") @@ -3530,9 +3538,11 @@ class Nes(object): to_netcdf_cmaq(new_nc, path, keep_open=keep_open) elif nc_type == "WRF_CHEM": to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open) + elif nc_type == "MOCAGE": + to_netcdf_mocage(new_nc, path, keep_open=keep_open) else: msg = f"Unknown NetCDF type '{nc_type}'. " - msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" + msg += "Use CAMS_RA, MONARCH, CMAQ, WRF_CHEM, MOCAGE or NES (or DEFAULT)" raise ValueError(msg) self.serial_nc = new_nc else: @@ -3548,9 +3558,11 @@ class Nes(object): to_netcdf_cmaq(self, path, keep_open=keep_open) elif nc_type == "WRF_CHEM": to_netcdf_wrf_chem(self, path, keep_open=keep_open) + elif nc_type == "MOCAGE": + to_netcdf_mocage(self, path, keep_open=keep_open) else: msg = f"Unknown NetCDF type '{nc_type}''. 
" - msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" + msg += "Use CAMS_RA, MONARCH, CMAQ, WRF_CHEM, MOCAGE or NES (or DEFAULT)" raise ValueError(msg) self.info = old_info diff --git a/nes/nes_formats/__init__.py b/nes/nes_formats/__init__.py index 39aaf30..d918831 100644 --- a/nes/nes_formats/__init__.py +++ b/nes/nes_formats/__init__.py @@ -2,8 +2,10 @@ from .cams_ra_format import to_netcdf_cams_ra from .monarch_format import to_netcdf_monarch, to_monarch_units from .cmaq_format import to_netcdf_cmaq, to_cmaq_units from .wrf_chem_format import to_netcdf_wrf_chem, to_wrf_chem_units +from .mocage_format import to_netcdf_mocage, to_mocage_units + __all__ = [ 'to_netcdf_cams_ra', 'to_netcdf_monarch', 'to_monarch_units', 'to_netcdf_cmaq', 'to_cmaq_units', - 'to_netcdf_wrf_chem', 'to_wrf_chem_units' + 'to_netcdf_wrf_chem', 'to_wrf_chem_units', 'to_netcdf_mocage', 'to_mocage_units', ] diff --git a/nes/nes_formats/mocage_format.py b/nes/nes_formats/mocage_format.py new file mode 100644 index 0000000..0a4b44b --- /dev/null +++ b/nes/nes_formats/mocage_format.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, int32, ndarray, array, float64 +from netCDF4 import Dataset +from mpi4py import MPI +from copy import deepcopy + + +# noinspection DuplicatedCode +def to_netcdf_mocage(self, path, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + keep_open : bool + Indicates if you want to keep open the NetCDH to fill the data by time-step. + """ + + self.to_dtype(float64) + + # set_global_attributes(self) + # change_variable_attributes(self) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + create_dimensions(self, netcdf) + + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create variables + create_variables(self, netcdf) + + # for att_name in GLOBAL_ATTRIBUTES_ORDER: + # netcdf.setncattr(att_name, self.global_attrs[att_name]) + + # Close NetCDF + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +# def change_variable_attributes(self): +# """ +# Modify the emission list to be consistent to use the output as input for WRF-CHEM model. +# +# Parameters +# ---------- +# self : nes.Nes +# A Nes Object. 
+# """ +# +# for var_name in self.variables.keys(): +# if self.variables[var_name]["units"] == "mol.h-1.km-2": +# self.variables[var_name]["FieldType"] = int32(104) +# self.variables[var_name]["MemoryOrder"] = "XYZ" +# self.variables[var_name]["description"] = "EMISSIONS" +# self.variables[var_name]["units"] = "mol km^-2 hr^-1" +# self.variables[var_name]["stagger"] = "" +# self.variables[var_name]["coordinates"] = "XLONG XLAT" +# +# elif self.variables[var_name]["units"] == "ug.s-1.m-2": +# self.variables[var_name]["FieldType"] = int32(104) +# self.variables[var_name]["MemoryOrder"] = "XYZ" +# self.variables[var_name]["description"] = "EMISSIONS" +# self.variables[var_name]["units"] = "ug/m3 m/s" +# self.variables[var_name]["stagger"] = "" +# self.variables[var_name]["coordinates"] = "XLONG XLAT" +# +# else: +# raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( +# self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") +# +# if "long_name" in self.variables[var_name].keys(): +# del self.variables[var_name]["long_name"] +# +# return None + + +def to_mocage_units(self): + """ + Change the data values according to the MOCAGE conventions. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + self.calculate_grid_area(overwrite=False) + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol/m2/s": + # 100 kg -> mol + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 1000, dtype=float32) + elif self.variables[var_name]["units"] == "kg/m2/s": + pass + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol/m2/s' or 'kg/m2/s'") + self.variables[var_name]["dtype"] = float32 + + return self.variables + + +def create_times_var(self): + """ + Create the content of the MOCAGE variable times. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + numpy.ndarray + Array with the content of time. + """ + start_time = self.time[0] + hours = array([(dt - start_time).total_seconds() // 3600 for dt in self.time], dtype=float64) + + return hours + + +# noinspection DuplicatedCode +# def set_global_attributes(self): +# """ +# Set the NetCDF global attributes +# +# Parameters +# ---------- +# self : nes.Nes +# A Nes Object. 
+# """ +# +# # now = datetime.now() +# # if len(self.time) > 1: +# # tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 +# # else: +# # tstep = 1 * 10000 +# +# current_attributes = deepcopy(self.global_attrs) +# del self.global_attrs +# +# self.global_attrs = {"TITLE": None, +# "START_DATE": self.time[0].strftime("%Y-%m-%d_%H:%M:%S"), +# "WEST-EAST_GRID_DIMENSION": None, # Projection dependent attributes +# "SOUTH-NORTH_GRID_DIMENSION": None, # Projection dependent attributes +# "BOTTOM-TOP_GRID_DIMENSION": int32(45), +# "DX": None, # Projection dependent attributes +# "DY": None, # Projection dependent attributes +# "GRIDTYPE": "C", +# "DIFF_OPT": int32(1), +# "KM_OPT": int32(4), +# "DAMP_OPT": int32(3), +# "DAMPCOEF": float32(0.2), +# "KHDIF": float32(0.), +# "KVDIF": float32(0.), +# "MP_PHYSICS": int32(6), +# "RA_LW_PHYSICS": int32(4), +# "RA_SW_PHYSICS": int32(4), +# "SF_SFCLAY_PHYSICS": int32(2), +# "SF_SURFACE_PHYSICS": int32(2), +# "BL_PBL_PHYSICS": int32(8), +# "CU_PHYSICS": int32(0), +# "SF_LAKE_PHYSICS": int32(0), +# "SURFACE_INPUT_SOURCE": None, # Projection dependent attributes +# "SST_UPDATE": int32(0), +# "GRID_FDDA": int32(0), +# "GFDDA_INTERVAL_M": int32(0), +# "GFDDA_END_H": int32(0), +# "GRID_SFDDA": int32(0), +# "SGFDDA_INTERVAL_M": int32(0), +# "SGFDDA_END_H": int32(0), +# "WEST-EAST_PATCH_START_UNSTAG": None, # Projection dependent attributes +# "WEST-EAST_PATCH_END_UNSTAG": None, # Projection dependent attributes +# "WEST-EAST_PATCH_START_STAG": None, # Projection dependent attributes +# "WEST-EAST_PATCH_END_STAG": None, # Projection dependent attributes +# "SOUTH-NORTH_PATCH_START_UNSTAG": None, # Projection dependent attributes +# "SOUTH-NORTH_PATCH_END_UNSTAG": None, # Projection dependent attributes +# "SOUTH-NORTH_PATCH_START_STAG": None, # Projection dependent attributes +# "SOUTH-NORTH_PATCH_END_STAG": None, # Projection dependent attributes +# "BOTTOM-TOP_PATCH_START_UNSTAG": None, +# "BOTTOM-TOP_PATCH_END_UNSTAG": None, +# "BOTTOM-TOP_PATCH_START_STAG": None, +# "BOTTOM-TOP_PATCH_END_STAG": None, +# "GRID_ID": int32(1), +# "PARENT_ID": int32(0), +# "I_PARENT_START": int32(1), +# "J_PARENT_START": int32(1), +# "PARENT_GRID_RATIO": int32(1), +# "DT": float32(18.), +# "CEN_LAT": None, # Projection dependent attributes +# "CEN_LON": None, # Projection dependent attributes +# "TRUELAT1": None, # Projection dependent attributes +# "TRUELAT2": None, # Projection dependent attributes +# "MOAD_CEN_LAT": None, # Projection dependent attributes +# "STAND_LON": None, # Projection dependent attributes +# "POLE_LAT": None, # Projection dependent attributes +# "POLE_LON": None, # Projection dependent attributes +# "GMT": float32(self.time[0].hour), +# "JULYR": int32(self.time[0].year), +# "JULDAY": int32(self.time[0].strftime("%j")), +# "MAP_PROJ": None, # Projection dependent attributes +# "MMINLU": "MODIFIED_IGBP_MODIS_NOAH", +# "NUM_LAND_CAT": int32(41), +# "ISWATER": int32(17), +# "ISLAKE": int32(-1), +# "ISICE": int32(15), +# "ISURBAN": int32(13), +# "ISOILWATER": int32(14), +# "HISTORY": "", # Editable +# } +# +# # Editable attributes +# float_atts = ["DAMPCOEF", "KHDIF", "KVDIF", "CEN_LAT", "CEN_LON", "DT"] +# int_atts = ["BOTTOM-TOP_GRID_DIMENSION", "DIFF_OPT", "KM_OPT", "DAMP_OPT", +# "MP_PHYSICS", "RA_LW_PHYSICS", "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", +# "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", "SURFACE_INPUT_SOURCE", "SST_UPDATE", +# "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", "SGFDDA_INTERVAL_M", 
"SGFDDA_END_H", +# "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", "BOTTOM-TOP_PATCH_START_STAG", +# "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", +# "PARENT_GRID_RATIO", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] +# str_atts = ["GRIDTYPE", "MMINLU", "HISTORY"] +# for att_name, att_value in current_attributes.items(): +# if att_name in int_atts: +# self.global_attrs[att_name] = int32(att_value) +# elif att_name in float_atts: +# self.global_attrs[att_name] = float32(att_value) +# elif att_name in str_atts: +# self.global_attrs[att_name] = str(att_value) +# +# # Projection dependent attributes +# if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): +# self.global_attrs["WEST-EAST_GRID_DIMENSION"] = int32(len(self._full_x["data"]) + 1) +# self.global_attrs["SOUTH-NORTH_GRID_DIMENSION"] = int32(len(self._full_y["data"]) + 1) +# self.global_attrs["DX"] = float32(self._full_x["data"][1] - self._full_x["data"][0]) +# self.global_attrs["DY"] = float32(self._full_y["data"][1] - self._full_y["data"][0]) +# self.global_attrs["SURFACE_INPUT_SOURCE"] = int32(1) +# self.global_attrs["WEST-EAST_PATCH_START_UNSTAG"] = int32(1) +# self.global_attrs["WEST-EAST_PATCH_END_UNSTAG"] = int32(len(self._full_x["data"])) +# self.global_attrs["WEST-EAST_PATCH_START_STAG"] = int32(1) +# self.global_attrs["WEST-EAST_PATCH_END_STAG"] = int32(len(self._full_x["data"]) + 1) +# self.global_attrs["SOUTH-NORTH_PATCH_START_UNSTAG"] = int32(1) +# self.global_attrs["SOUTH-NORTH_PATCH_END_UNSTAG"] = int32(len(self._full_y["data"])) +# self.global_attrs["SOUTH-NORTH_PATCH_START_STAG"] = int32(1) +# self.global_attrs["SOUTH-NORTH_PATCH_END_STAG"] = int32(len(self._full_y["data"]) + 1) +# +# self.global_attrs["POLE_LAT"] = float32(90) +# self.global_attrs["POLE_LON"] = float32(0) +# +# if isinstance(self, nes.LCCNes): +# self.global_attrs["MAP_PROJ"] = int32(1) +# self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"][0]) +# self.global_attrs["TRUELAT2"] = float32(self.projection_data["standard_parallel"][1]) +# self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) +# self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) +# self.global_attrs["CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) +# self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) +# elif isinstance(self, nes.MercatorNes): +# self.global_attrs["MAP_PROJ"] = int32(3) +# self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"]) +# self.global_attrs["TRUELAT2"] = float32(0) +# self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["standard_parallel"]) +# self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) +# self.global_attrs["CEN_LAT"] = float32(self.projection_data["standard_parallel"]) +# self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) +# +# return None + + +def create_dimensions(self, netcdf): + """ + Create "time", "longitudes", "latitudes" dimensions. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + netcdf.createDimension("time", len(self.get_full_times())) + netcdf.createDimension('longitudes', self.get_full_longitudes()["data"].shape[-1]) + netcdf.createDimension('latitudes', self.get_full_latitudes()["data"].shape[0]) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "time", "latitudes", and "longitudes" variables. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + netcdf : Dataset + NetCDF object. + """ + # Time + if self.time is None: + time = netcdf.createVariable('time', 'd', ('time',)) + time.units = "UTC_hour" + time[:] = [0.] + else: + time = netcdf.createVariable('time', 'd', ('time',)) + time.units = "UTC_hour" + if self.master: + time[:] = create_times_var(self) + + # Latitude + lats = netcdf.createVariable('latitudes', 'd', ('latitudes',)) + lats.units = "degree_north" + if self.master: + lats[:] = array(self._full_lat['data'], dtype=float64) + + # Longitude + lons = netcdf.createVariable('longitudes', 'd', ('longitudes',)) + lons.units = "degree_east" + if self.master: + lons[:] = array(self._full_lon['data'], dtype=float64) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf): + """ + Create the netCDF file variables. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + for var_name, var_info in self.variables.items(): + var = netcdf.createVariable(var_name, 'd', ('time', 'latitudes', 'longitudes',)) + + var.units = var_info["units"] + + if var_info["data"] is not None: + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + + if isinstance(var_info["data"], int) and var_info["data"] == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(var_info["data"].shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"] + + return None -- GitLab From 5ba597fce38f90d4ac764fa036fc34d6a355ffae Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Thu, 20 Feb 2025 15:11:33 +0100 Subject: [PATCH 23/33] raise valueerror if more than one process --- nes/utilities/reorder_longitudes_cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nes/utilities/reorder_longitudes_cli.py b/nes/utilities/reorder_longitudes_cli.py index f6f55b4..8f97c80 100644 --- a/nes/utilities/reorder_longitudes_cli.py +++ b/nes/utilities/reorder_longitudes_cli.py @@ -1,5 +1,7 @@ from ..load_nes import open_netcdf import argparse +from mpi4py import MPI + def reorder_longitudes_cli(): """ @@ -8,6 +10,10 @@ def reorder_longitudes_cli(): Returns: None """ + comm = MPI.COMM_WORLD + if comm.Get_size() > 1: + raise ValueError("Parallel not implemented yet. 
This script must be run with a single process.") + parser = argparse.ArgumentParser(description="Reorder longitudes in a NetCDF file.") # Define expected arguments -- GitLab From bbbb8427feff046efaaacef02d0cbf516336d8d7 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Fri, 21 Feb 2025 11:30:06 +0100 Subject: [PATCH 24/33] MOCAGE & units little improvements --- nes/nes_formats/mocage_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nes/nes_formats/mocage_format.py b/nes/nes_formats/mocage_format.py index 0a4b44b..0652cc0 100644 --- a/nes/nes_formats/mocage_format.py +++ b/nes/nes_formats/mocage_format.py @@ -115,7 +115,7 @@ def to_mocage_units(self): for var_name in self.variables.keys(): if isinstance(self.variables[var_name]["data"], ndarray): if self.variables[var_name]["units"] == "mol/m2/s": - # 100 kg -> mol + # 1000 kmol -> mol self.variables[var_name]["data"] = array( self.variables[var_name]["data"] * 1000, dtype=float32) elif self.variables[var_name]["units"] == "kg/m2/s": -- GitLab From 99bb54f893c8b04868c47fb5607eb6d3baee9606 Mon Sep 17 00:00:00 2001 From: Johanna Gehlen Date: Mon, 3 Mar 2025 10:07:32 +0100 Subject: [PATCH 25/33] change path to testing file on mn5 --- tests/2.5-test_longitude_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/2.5-test_longitude_conversion.py b/tests/2.5-test_longitude_conversion.py index 0148783..ac28da9 100644 --- a/tests/2.5-test_longitude_conversion.py +++ b/tests/2.5-test_longitude_conversion.py @@ -28,7 +28,7 @@ if rank == 0: st_time = timeit.default_timer() # NC file with longitudes in [0, 360]. -path_1 = '/gpfs/scratch/bsc32/bsc124195/preprocessed_backup.nc' +path_1 = '/gpfs/projects/bsc32/models/NES_tutorial_data/preprocessed_backup.nc' nessy_1 = open_netcdf(path=path_1, parallel_method=parallel_method, info=True) comm.Barrier() -- GitLab From cfb1fa10d2001fa01d46bc138a7b31b1449a09ae Mon Sep 17 00:00:00 2001 From: pserrano Date: Fri, 21 Mar 2025 13:03:30 +0100 Subject: [PATCH 26/33] Added catching of vertical interpolation error --- nes/methods/vertical_interpolation.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nes/methods/vertical_interpolation.py b/nes/methods/vertical_interpolation.py index 23ca712..5e75bfb 100644 --- a/nes/methods/vertical_interpolation.py +++ b/nes/methods/vertical_interpolation.py @@ -295,6 +295,20 @@ def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", else: idx_above = where(new_levels < src_levels_aux[-1]) dst_data[t, idx_above, j, i] = fill_value[1] + # catch interp1d unique values error + except ValueError as e: + if str(e) == "Expect x to not have duplicates": + dst_data[t, :, j, i] = np.empty(len(new_levels), dtype=np.float64) + else: + print("time lat lon", t, j, i) + print("***********************") + print("LEVELS", src_levels_aux) + print("DATA", np.array(self.variables[var_name]['data'][t, :, j, i], dtype=np.float64)) + print("METHOD", kind) + print("FILL_VALUE", fill_value) + print("+++++++++++++++++++++++") + raise Exception(str(e)) + except Exception as e: print("time lat lon", t, j, i) print("***********************") -- GitLab From 1cb4a6d4a5bf0f9d1fc98a43bbc411f8672bd494 Mon Sep 17 00:00:00 2001 From: pserrano Date: Fri, 21 Mar 2025 15:14:24 +0100 Subject: [PATCH 27/33] Solved issue with numpy import --- nes/methods/vertical_interpolation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nes/methods/vertical_interpolation.py 
b/nes/methods/vertical_interpolation.py index 5e75bfb..436536f 100644 --- a/nes/methods/vertical_interpolation.py +++ b/nes/methods/vertical_interpolation.py @@ -298,12 +298,12 @@ def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", # catch interp1d unique values error except ValueError as e: if str(e) == "Expect x to not have duplicates": - dst_data[t, :, j, i] = np.empty(len(new_levels), dtype=np.float64) + dst_data[t, :, j, i] = empty(len(new_levels), dtype=float64) else: print("time lat lon", t, j, i) print("***********************") print("LEVELS", src_levels_aux) - print("DATA", np.array(self.variables[var_name]['data'][t, :, j, i], dtype=np.float64)) + print("DATA", array(self.variables[var_name]['data'][t, :, j, i], dtype=float64)) print("METHOD", kind) print("FILL_VALUE", fill_value) print("+++++++++++++++++++++++") -- GitLab From 965dad7bf7f3c314ad4221b27015224b7478f174 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Tue, 25 Mar 2025 15:05:53 +0100 Subject: [PATCH 28/33] Little bugfix on WRF_Chem metadata (TITLE) --- nes/nes_formats/wrf_chem_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nes/nes_formats/wrf_chem_format.py b/nes/nes_formats/wrf_chem_format.py index 6a06af4..84fedde 100644 --- a/nes/nes_formats/wrf_chem_format.py +++ b/nes/nes_formats/wrf_chem_format.py @@ -189,7 +189,7 @@ def set_global_attributes(self): current_attributes = deepcopy(self.global_attrs) del self.global_attrs - self.global_attrs = {"TITLE": None, + self.global_attrs = {"TITLE": "", "START_DATE": self.time[0].strftime("%Y-%m-%d_%H:%M:%S"), "WEST-EAST_GRID_DIMENSION": None, # Projection dependent attributes "SOUTH-NORTH_GRID_DIMENSION": None, # Projection dependent attributes -- GitLab From 2ddb30ce5fc1886ce6854a8ede701aaa072501a9 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Fri, 28 Mar 2025 13:12:25 +0100 Subject: [PATCH 29/33] Little fix --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 71994b5..76bc9dc 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python - +from nes import __version__ from setuptools import find_packages from setuptools import setup @@ -33,7 +33,7 @@ REQUIREMENTS = { setup( name='nes', license='Apache License 2.0', - version='1.1.8', + version=__version__, description='', long_description=long_description, long_description_content_type="text/markdown", -- GitLab From 77d888283ff2a893d76167037cc36ec33f4b04ae Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Tue, 15 Apr 2025 15:25:05 +0200 Subject: [PATCH 30/33] removed include_fields from reading geostructures --- nes/methods/spatial_join.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nes/methods/spatial_join.py b/nes/methods/spatial_join.py index eb35864..d96033c 100644 --- a/nes/methods/spatial_join.py +++ b/nes/methods/spatial_join.py @@ -94,11 +94,12 @@ def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox # Reading external shapefile if self.master and info: print("\tReading external shapefile") - # ext_shp = read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry) if apply_bbox: - ext_shp = read_file(ext_shp, include_fields=var_list, bbox=__get_bbox(self)) + ext_shp = read_file(ext_shp, bbox=__get_bbox(self)) else: - ext_shp = read_file(ext_shp, include_fields=var_list) + ext_shp = read_file(ext_shp) + + ext_shp = ext_shp[var_list] else: msg = "WARNING!!! " msg += "External shapefile already read. 
If you pass the path to the shapefile instead of the opened shapefile " -- GitLab From d09bf5abfffabe1ea2ab9bb1f6361bb235495b11 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Tue, 15 Apr 2025 16:39:04 +0200 Subject: [PATCH 31/33] removed include_fields from reading geostructures --- nes/methods/spatial_join.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nes/methods/spatial_join.py b/nes/methods/spatial_join.py index d96033c..cc96975 100644 --- a/nes/methods/spatial_join.py +++ b/nes/methods/spatial_join.py @@ -89,6 +89,8 @@ def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox GeoDataFrame External shapefile. """ + if var_list is None: + var_list = [] if isinstance(ext_shp, str): # Reading external shapefile @@ -99,7 +101,7 @@ def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox else: ext_shp = read_file(ext_shp) - ext_shp = ext_shp[var_list] + ext_shp = ext_shp[var_list + ["geometry"]] else: msg = "WARNING!!! " msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened shapefile " -- GitLab From 91f8d7113b01714f797353f41b8eaec2f23e45c0 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Tue, 22 Apr 2025 16:04:07 +0200 Subject: [PATCH 32/33] Preparing 1.1.9 release --- CHANGELOG.rst | 3 +- Makefile | 107 + build/lib/nes/__init__.py | 13 - build/lib/nes/create_nes.py | 191 - build/lib/nes/load_nes.py | 340 -- build/lib/nes/methods/__init__.py | 8 - build/lib/nes/methods/cell_measures.py | 265 - .../nes/methods/horizontal_interpolation.py | 762 --- build/lib/nes/methods/spatial_join.py | 305 -- .../lib/nes/methods/vertical_interpolation.py | 335 -- build/lib/nes/nc_projections/__init__.py | 15 - build/lib/nes/nc_projections/default_nes.py | 4252 ----------------- build/lib/nes/nc_projections/latlon_nes.py | 395 -- build/lib/nes/nc_projections/lcc_nes.py | 630 --- build/lib/nes/nc_projections/mercator_nes.py | 610 --- build/lib/nes/nc_projections/points_nes.py | 755 --- .../nes/nc_projections/points_nes_ghost.py | 818 ---- .../nc_projections/points_nes_providentia.py | 650 --- build/lib/nes/nc_projections/rotated_nes.py | 694 --- .../nes/nc_projections/rotated_nested_nes.py | 147 - build/lib/nes/nes_formats/__init__.py | 9 - build/lib/nes/nes_formats/cams_ra_format.py | 219 - build/lib/nes/nes_formats/cmaq_format.py | 355 -- build/lib/nes/nes_formats/monarch_format.py | 114 - build/lib/nes/nes_formats/wrf_chem_format.py | 398 -- .../tests/1.1-test_read_write_projection.py | 221 - build/lib/tests/1.2-test_create_projection.py | 190 - build/lib/tests/1.3-test_selecting.py | 183 - build/lib/tests/2.1-test_spatial_join.py | 329 -- build/lib/tests/2.2-test_create_shapefile.py | 201 - build/lib/tests/2.3-test_bounds.py | 275 -- build/lib/tests/2.4-test_cell_area.py | 195 - build/lib/tests/3.1-test_vertical_interp.py | 108 - .../tests/3.2-test_horiz_interp_bilinear.py | 222 - .../3.3-test_horiz_interp_conservative.py | 248 - build/lib/tests/4.1-test_stats.py | 97 - build/lib/tests/4.2-test_sum.py | 76 - build/lib/tests/4.3-test_write_timestep.py | 151 - build/lib/tests/__init__.py | 0 build/lib/tests/unit/__init__.py | 0 build/lib/tests/unit/test_imports.py | 106 - environment.yml | 31 +- nes/__init__.py | 2 +- tests/test_bash.mn4.sh | 39 - tests/test_bash.mn5.sh | 20 +- ...est_bash.nord3v2.sh => test_bash.nord4.sh} | 8 +- tests/unit/test_imports.py | 51 +- 47 files changed, 190 insertions(+), 14953 deletions(-) create mode 100644 Makefile delete mode 100644 
build/lib/nes/__init__.py delete mode 100644 build/lib/nes/create_nes.py delete mode 100644 build/lib/nes/load_nes.py delete mode 100644 build/lib/nes/methods/__init__.py delete mode 100644 build/lib/nes/methods/cell_measures.py delete mode 100644 build/lib/nes/methods/horizontal_interpolation.py delete mode 100644 build/lib/nes/methods/spatial_join.py delete mode 100644 build/lib/nes/methods/vertical_interpolation.py delete mode 100644 build/lib/nes/nc_projections/__init__.py delete mode 100644 build/lib/nes/nc_projections/default_nes.py delete mode 100644 build/lib/nes/nc_projections/latlon_nes.py delete mode 100644 build/lib/nes/nc_projections/lcc_nes.py delete mode 100644 build/lib/nes/nc_projections/mercator_nes.py delete mode 100644 build/lib/nes/nc_projections/points_nes.py delete mode 100644 build/lib/nes/nc_projections/points_nes_ghost.py delete mode 100644 build/lib/nes/nc_projections/points_nes_providentia.py delete mode 100644 build/lib/nes/nc_projections/rotated_nes.py delete mode 100644 build/lib/nes/nc_projections/rotated_nested_nes.py delete mode 100644 build/lib/nes/nes_formats/__init__.py delete mode 100644 build/lib/nes/nes_formats/cams_ra_format.py delete mode 100644 build/lib/nes/nes_formats/cmaq_format.py delete mode 100644 build/lib/nes/nes_formats/monarch_format.py delete mode 100644 build/lib/nes/nes_formats/wrf_chem_format.py delete mode 100644 build/lib/tests/1.1-test_read_write_projection.py delete mode 100644 build/lib/tests/1.2-test_create_projection.py delete mode 100644 build/lib/tests/1.3-test_selecting.py delete mode 100644 build/lib/tests/2.1-test_spatial_join.py delete mode 100644 build/lib/tests/2.2-test_create_shapefile.py delete mode 100644 build/lib/tests/2.3-test_bounds.py delete mode 100644 build/lib/tests/2.4-test_cell_area.py delete mode 100644 build/lib/tests/3.1-test_vertical_interp.py delete mode 100644 build/lib/tests/3.2-test_horiz_interp_bilinear.py delete mode 100644 build/lib/tests/3.3-test_horiz_interp_conservative.py delete mode 100644 build/lib/tests/4.1-test_stats.py delete mode 100644 build/lib/tests/4.2-test_sum.py delete mode 100644 build/lib/tests/4.3-test_write_timestep.py delete mode 100644 build/lib/tests/__init__.py delete mode 100644 build/lib/tests/unit/__init__.py delete mode 100644 build/lib/tests/unit/test_imports.py delete mode 100644 tests/test_bash.mn4.sh rename tests/{test_bash.nord3v2.sh => test_bash.nord4.sh} (86%) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b458adf..3633223 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,11 +7,12 @@ CHANGELOG 1.1.9 ============ -* Release date: 2025/02/18 +* Release date: 2025/04/22 * Changes and new features: * Add additional names for the time variable * Added MOCAGE format + * Bugfix on vertical interpolation. 1.1.8 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7c493d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,107 @@ +version=1.1.9 + +### PATHS: Change them if needed +# Paths to the NES software +#-------------------------------------------------------------------------------------- +# Extracting directory where the makefile is +MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) + +software_path=$(MAKEFILE_DIR) + +#-------------------------------------------------------------------------------------- +# LOCAL or MN5: Change this depending if you want to do the make locally or in MN5. Default is MN5 +SOFTWARE_PROJECT_PATH=$(software_path) + +CONDA_ENV_DIR ?= "." 
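+# '?=' assigns only when CONDA_ENV_DIR is not already set in the environment or
+# on the command line, so the install prefix can be overridden per invocation,
+# e.g. (the path is an example): make full_installation CONDA_ENV_DIR=/path/to/conda/envs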
+CONDA_ENV_PATH=$(CONDA_ENV_DIR)/NES_v$(version) +#-------------------------------------------------------------------------------------- +# If the installation is for development, the name of the user will be added to the end of +# the name of the environment +create_env_for_dev: CONDA_ENV_PATH=$(CONDA_ENV_DIR)/NES_$(USER) +set_env_vars_for_dev: CONDA_ENV_PATH=$(CONDA_ENV_DIR)/NES_$(USER) +install_software_for_dev: CONDA_ENV_PATH=$(CONDA_ENV_DIR)/NES_$(USER) +full_installation_for_dev: CONDA_ENV_PATH=$(CONDA_ENV_DIR)/NES_$(USER) + +# Default path to the setup script +SETUP_PY=$(SOFTWARE_PROJECT_PATH)/setup.py + +# Default conda environment yaml file +ENV_YML="$(SOFTWARE_PROJECT_PATH)/environment.yml" + +.PHONY: create_env create_conda_env create_env_for_dev write_activate_vars_script write_deactivate_vars_script install_software install_software_for_dev full_installation full_installation_for_dev full_local_installation +# Task to create the conda environment +create_env: + @echo "Creating environment in: $(CONDA_ENV_PATH)" + mamba env create -p $(CONDA_ENV_PATH) -f $(ENV_YML) + +create_conda_env: + @echo "Creating environment in: $(CONDA_ENV_PATH)" + conda env create -p $(CONDA_ENV_PATH) -f $(ENV_YML) + +# Task to create the directories activate.d and deactivate.d inside the environment +create_activate_deactivate_dirs: + mkdir -p $(CONDA_ENV_PATH)/etc/conda/activate.d + mkdir -p $(CONDA_ENV_PATH)/etc/conda/deactivate.d + +#Task to write the env_vars.sh inside activate.d to set those variables when the environment is activated +write_activate_vars_script: + echo '#!/bin/bash' > $(CONDA_ENV_PATH)/etc/conda/activate.d/env_vars.sh + echo 'export SLURM_CPU_BIND=none' >> $(CONDA_ENV_PATH)/etc/conda/activate.d/env_vars.sh + +#Task to write the env_vars.sh inside deactivate.d to unset those variables when the environment is deactivated +write_deactivate_vars_script: + echo '#!/bin/bash' > $(CONDA_ENV_PATH)/etc/conda/deactivate.d/env_vars.sh + echo 'unset PYTHONPATH' >> $(CONDA_ENV_PATH)/etc/conda/deactivate.d/env_vars.sh + +# Task to set environment variables inside the Conda environment +set_env_vars: create_activate_deactivate_dirs write_activate_vars_script + @echo "Setting environment variables..." + +set_env_vars_for_dev: create_activate_deactivate_dirs write_activate_vars_script + @echo "Setting environment variables..." + +# Task to install the software using pip install inside the Conda environment +install_software: + # Install NES + conda run -p $(CONDA_ENV_PATH) python -m pip install $(SOFTWARE_PROJECT_PATH) + @echo "Installation completed. Run 'conda activate $(CONDA_ENV_PATH)' to use the environment." + +# Task to install the software using pip install inside the Conda environment for development +install_software_for_dev: + # Install NES + conda run -p $(CONDA_ENV_PATH) python -m pip install -e $(SOFTWARE_PROJECT_PATH) + @echo "Installation completed. Run 'conda activate $(CONDA_ENV_PATH)' to use the environment." + +# Task to test the imports of the required libraries +run_import_test: + conda run -p $(CONDA_ENV_PATH) pytest $(SOFTWARE_PROJECT_PATH)/tests/unit/test_imports.py + +run_tests: run_import_test + @echo "Testing completed." + +# Combined task to make full installation for user +full_installation: create_env set_env_vars install_software run_tests + @echo "Installation and testing completed. \ + Run 'conda activate $(CONDA_ENV_PATH)' to use the environment." 
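+# Example invocations (assuming mamba is on PATH, since create_env uses
+# 'mamba env create'; full_conda_installation below relies on plain conda):
+#   make full_installation
+#   make full_installation_for_dev   # editable install in an env named NES_$(USER)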
+ +full_conda_installation: create_conda_env set_env_vars install_software run_tests + @echo "Installation and testing completed. \ + Run 'conda activate $(CONDA_ENV_PATH)' to use the environment." + +# Combined task to make full installation for developer +full_installation_for_dev: create_env set_env_vars_for_dev install_software_for_dev run_tests + @echo "Installation for development and testing completed. \ + Run 'conda activate $(CONDA_ENV_PATH)' to use the environment." + +# Combined task to make full local installation for user +full_local_installation: create_env install_software run_tests + @echo "Local installation and testing completed. \ + Run 'conda activate $(CONDA_ENV_PATH)' to use the environment." + +# Task to clean up the environment +clean: + conda env remove -p $(CONDA_ENV_PATH) -y + +all: full_installation + @echo "Installation completed. Run 'conda activate $(CONDA_ENV_PATH)' to use the environment.\ + If you want to make the installation for development, use 'make full_installation_for_dev'" diff --git a/build/lib/nes/__init__.py b/build/lib/nes/__init__.py deleted file mode 100644 index 1dcabe9..0000000 --- a/build/lib/nes/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -__date__ = "2024-10-07" -__version__ = "1.1.8" -__all__ = [ - 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', - 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' -] - -from .load_nes import open_netcdf, concatenate_netcdfs -# from .load_nes import open_raster -from .create_nes import create_nes, from_shapefile -from .methods.cell_measures import calculate_geometry_area -from .nc_projections import (Nes, LatLonNes, LCCNes, RotatedNes, RotatedNestedNes, MercatorNes, PointsNesProvidentia, - PointsNes, PointsNesGHOST) diff --git a/build/lib/nes/create_nes.py b/build/lib/nes/create_nes.py deleted file mode 100644 index ce8b619..0000000 --- a/build/lib/nes/create_nes.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python - -import warnings -import sys -from netCDF4 import num2date -from mpi4py import MPI -from .nc_projections import PointsNes, LatLonNes, RotatedNes, RotatedNestedNes, LCCNes, MercatorNes - - -def create_nes(comm=None, info=False, projection=None, parallel_method="Y", balanced=False, - times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, - **kwargs): - """ - Create a Nes class from scratch. - - Parameters - ---------- - comm : MPI.Comm, optional - MPI Communicator. If None, uses MPI.COMM_WORLD. - info : bool, optional - Indicates if reading/writing info should be provided. Default is False. - projection : str, optional - The projection type. Accepted values are None, "regular", "global", "rotated", "rotated-nested", "lcc", - "mercator". - parallel_method : str, optional - The parallelization method to use. Default is "Y". Accepted values are ["X", "Y", "T"]. - balanced : bool, optional - Indicates if balanced parallelization is desired. Balanced datasets cannot be written in chunking mode. - Default is False. - times : list of datetime, optional - List of datetime objects representing the time dimension. If None, a default time array is created. - avoid_first_hours : int, optional - Number of hours to remove from the start of the time steps. Default is 0. - avoid_last_hours : int, optional - Number of hours to remove from the end of the time steps. Default is 0. - first_level : int, optional - Index of the first level to use. 
Default is 0. - last_level : int or None, optional - Index of the last level to use. If None, the last level is used. Default is None. - **kwargs : additional arguments - Additional parameters required for specific projections. - - Returns - ------- - nes : Nes - An instance of the Nes class based on the specified parameters and projection. - - Raises - ------ - ValueError - If any required projection-specific parameters are missing or if invalid parameters are provided. - NotImplementedError - If an unsupported parallel method or projection type is specified. - - Notes - ----- - The function dynamically creates an instance of a specific Nes subclass based on the provided projection. - The required parameters for each projection type are: - - None: ["lat", "lon"] - - "regular": ["lat_orig", "lon_orig", "inc_lat", "inc_lon", "n_lat", "n_lon"] - - "global": ["inc_lat", "inc_lon"] - - "rotated": ["centre_lat", "centre_lon", "west_boundary", "south_boundary", "inc_rlat", "inc_rlon"] - - "rotated-nested": ["parent_grid_path", "parent_ratio", "i_parent_start", "j_parent_start", "n_rlat", "n_rlon"] - - "lcc": ["lat_1", "lat_2", "lon_0", "lat_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] - - "mercator": ["lat_ts", "lon_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] - - Example - ------- - >>> nes = create_nes(projection="regular", lat_orig=0, lon_orig=0, inc_lat=1, inc_lon=1, n_lat=180, n_lon=360) - """ - - if comm is None: - comm = MPI.COMM_WORLD - else: - comm = comm - - # Create time array - if times is None: - units = "days since 1996-12-31 00:00:00" - calendar = "standard" - times = num2date([0], units=units, calendar=calendar) - times = [aux.replace(second=0, microsecond=0) for aux in times] - else: - if not isinstance(times, list): - times = list(times) - - # Check if the parameters that are required to create the object have been defined in kwargs - kwargs_list = [] - for name, value in kwargs.items(): - kwargs_list.append(name) - - if projection is None: - required_vars = ["lat", "lon"] - elif projection == "regular": - required_vars = ["lat_orig", "lon_orig", "inc_lat", "inc_lon", "n_lat", "n_lon"] - elif projection == "global": - required_vars = ["inc_lat", "inc_lon"] - elif projection == "rotated": - required_vars = ["centre_lat", "centre_lon", "west_boundary", "south_boundary", "inc_rlat", "inc_rlon"] - elif projection == "rotated-nested": - required_vars = ["parent_grid_path", "parent_ratio", "i_parent_start", "j_parent_start", "n_rlat", "n_rlon"] - elif projection == "lcc": - required_vars = ["lat_1", "lat_2", "lon_0", "lat_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] - elif projection == "mercator": - required_vars = ["lat_ts", "lon_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] - else: - raise ValueError("Unknown projection: {0}".format(projection)) - - for var in required_vars: - if var not in kwargs_list: - msg = "Variable {0} has not been defined. ".format(var) - msg += "For a {} projection, it is necessary to define {}".format(projection, required_vars) - raise ValueError(msg) - - for var in kwargs_list: - if var not in required_vars: - msg = "Variable {0} has been defined. ".format(var) - msg += "For a {} projection, you can only define {}".format(projection, required_vars) - raise ValueError(msg) - - if projection is None: - if parallel_method == "Y": - warnings.warn("Parallel method cannot be 'Y' to create points NES. 
Setting it to 'X'") - sys.stderr.flush() - parallel_method = "X" - elif parallel_method == "T": - raise NotImplementedError("Parallel method T not implemented yet") - nessy = PointsNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, balanced=balanced, - create_nes=True, times=times, **kwargs) - elif projection in ["regular", "global"]: - nessy = LatLonNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, balanced=balanced, - create_nes=True, times=times, **kwargs) - elif projection == "rotated": - nessy = RotatedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, balanced=balanced, - create_nes=True, times=times, **kwargs) - elif projection == "rotated-nested": - nessy = RotatedNestedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, balanced=balanced, - create_nes=True, times=times, **kwargs) - elif projection == "lcc": - nessy = LCCNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, balanced=balanced, - create_nes=True, times=times, **kwargs) - elif projection == "mercator": - nessy = MercatorNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, balanced=balanced, - create_nes=True, times=times, **kwargs) - else: - raise NotImplementedError(projection) - - return nessy - - -def from_shapefile(path, method=None, parallel_method="Y", **kwargs): - """ - Create NES from shapefile data. - - 1. Create NES grid. - 2. Create shapefile for grid. - 3. Spatial join to add shapefile variables to NES variables. - - Parameters - ---------- - path : str - Path to shapefile. - method : str - Overlay method. Accepted values: ["nearest", "intersection", None]. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - accepted values: ["X", "Y", "T"]. 
- """ - - # Create NES - nessy = create_nes(comm=None, info=False, parallel_method=parallel_method, **kwargs) - - # Create shapefile for grid - nessy.create_shapefile() - - # Make spatial join - nessy.spatial_join(path, method=method) - - return nessy diff --git a/build/lib/nes/load_nes.py b/build/lib/nes/load_nes.py deleted file mode 100644 index 542b583..0000000 --- a/build/lib/nes/load_nes.py +++ /dev/null @@ -1,340 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -from numpy import empty -from mpi4py import MPI -from netCDF4 import Dataset -from warnings import warn -from .nc_projections import RotatedNes, PointsNes, PointsNesGHOST, PointsNesProvidentia, LCCNes, LatLonNes, MercatorNes - -DIM_VAR_NAMES = ["lat", "latitude", "lat_bnds", "lon", "longitude", "lon_bnds", "time", "time_bnds", "lev", "level", - "cell_area", "crs", "rotated_pole", "x", "y", "rlat", "rlon", "Lambert_conformal", "mercator"] - - -def open_netcdf(path, comm=None, info=False, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, - first_level=0, last_level=None, balanced=False): - """ - Open a netCDF file. - - Parameters - ---------- - path : str - Path to the NetCDF file to read. - comm : MPI.COMM - MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. - info : bool - Indicates if you want to print (stdout) the reading/writing steps. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"] - balanced : bool - Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - - Returns - ------- - Nes - A Nes object. Variables read in lazy mode (only metadata). - """ - - if comm is None: - comm = MPI.COMM_WORLD - else: - comm = comm - - if not os.path.exists(path): - raise FileNotFoundError(path) - - dataset = Dataset(path, format="NETCDF4", mode="r", parallel=False) - # Parallel is not needed for reading - # if comm.Get_size() == 1: - # dataset = Dataset(path, format="NETCDF4", mode="r", parallel=False) - # else: - # dataset = Dataset(path, format="NETCDF4", mode="r", parallel=True, comm=comm, info=MPI.Info()) - - if __is_rotated(dataset): - # Rotated grids - nessy = RotatedNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) - elif __is_points(dataset): - if parallel_method == "Y": - warn("Parallel method cannot be 'Y' to create points NES. 
Setting it to 'X'") - sys.stderr.flush() - parallel_method = "X" - if __is_points_ghost(dataset): - # Points - GHOST - nessy = PointsNesGHOST(comm=comm, dataset=dataset, info=info, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) - elif __is_points_providentia(dataset): - # Points - Providentia - nessy = PointsNesProvidentia(comm=comm, dataset=dataset, info=info, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, - balanced=balanced,) - else: - # Points - non-GHOST - nessy = PointsNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) - elif __is_lcc(dataset): - # Lambert conformal conic grids - nessy = LCCNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) - elif __is_mercator(dataset): - # Mercator grids - nessy = MercatorNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) - else: - # Regular grids - nessy = LatLonNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) - - return nessy - - -def __is_rotated(dataset): - """ - Check if the netCDF is in rotated pole projection or not. - - Parameters - ---------- - dataset : Dataset - netcdf4-python open dataset object. - - Returns - ------- - value : bool - Indicated if the netCDF is a rotated one. - """ - - if "rotated_pole" in dataset.variables.keys(): - return True - elif ("rlat" in dataset.dimensions) and ("rlon" in dataset.dimensions): - return True - else: - return False - - -def __is_points(dataset): - """ - Check if the netCDF is a points dataset in non-GHOST format or not. - - Parameters - ---------- - dataset : Dataset - netcdf4-python open dataset object. - - Returns - ------- - value : bool - Indicated if the netCDF is a points non-GHOST one. - """ - - if "station" in dataset.dimensions: - return True - else: - return False - - -def __is_points_ghost(dataset): - """ - Check if the netCDF is a points dataset in GHOST format or not. - - Parameters - ---------- - dataset : Dataset - netcdf4-python open dataset object. - - Returns - ------- - value : bool - Indicated if the netCDF is a points GHOST one. - """ - - if "N_flag_codes" in dataset.dimensions and "N_qa_codes" in dataset.dimensions: - return True - else: - return False - - -def __is_points_providentia(dataset): - """ - Check if the netCDF is a points dataset in Providentia format or not. - - Parameters - ---------- - dataset : Dataset - netcdf4-python open dataset object. - - Returns - ------- - value : bool - Indicated if the netCDF is a points Providentia one. 
- """ - - if (("grid_edge" in dataset.dimensions) and ("model_latitude" in dataset.dimensions) and - ("model_longitude" in dataset.dimensions)): - return True - else: - return False - - -def __is_lcc(dataset): - """ - Check if the netCDF is in Lambert Conformal Conic (LCC) projection or not. - - Parameters - ---------- - dataset : Dataset - netcdf4-python open dataset object. - - Returns - ------- - value : bool - Indicated if the netCDF is an LCC one. - """ - - if "Lambert_Conformal" in dataset.variables.keys() or "Lambert_conformal" in dataset.variables.keys(): - return True - else: - return False - - -def __is_mercator(dataset): - """ - Check if the netCDF is in Mercator projection or not. - - Parameters - ---------- - dataset : Dataset - netcdf4-python open dataset object. - - Returns - ------- - value : bool - Indicated if the netCDF is a Mercator one. - """ - - if "mercator" in dataset.variables.keys(): - return True - else: - return False - - -def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, - first_level=0, last_level=None, balanced=False): - """ - Concatenate variables form different sources. - - Parameters - ---------- - nessy_list : list - A List of Nes objects or list of paths to concatenate. - comm : MPI.Comm - MPI Communicator. - info: bool - Indicates if you want to get reading/writing info. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - accepted values: ["X", "Y", "T"]. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - - Returns - ------- - Nes - A Nes object with all the variables. 
- """ - if not isinstance(nessy_list, list): - raise AttributeError("You must pass a list of NES objects or paths.") - - if isinstance(nessy_list[0], str): - nessy_first = open_netcdf(nessy_list[0], - comm=comm, - parallel_method=parallel_method, - info=info, - avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, - first_level=first_level, - last_level=last_level, - balanced=balanced - ) - nessy_first.load() - else: - nessy_first = nessy_list[0] - for i, aux_nessy in enumerate(nessy_list[1:]): - if isinstance(aux_nessy, str): - nc_add = Dataset(filename=aux_nessy, mode="r") - for var_name, var_info in nc_add.variables.items(): - if var_name not in DIM_VAR_NAMES: - nessy_first.variables[var_name] = {} - var_dims = var_info.dimensions - # Read data in 4 dimensions - if len(var_dims) < 2: - data = var_info[:] - elif len(var_dims) == 2: - data = var_info[nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], - nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] - data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) - elif len(var_dims) == 3: - if "strlen" in var_dims: - data = var_info[nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], - nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"], - :] - data_aux = empty(shape=(data.shape[0], data.shape[1]), dtype=object) - for lat_n in range(data.shape[0]): - for lon_n in range(data.shape[1]): - data_aux[lat_n, lon_n] = "".join( - data[lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) - data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) - else: - data = var_info[nessy_first.read_axis_limits["t_min"]:nessy_first.read_axis_limits["t_max"], - nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], - nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] - data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) - elif len(var_dims) == 4: - data = var_info[nessy_first.read_axis_limits["t_min"]:nessy_first.read_axis_limits["t_max"], - nessy_first.read_axis_limits["z_min"]:nessy_first.read_axis_limits["z_max"], - nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], - nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] - else: - raise TypeError("{} data shape is nto accepted".format(var_dims)) - - nessy_first.variables[var_name]["data"] = data - # Avoid some attributes - for attrname in var_info.ncattrs(): - if attrname not in ["missing_value", "_FillValue"]: - value = getattr(var_info, attrname) - if value in ["unitless", "-"]: - value = "" - nessy_first.variables[var_name][attrname] = value - nc_add.close() - - else: - nessy_first.concatenate(aux_nessy) - - return nessy_first diff --git a/build/lib/nes/methods/__init__.py b/build/lib/nes/methods/__init__.py deleted file mode 100644 index 35b6346..0000000 --- a/build/lib/nes/methods/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .vertical_interpolation import add_4d_vertical_info -from .vertical_interpolation import interpolate_vertical -from .horizontal_interpolation import interpolate_horizontal -from .spatial_join import spatial_join - -__all__ = [ - 'add_4d_vertical_info', 'interpolate_vertical', 'interpolate_horizontal', 'spatial_join' -] diff --git a/build/lib/nes/methods/cell_measures.py b/build/lib/nes/methods/cell_measures.py deleted file mode 100644 index 185d033..0000000 --- a/build/lib/nes/methods/cell_measures.py +++ /dev/null @@ -1,265 
+0,0 @@ -#!/usr/bin/env python -from numpy import empty, newaxis, array, arcsin, tan, fabs, arctan, sqrt, radians, cos, sin, column_stack -from copy import deepcopy - - -def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - """ - - # Create bounds if they do not exist - if self.lat_bnds is None or self.lon_bnds is None: - self.create_spatial_bounds() - - # Get spatial number of vertices - spatial_nv = self.lat_bnds["data"].shape[-1] - - # Reshape bounds - if spatial_nv == 2: - - aux_shape = (self.lat_bnds["data"].shape[0], self.lon_bnds["data"].shape[0], 4) - lon_bnds_aux = empty(aux_shape) - lon_bnds_aux[:, :, 0] = self.lon_bnds["data"][newaxis, :, 0] - lon_bnds_aux[:, :, 1] = self.lon_bnds["data"][newaxis, :, 1] - lon_bnds_aux[:, :, 2] = self.lon_bnds["data"][newaxis, :, 1] - lon_bnds_aux[:, :, 3] = self.lon_bnds["data"][newaxis, :, 0] - - lon_bnds = lon_bnds_aux - del lon_bnds_aux - - lat_bnds_aux = empty(aux_shape) - lat_bnds_aux[:, :, 0] = self.lat_bnds["data"][:, newaxis, 0] - lat_bnds_aux[:, :, 1] = self.lat_bnds["data"][:, newaxis, 0] - lat_bnds_aux[:, :, 2] = self.lat_bnds["data"][:, newaxis, 1] - lat_bnds_aux[:, :, 3] = self.lat_bnds["data"][:, newaxis, 1] - - lat_bnds = lat_bnds_aux - del lat_bnds_aux - - else: - lon_bnds = self.lon_bnds["data"] - lat_bnds = self.lat_bnds["data"] - - # Reshape bounds and assign as grid corner coordinates - grid_corner_lon = deepcopy(lon_bnds).reshape(lon_bnds.shape[0]*lon_bnds.shape[1], - lon_bnds.shape[2]) - grid_corner_lat = deepcopy(lat_bnds).reshape(lat_bnds.shape[0]*lat_bnds.shape[1], - lat_bnds.shape[2]) - - # Calculate cell areas - grid_area = calculate_cell_area(grid_corner_lon, grid_corner_lat, - earth_radius_minor_axis=self.earth_radius[0], - earth_radius_major_axis=self.earth_radius[1]) - - return grid_area - - -def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142, - earth_radius_major_axis=6378137.0): - """ - Get coordinate bounds and call function to calculate the area of each cell of a set of geometries. - - Parameters - ---------- - geometry_list : List - A List with polygon geometries. - earth_radius_minor_axis : float - Radius of the minor axis of the Earth. - earth_radius_major_axis : float - Radius of the major axis of the Earth. 
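As a quick sanity check on the routine above, a sketch (the import path below is an assumption mirroring this module's location; a 1 deg x 1 deg cell at the equator should come out near 1.2e10 m2, roughly 111 km x 111 km):

    from shapely.geometry import Polygon
    from nes.methods.cell_measures import calculate_geometry_area  # assumed import path

    cell = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    area = calculate_geometry_area([cell])  # m2 on the WGS84 ellipsoid
    print(area[0])  # ~1.23e+10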
- """ - - geometry_area = empty(shape=(len(geometry_list,))) - - for geom_ind in range(0, len(geometry_list)): - - # Calculate the area of each geometry in multipolygon and collection objects - if geometry_list[geom_ind].geom_type in ["MultiPolygon", "GeometryCollection"]: - multi_geom_area = 0 - for multi_geom_ind in range(0, len(geometry_list[geom_ind].geoms)): - if geometry_list[geom_ind].geoms[multi_geom_ind].geom_type == "Point": - continue - geometry_corner_lon, geometry_corner_lat = ( - geometry_list[geom_ind].geoms[multi_geom_ind].exterior.coords.xy) - geometry_corner_lon = array(geometry_corner_lon) - geometry_corner_lat = array(geometry_corner_lat) - geom_area = __mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) - multi_geom_area += geom_area - geometry_area[geom_ind] = multi_geom_area * earth_radius_minor_axis * earth_radius_major_axis - - # Calculate the area of each geometry - else: - geometry_corner_lon, geometry_corner_lat = geometry_list[geom_ind].exterior.coords.xy - geometry_corner_lon = array(geometry_corner_lon) - geometry_corner_lat = array(geometry_corner_lat) - geom_area = __mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) - geometry_area[geom_ind] = geom_area * earth_radius_minor_axis * earth_radius_major_axis - - return geometry_area - - -def calculate_cell_area(grid_corner_lon, grid_corner_lat, - earth_radius_minor_axis=6356752.3142, earth_radius_major_axis=6378137.0): - """ - Calculate the area of each cell of a grid. - - Parameters - ---------- - grid_corner_lon : array - An Array with longitude bounds of grid. - grid_corner_lat : array - An Array with longitude bounds of grid. - earth_radius_minor_axis : float - Radius of the minor axis of the Earth. - earth_radius_major_axis : float - Radius of the major axis of the Earth. - """ - - # Calculate area for each grid cell - n_cells = grid_corner_lon.shape[0] - area = empty(shape=(n_cells,)) - for i in range(0, n_cells): - area[i] = __mod_huiliers_area(grid_corner_lon[i], grid_corner_lat[i]) - - return area*earth_radius_minor_axis*earth_radius_major_axis - - -def __mod_huiliers_area(cell_corner_lon, cell_corner_lat): - """ - Calculate the area of each cell according to Huilier's theorem. - Reference: CDO (https://earth.bsc.es/gitlab/ces/cdo/). - - Parameters - ---------- - cell_corner_lon : array - Longitude boundaries of each cell. - cell_corner_lat : array - Latitude boundaries of each cell. - """ - - my_sum = 0 - - # Get points 0 (bottom left) and 1 (bottom right) in Earth coordinates - point_0 = __lon_lat_to_cartesian(cell_corner_lon[0], cell_corner_lat[0], earth_radius_major_axis=1) - point_1 = __lon_lat_to_cartesian(cell_corner_lon[1], cell_corner_lat[1], earth_radius_major_axis=1) - point_0, point_1 = point_0[0], point_1[0] - - # Get number of vertices - if cell_corner_lat[0] == cell_corner_lat[-1]: - spatial_nv = len(cell_corner_lon) - 1 - else: - spatial_nv = len(cell_corner_lon) - - for i in range(2, spatial_nv): - - # Get point 2 (top right) in Earth coordinates - point_2 = __lon_lat_to_cartesian(cell_corner_lon[i], cell_corner_lat[i], earth_radius_major_axis=1) - point_2 = point_2[0] - - # Calculate area of triangle between points 0, 1 and 2 - my_sum += __tri_area(point_0, point_1, point_2) - - # Copy to calculate area of next triangle - if i == (spatial_nv - 1): - point_1 = deepcopy(point_2) - - return my_sum - - -def __tri_area(point_0, point_1, point_2): - """ - Calculate area between three points that form a triangle. - Reference: CDO (https://earth.bsc.es/gitlab/ces/cdo/). 
- - Parameters - ---------- - point_0 : array - Position of first point in cartesian coordinates. - point_1 : array - Position of second point in cartesian coordinates. - point_2 : array - Position of third point in cartesian coordinates. - """ - - # Get length of side a (between point 0 and 1) - tmp_vec = __cross_product(point_0, point_1) - sin_a = __norm(tmp_vec) - a = arcsin(sin_a) - - # Get length of side b (between point 0 and 2) - tmp_vec = __cross_product(point_0, point_2) - sin_b = __norm(tmp_vec) - b = arcsin(sin_b) - - # Get length of side c (between point 1 and 2) - tmp_vec = __cross_product(point_2, point_1) - sin_c = __norm(tmp_vec) - c = arcsin(sin_c) - - # Calculate area - s = 0.5*(a+b+c) - t = tan(s*0.5) * tan((s - a)*0.5) * tan((s - b)*0.5) * tan((s - c)*0.5) - area = fabs(4.0 * arctan(sqrt(fabs(t)))) - - return area - - -def __cross_product(a, b): - """ - Calculate cross product between two points. - - Parameters - ---------- - a : array - Position of point A in cartesian coordinates. - b : array - Position of point B in cartesian coordinates. - """ - - return [a[1]*b[2] - a[2]*b[1], - a[2]*b[0] - a[0]*b[2], - a[0]*b[1] - a[1]*b[0]] - - -def __norm(cp): - """ - Normalize the result of the cross product operation. - - Parameters - ---------- - cp : array - Cross product between two points. - """ - - return sqrt(cp[0]*cp[0] + cp[1]*cp[1] + cp[2]*cp[2]) - - -# noinspection DuplicatedCode -def __lon_lat_to_cartesian(lon, lat, earth_radius_major_axis=6378137.0): - """ - Calculate lon, lat coordinates of a point on a sphere. - - Parameters - ---------- - lon : array - Longitude values. - lat : array - Latitude values. - earth_radius_major_axis : float - Radius of the major axis of the Earth. - """ - - lon_r = radians(lon) - lat_r = radians(lat) - - x = earth_radius_major_axis * cos(lat_r) * cos(lon_r) - y = earth_radius_major_axis * cos(lat_r) * sin(lon_r) - z = earth_radius_major_axis * sin(lat_r) - - return column_stack([x, y, z]) diff --git a/build/lib/nes/methods/horizontal_interpolation.py b/build/lib/nes/methods/horizontal_interpolation.py deleted file mode 100644 index 25efef6..0000000 --- a/build/lib/nes/methods/horizontal_interpolation.py +++ /dev/null @@ -1,762 +0,0 @@ -#!/usr/bin/env python - -import sys -import os -import nes -from warnings import warn, filterwarnings -from numpy import (ma, empty, nansum, concatenate, pad, nan, array, float64, int64, float32, meshgrid, expand_dims, - reciprocal, arange, uint32, array_split, radians, cos, sin, column_stack, zeros) -from pandas import concat, DataFrame -from mpi4py import MPI -from scipy import spatial -from filelock import FileLock -from datetime import datetime -from copy import deepcopy -from pyproj import Proj, Transformer, CRS -import gc - -# CONSTANTS -NEAREST_OPTS = ["NearestNeighbour", "NearestNeighbours", "nn", "NN"] -CONSERVATIVE_OPTS = ["Conservative", "Area_Conservative", "cons", "conservative", "area"] - - -def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, - info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): - """ - Horizontal methods from one grid to another one. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - dst_grid : nes.Nes - Final projection Nes object. - weight_matrix_path : str, None - Path to the weight matrix to read/create. - kind : str - Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. 
- n_neighbours : int - Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. - info : bool - Indicates if you want to print extra info during the methods process. - to_providentia : bool - Indicates if we want the interpolated grid in Providentia format. - only_create_wm : bool - Indicates if you want to only create the Weight Matrix. - wm : Nes - Weight matrix Nes File. - flux : bool - Indicates if you want to calculate the weight matrix for flux variables. - """ - if info and self.master: - print("Creating Weight Matrix") - - # Obtain weight matrix - if self.parallel_method == "T": - weights, idx = __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, - only_create_wm, wm, flux) - elif self.parallel_method in ["Y", "X"]: - weights, idx = __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, - only_create_wm, wm, flux) - else: - raise NotImplementedError("Parallel method {0} is not implemented yet for horizontal interpolations.".format( - self.parallel_method) + "Use 'T'") - - if info and self.master: - print("Weight Matrix done!") - if only_create_wm: - # weights for only_create is the WM NES object - return weights - - # idx[idx < 0] = nan - idx = ma.masked_array(idx, mask=idx == -999) - # idx = array(idx, dtype=float) - # idx[idx < 0] = nan - # weights[weights < 0] = nan - weights = ma.masked_array(weights, mask=weights == -999) - # weights = array(weights, dtype=float) - # weights[weights < 0] = nan - - # Copy NES - final_dst = dst_grid.copy() - - sys.stdout.flush() - final_dst.set_communicator(dst_grid.comm) - - # Remove original file information - final_dst.__ini_path = None - final_dst.netcdf = None - final_dst.dataset = None - - # Return final_dst - final_dst.lev = self.lev - final_dst.set_full_levels(self.get_full_levels()) - final_dst.time = self.time - final_dst.set_full_times(self.get_full_times()) - final_dst.hours_start = self.hours_start - final_dst.hours_end = self.hours_end - - if info and self.master: - print("Applying weights") - # Apply weights - for var_name, var_info in self.variables.items(): - if info and self.master: - print("\t{var} horizontal interpolation".format(var=var_name)) - sys.stdout.flush() - src_shape = var_info["data"].shape - if isinstance(dst_grid, nes.PointsNes): - dst_shape = (src_shape[0], src_shape[1], idx.shape[-1]) - else: - dst_shape = (src_shape[0], src_shape[1], idx.shape[-2], idx.shape[-1]) - # Creating new variable without data - final_dst.variables[var_name] = {attr_name: attr_value for attr_name, attr_value in var_info.items() - if attr_name != "data"} - # Creating empty data - final_dst.variables[var_name]["data"] = empty(dst_shape) - - # src_data = var_info["data"].reshape((src_shape[0], src_shape[1], src_shape[2] * src_shape[3])) - for time in range(dst_shape[0]): - for lev in range(dst_shape[1]): - src_aux = __get_src_data(self.comm, var_info["data"][time, lev], idx, self.parallel_method) - final_dst.variables[var_name]["data"][time, lev] = nansum(weights * src_aux, axis=1) - - if isinstance(dst_grid, nes.PointsNes): - # Removing level axis - if src_shape[1] != 1: - raise IndexError("Data with vertical levels cannot be interpolated to points") - final_dst.variables[var_name]["data"] = final_dst.variables[var_name]["data"].reshape( - (src_shape[0], idx.shape[-1])) - if isinstance(dst_grid, nes.PointsNesGHOST) and not to_providentia: - final_dst = final_dst.to_points() - - final_dst.global_attrs = self.global_attrs - - if info and self.master: 
- print("Formatting") - - if to_providentia: - # self = experiment to interpolate (regular, rotated, etc.) - # final_dst = interpolated experiment (points) - if isinstance(final_dst, nes.PointsNes): - model_centre_lat, model_centre_lon = self.create_providentia_exp_centre_coordinates() - grid_edge_lat, grid_edge_lon = self.create_providentia_exp_grid_edge_coordinates() - final_dst = final_dst.to_providentia(model_centre_lon=model_centre_lon, - model_centre_lat=model_centre_lat, - grid_edge_lon=grid_edge_lon, - grid_edge_lat=grid_edge_lat) - else: - msg = "The final projection must be points to interpolate an experiment and get it in Providentia format." - warn(msg) - sys.stderr.flush() - else: - # Convert dimensions (time, lev, lat, lon) or (time, lat, lon) to (time, station) for interpolated variables - # and reshape data - if isinstance(final_dst, nes.PointsNes): - for var_name, var_info in final_dst.variables.items(): - if len(var_info["dimensions"]) != len(var_info["data"].shape): - final_dst.variables[var_name]["dimensions"] = ("time", "station") - - return final_dst - - -def __get_src_data(comm, var_data, idx, parallel_method): - """ - To obtain the needed src data to interpolate. - - Parameters - ---------- - comm : MPI.Comm. - MPI communicator. - var_data : array - Rank source data. - idx : array - Index of the needed data in a 2D flatten way. - parallel_method: str - Source parallel method. - - Returns - ------- - array - Flatten source needed data. - """ - - if parallel_method == "T": - var_data = var_data.flatten() - else: - var_data = comm.gather(var_data, root=0) - if comm.Get_rank() == 0: - if parallel_method == "Y": - axis = 0 - elif parallel_method == "X": - axis = 1 - else: - raise NotImplementedError(parallel_method) - var_data = concatenate(var_data, axis=axis) - var_data = var_data.flatten() - - var_data = comm.bcast(var_data) - - var_data = pad(var_data, [1, 1], "constant", constant_values=nan).take(idx + 1, mode="clip") - - return var_data - - -# noinspection DuplicatedCode -def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): - """ - To obtain the weights and source data index through the T axis. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - dst_grid : nes.Nes - Final projection Nes object. - weight_matrix_path : str, None - Path to the weight matrix to read/create. - kind : str - Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. - n_neighbours : int - Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. - only_create : bool - Indicates if you want to only create the Weight Matrix. - wm : Nes - Weight matrix Nes File. - flux : bool - Indicates if you want to calculate the weight matrix for flux variables. - - Returns - ------- - tuple - Weights and source data index. - """ - weight_matrix = None - - if wm is not None: - weight_matrix = wm - - elif weight_matrix_path is not None: - with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)): - if os.path.isfile(weight_matrix_path): - if self.master: - weight_matrix = __read_weight_matrix(weight_matrix_path, comm=MPI.COMM_SELF) - else: - weight_matrix = True - if kind in NEAREST_OPTS: - if self.master: - if len(weight_matrix.lev["data"]) != n_neighbours: - warn("The selected weight matrix does not have the same number of nearest neighbours." 
+ - "Re-calculating again but not saving it.") - sys.stderr.flush() - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) - else: - weight_matrix = True - - else: - if self.master: - if kind in NEAREST_OPTS: - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, - wm_path=weight_matrix_path) - elif kind in CONSERVATIVE_OPTS: - weight_matrix = __create_area_conservative_weight_matrix( - self, dst_grid, wm_path=weight_matrix_path, flux=flux) - else: - raise NotImplementedError(kind) - else: - weight_matrix = True - - if os.path.exists(weight_matrix_path + "{0:03d}.lock".format(self.rank)): - os.remove(weight_matrix_path + "{0:03d}.lock".format(self.rank)) - else: - if self.master: - if kind in NEAREST_OPTS: - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) - elif kind in CONSERVATIVE_OPTS: - weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) - else: - raise NotImplementedError(kind) - else: - weight_matrix = True - - if only_create: - return weight_matrix, None - - if self.master: - if kind in NEAREST_OPTS: - # Normalize to 1 - weights = array(array(weight_matrix.variables["weight"]["data"], dtype=float64) / - array(weight_matrix.variables["weight"]["data"], dtype=float64).sum(axis=1), - dtype=float64) - else: - weights = array(weight_matrix.variables["weight"]["data"], dtype=float64) - idx = array(weight_matrix.variables["idx"]["data"][0], dtype=int) - else: - weights = None - idx = None - - weights = self.comm.bcast(weights, root=0) - idx = self.comm.bcast(idx, root=0) - - return weights, idx - - -# noinspection DuplicatedCode -def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): - """ - To obtain the weights and source data index through the X or Y axis. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - dst_grid : nes.Nes - Final projection Nes object. - weight_matrix_path : str, None - Path to the weight matrix to read/create. - kind : str - Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. - n_neighbours : int - Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. - only_create : bool - Indicates if you want to only create the Weight Matrix. - wm : Nes - Weight matrix Nes File. - flux : bool - Indicates if you want to calculate the weight matrix for flux variables. - - Returns - ------- - tuple - Weights and source data index. - """ - weight_matrix = None - - if isinstance(dst_grid, nes.PointsNes) and weight_matrix_path is not None: - if self.master: - warn("To point weight matrix cannot be saved.") - sys.stderr.flush() - weight_matrix_path = None - - if wm is not None: - weight_matrix = wm - - elif weight_matrix_path is not None: - with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)): - if os.path.isfile(weight_matrix_path): - if self.master: - weight_matrix = __read_weight_matrix(weight_matrix_path, comm=MPI.COMM_SELF) - else: - weight_matrix = True - if kind in NEAREST_OPTS: - if self.master: - if len(weight_matrix.lev["data"]) != n_neighbours: - warn("The selected weight matrix does not have the same number of nearest neighbours." 
+ - "Re-calculating again but not saving it.") - sys.stderr.flush() - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) - else: - weight_matrix = True - else: - if kind in NEAREST_OPTS: - if self.master: - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, - wm_path=weight_matrix_path) - else: - weight_matrix = True - elif kind in CONSERVATIVE_OPTS: - weight_matrix = __create_area_conservative_weight_matrix( - self, dst_grid, wm_path=weight_matrix_path, flux=flux) - else: - raise NotImplementedError(kind) - - if os.path.exists(weight_matrix_path + "{0:03d}.lock".format(self.rank)): - os.remove(weight_matrix_path + "{0:03d}.lock".format(self.rank)) - else: - if kind in NEAREST_OPTS: - if self.master: - weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) - else: - weight_matrix = True - elif kind in CONSERVATIVE_OPTS: - weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) - else: - raise NotImplementedError(kind) - - if only_create: - return weight_matrix, None - - # Normalize to 1 - if self.master: - if kind in NEAREST_OPTS: - weights = array(array(weight_matrix.variables["weight"]["data"], dtype=float64) / - array(weight_matrix.variables["weight"]["data"], dtype=float64).sum(axis=1), - dtype=float64) - else: - weights = array(weight_matrix.variables["weight"]["data"], dtype=float64) - idx = array(weight_matrix.variables["idx"]["data"][0], dtype=int64) - else: - weights = None - idx = None - - weights = self.comm.bcast(weights, root=0) - idx = self.comm.bcast(idx, root=0) - - # if isinstance(dst_grid, nes.PointsNes): - # print("weights 1 ->", weights.shape) - # print("idx 1 ->", idx.shape) - # weights = weights[:, dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] - # idx = idx[dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] - # else: - weights = weights[:, :, dst_grid.write_axis_limits["y_min"]:dst_grid.write_axis_limits["y_max"], - dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] - idx = idx[:, dst_grid.write_axis_limits["y_min"]:dst_grid.write_axis_limits["y_max"], - dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] - # print("weights 2 ->", weights.shape) - # print("idx 2 ->", idx.shape) - - return weights, idx - - -def __read_weight_matrix(weight_matrix_path, comm=None, parallel_method="T"): - """ - Read weight matrix. - - Parameters - ---------- - weight_matrix_path : str - Path of the weight matrix. - comm : MPI.Comm - A Communicator to read the weight matrix. - parallel_method : str - Nes parallel method to read the weight matrix. - - Returns - ------- - nes.Nes - Weight matrix. - """ - - weight_matrix = nes.open_netcdf(path=weight_matrix_path, comm=comm, parallel_method=parallel_method, balanced=True) - weight_matrix.load() - - # In previous versions of NES weight was called inverse_dists - if "inverse_dists" in weight_matrix.variables.keys(): - weight_matrix.variables["weight"] = weight_matrix.variables["inverse_dists"] - - weight_matrix.variables["weight"]["data"][weight_matrix.variables["weight"]["data"] <= 0] = nan - weight_matrix.variables["weight"]["data"][weight_matrix.variables["idx"]["data"] <= 0] = nan - - return weight_matrix - - -# noinspection DuplicatedCode,PyProtectedMember -def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info=False): - """ - To create the weight matrix with the nearest neighbours method. 
- - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - dst_grid : nes.Nes - Final projection Nes object. - n_neighbours : int - Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. - wm_path : str - Path where write the weight matrix. - info: bool - Indicates if you want to print extra info during the methods process. - - Returns - ------- - nes.Nes - Weight matrix. - """ - # Only master is here. - if info and self.master: - print("\tCreating Nearest Neighbour Weight Matrix with {0} neighbours".format(n_neighbours)) - sys.stdout.flush() - # Source - src_lat = array(self._full_lat["data"], dtype=float32) - src_lon = array(self._full_lon["data"], dtype=float32) - - # 1D to 2D coordinates - if len(src_lon.shape) == 1: - src_lon, src_lat = meshgrid(src_lon, src_lat) - - # Destination - dst_lat = array(dst_grid._full_lat["data"], dtype=float32) - dst_lon = array(dst_grid._full_lon["data"], dtype=float32) - - if isinstance(dst_grid, nes.PointsNes): - dst_lat = expand_dims(dst_grid._full_lat["data"], axis=0) - dst_lon = expand_dims(dst_grid._full_lon["data"], axis=0) - else: - # 1D to 2D coordinates - if len(dst_lon.shape) == 1: - dst_lon, dst_lat = meshgrid(dst_lon, dst_lat) - - # calculate N nearest neighbour inverse distance weights (and indices) - # from gridcells centres of model 1 to each grid cell centre of model 2 - # model geographic longitude/latitude coordinates are first converted - # to cartesian ECEF (Earth Centred, Earth Fixed) coordinates, before - # calculating distances. - - # src_mod_xy = lon_lat_to_cartesian(src_lon.flatten(), src_lat.flatten()) - # dst_mod_xy = lon_lat_to_cartesian(dst_lon.flatten(), dst_lat.flatten()) - - src_mod_xy = __lon_lat_to_cartesian_ecef(src_lon.flatten(), src_lat.flatten()) - dst_mod_xy = __lon_lat_to_cartesian_ecef(dst_lon.flatten(), dst_lat.flatten()) - - # generate KDtree using model 1 coordinates (i.e. 
the model grid you are - # interpolating from) - src_tree = spatial.cKDTree(src_mod_xy) - - # get n-neighbour nearest distances/indices (ravel form) of model 1 grid cell - # centres from each model 2 grid cell centre - - dists, idx = src_tree.query(dst_mod_xy, k=n_neighbours) - # self.nearest_neighbour_inds = \ - # column_stack(unravel_index(idx, lon.shape)) - - weight_matrix = dst_grid.copy() - weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] - weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] - weight_matrix._full_time_bnds = None - weight_matrix.time_bnds = None - weight_matrix.last_level = None - weight_matrix.first_level = 0 - weight_matrix.hours_start = 0 - weight_matrix.hours_end = 0 - - weight_matrix.set_communicator(MPI.COMM_SELF) - # take the reciprocals of the nearest neighbours distances - dists[dists < 1] = 1 - inverse_dists = reciprocal(dists) - - inverse_dists_transf = inverse_dists.T.reshape((1, n_neighbours, dst_lon.shape[0], dst_lon.shape[1])) - weight_matrix.variables["weight"] = {"data": inverse_dists_transf, "units": "m"} - idx_transf = idx.T.reshape((1, n_neighbours, dst_lon.shape[0], dst_lon.shape[1])) - weight_matrix.variables["idx"] = {"data": idx_transf, "units": ""} - weight_matrix.lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} - weight_matrix._full_lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} - if wm_path is not None: - weight_matrix.to_netcdf(wm_path) - - return weight_matrix - - -# noinspection DuplicatedCode -def __create_area_conservative_weight_matrix(self, dst_nes, wm_path=None, flux=False, info=False): - """ - To create the weight matrix with the area conservative method. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - dst_nes : nes.Nes - Final projection Nes object. - wm_path : str - Path where write the weight matrix. - flux : bool - Indicates if you want to calculate the weight matrix for flux variables. - info: bool - Indicates if you want to print extra info during the methods process. - - Returns - ------- - nes.Nes - Weight matrix. 
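For a single pair of intersecting cells, the conservative weight computed below is just the fraction of the source polygon covered by the destination polygon (src_poly and dst_poly are stand-in shapely geometries; buffer(0) repairs slightly invalid intersection results, as in the apply() calls that follow):

    weight = src_poly.intersection(dst_poly).buffer(0).area / src_poly.area

For flux variables the same fraction is additionally scaled by the ratio of the true source and destination cell areas, as in the flux branch below.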
- """ - - if info and self.master: - print("\tCreating area conservative Weight Matrix") - sys.stdout.flush() - - my_crs = CRS.from_proj4("+proj=latlon") # Common projection for both shapefiles - - # Get a portion of the destiny grid - if dst_nes.shapefile is None: - dst_nes.create_shapefile() - dst_grid = deepcopy(dst_nes.shapefile) - - # Formatting Destination grid - dst_grid.to_crs(crs=my_crs, inplace=True) - dst_grid["FID_dst"] = dst_grid.index - - # Preparing Source grid - if self.shapefile is None: - self.create_shapefile() - src_grid = deepcopy(self.shapefile) - - # Formatting Source grid - src_grid.to_crs(crs=my_crs, inplace=True) - - # Serialize index intersection function to avoid memory problems - if self.size > 1 and self.parallel_method != "T": - src_grid = self.comm.gather(src_grid, root=0) - dst_grid = self.comm.gather(dst_grid, root=0) - if self.master: - src_grid = concat(src_grid) - dst_grid = concat(dst_grid) - if self.master: - src_grid["FID_src"] = src_grid.index - src_grid = src_grid.reset_index() - dst_grid = dst_grid.reset_index() - fid_src, fid_dst = dst_grid.sindex.query(src_grid.geometry, predicate="intersects") - - # Calculate intersected areas and fractions - intersection_df = DataFrame(columns=["FID_src", "FID_dst"]) - - intersection_df["FID_src"] = array(src_grid.loc[fid_src, "FID_src"], dtype=uint32) - intersection_df["FID_dst"] = array(dst_grid.loc[fid_dst, "FID_dst"], dtype=uint32) - - intersection_df["geometry_src"] = src_grid.loc[fid_src, "geometry"].values - intersection_df["geometry_dst"] = dst_grid.loc[fid_dst, "geometry"].values - del src_grid, dst_grid, fid_src, fid_dst - # Split the array into smaller arrays in order to scatter the data among the processes - intersection_df = array_split(intersection_df, self.size) - else: - intersection_df = None - - intersection_df = self.comm.scatter(intersection_df, root=0) - - if info and self.master: - print("\t\tGrids created and ready to interpolate") - sys.stdout.flush() - if True: - # No Warnings Zone - filterwarnings("ignore") - # intersection_df["weight"] = array(intersection_df.apply( - # lambda x: x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area, - # axis=1), dtype=float64) - if flux: - intersection_df["weight"] = array(intersection_df.apply( - lambda x: (x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area) * - (nes.Nes.calculate_geometry_area([x["geometry_src"]])[0] / - nes.Nes.calculate_geometry_area([x["geometry_dst"]])[0]), - axis=1), dtype=float64) - else: - intersection_df["weight"] = array(intersection_df.apply( - lambda x: x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area, - axis=1), dtype=float64) - - intersection_df.drop(columns=["geometry_src", "geometry_dst"], inplace=True) - gc.collect() - filterwarnings("default") - - # Format & Clean - if info and self.master: - print("\t\tWeights calculated. 
Formatting weight matrix.") - sys.stdout.flush() - - # Initialising weight matrix - if self.parallel_method != "T": - intersection_df = self.comm.gather(intersection_df, root=0) - if self.master: - if self.parallel_method != "T": - intersection_df = concat(intersection_df) - intersection_df = intersection_df.set_index( - ["FID_dst", intersection_df.groupby("FID_dst").cumcount()]).rename_axis(("FID", "level")).sort_index() - intersection_df.rename(columns={"FID_src": "idx"}, inplace=True) - weight_matrix = dst_nes.copy() - weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] - weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] - weight_matrix._full_time_bnds = None - weight_matrix.time_bnds = None - weight_matrix.last_level = None - weight_matrix.first_level = 0 - weight_matrix.hours_start = 0 - weight_matrix.hours_end = 0 - - weight_matrix.set_communicator(MPI.COMM_SELF) - - weight_matrix.set_levels({"data": arange(intersection_df.index.get_level_values("level").max() + 1), - "dimensions": ("lev",), - "units": "", - "positive": "up"}) - - # Creating Weight matrix empty variables - wm_shape = weight_matrix.get_full_shape() - shape = (1, len(weight_matrix.lev["data"]), wm_shape[0], wm_shape[1],) - shape_flat = (1, len(weight_matrix.lev["data"]), wm_shape[0] * wm_shape[1],) - - weight_matrix.variables["weight"] = {"data": empty(shape_flat), "units": "-"} - weight_matrix.variables["weight"]["data"][:] = -999 - weight_matrix.variables["idx"] = {"data": empty(shape_flat), "units": "-"} - weight_matrix.variables["idx"]["data"][:] = -999 - - # Filling Weight matrix variables - for aux_lev in weight_matrix.lev["data"]: - aux_data = intersection_df.xs(level="level", key=aux_lev) - weight_matrix.variables["weight"]["data"][0, aux_lev, aux_data.index] = aux_data.loc[:, "weight"].values - weight_matrix.variables["idx"]["data"][0, aux_lev, aux_data.index] = aux_data.loc[:, "idx"].values - # Re-shaping - weight_matrix.variables["weight"]["data"] = weight_matrix.variables["weight"]["data"].reshape(shape) - weight_matrix.variables["idx"]["data"] = weight_matrix.variables["idx"]["data"].reshape(shape) - if wm_path is not None: - if info and self.master: - print("\t\tWeight matrix saved at {0}".format(wm_path)) - sys.stdout.flush() - weight_matrix.to_netcdf(wm_path) - else: - weight_matrix = True - return weight_matrix - - -# noinspection DuplicatedCode -def __lon_lat_to_cartesian(lon, lat, radius=6378137.0): - """ - Calculate lon, lat coordinates of a point on a sphere. - - DEPRECATED!!!! - - Parameters - ---------- - lon : array - Longitude values. - lat : array - Latitude values. - radius : float - Radius of the sphere to get the distances. - """ - - lon_r = radians(lon) - lat_r = radians(lat) - - x = radius * cos(lat_r) * cos(lon_r) - y = radius * cos(lat_r) * sin(lon_r) - z = radius * sin(lat_r) - - return column_stack([x, y, z]) - - -def __lon_lat_to_cartesian_ecef(lon, lat): - """ - Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, - Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. - ECEF coordinates represent positions (in meters) as X, Y, Z coordinates, approximating the earth surface - as an ellipsoid of revolution. - This conversion is for the subsequent calculation of Euclidean distances of the model grid cell centres - from each observational station. 
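In modern pyproj the same conversion can be expressed with EPSG codes (EPSG:4326 geographic to EPSG:4978 WGS84 geocentric); a sketch, with lon and lat as NumPy arrays:

    from numpy import column_stack, zeros
    from pyproj import Transformer

    transformer = Transformer.from_crs("EPSG:4326", "EPSG:4978", always_xy=True)
    x, y, z = transformer.transform(lon, lat, zeros(lon.shape))  # all heights = 0
    ecef = column_stack([x, y, z])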
- Defining the distance between two points on the earth's surface as simply the Euclidean distance - between the two lat/lon pairs could lead to inaccurate results depending on the distance - between two points (i.e. 1 deg. of longitude varies with latitude). - - Parameters - ---------- - lon : array - Longitude values. - lat : array - Latitude values. - """ - - lla = Proj(proj="latlong", ellps="WGS84", datum="WGS84") - ecef = Proj(proj="geocent", ellps="WGS84", datum="WGS84") - - # x, y, z = pyproj.transform(lla, ecef, lon, lat, zeros(lon.shape), radians=False) - # Deprecated: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1 - transformer = Transformer.from_proj(lla, ecef) - x, y, z = transformer.transform(lon, lat, zeros(lon.shape), radians=False) - return column_stack([x, y, z]) diff --git a/build/lib/nes/methods/spatial_join.py b/build/lib/nes/methods/spatial_join.py deleted file mode 100644 index eb35864..0000000 --- a/build/lib/nes/methods/spatial_join.py +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env python - -import sys -from warnings import warn, filterwarnings -from geopandas import sjoin_nearest, sjoin, read_file -from pandas import DataFrame -from numpy import array, uint32, nan -from shapely.errors import TopologicalError - - -def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): - """ - Compute overlay intersection of two GeoPandasDataFrames. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - ext_shp : GeoPandasDataFrame or str - File or path from where the data will be obtained on the intersection. - method : str - Overlay method. Accepted values: ["nearest", "intersection", "centroid"]. - var_list : List or None or str - Variables that will be included in the resulting shapefile. - info : bool - Indicates if you want to print the process info. - apply_bbox : bool - Indicates if you want to reduce the shapefile to a bbox. - """ - - if self.master and info: - print("Starting spatial join") - if isinstance(var_list, str): - # Transforming string (variable name) to a list with length 1 - var_list = [var_list] - - # Create source shapefile if it does not exist - if self.shapefile is None: - if self.master and info: - print("\tCreating shapefile") - sys.stdout.flush() - self.create_shapefile() - - ext_shp = __prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info, - apply_bbox=apply_bbox) - - if method == "nearest": - # Nearest centroids to the shapefile polygons - __spatial_join_nearest(self, ext_shp=ext_shp, info=info) - elif method == "intersection": - # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN - __spatial_join_intersection(self, ext_shp=ext_shp, info=info) - elif method == "centroid": - # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN - __spatial_join_centroid(self, ext_shp=ext_shp, info=info) - - else: - accepted_values = ["nearest", "intersection", "centroid"] - raise NotImplementedError("{0} is not implemented. Choose from: {1}".format(method, accepted_values)) - - return None - - -def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox=True): - """ - Prepare the external shapefile. - - It is highly recommended to pass the ext_shp parameter as a string because it will clip the external shapefile to the rank. - - 1. Read if it is not already read - 2. Filter variables list - 3. Standardize projections - - Parameters - ---------- - self : nes.Nes - A Nes Object.
- ext_shp : geopandas.GeoDataFrame or str - External shapefile or path to it. - var_list : List[str] or None - External shapefile variables to be computed. - info : bool - Indicates if you want to print the information. - apply_bbox : bool - Indicates if you want to reduce the shapefile to a bbox. - - Returns - ------- - GeoDataFrame - External shapefile. - """ - - if isinstance(ext_shp, str): - # Reading external shapefile - if self.master and info: - print("\tReading external shapefile") - # ext_shp = read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry) - if apply_bbox: - ext_shp = read_file(ext_shp, include_fields=var_list, bbox=__get_bbox(self)) - else: - ext_shp = read_file(ext_shp, include_fields=var_list) - else: - msg = "WARNING!!! " - msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened shapefile " - msg += "a best usage of memory is performed because the external shape will be clipped while reading." - warn(msg) - sys.stderr.flush() - ext_shp.reset_index(inplace=True) - if var_list is not None: - ext_shp = ext_shp.loc[:, var_list + ["geometry"]] - - self.comm.Barrier() - if self.master and info: - print("\t\tReading external shapefile done!") - - # Standardizing projection - ext_shp = ext_shp.to_crs(self.shapefile.crs) - - return ext_shp - - -def __get_bbox(self): - """ - Obtain the bounding box of the rank data (lon_min, lat_min, lon_max, lat_max). - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - tuple - Bounding box - """ - - bbox = (self.lon_bnds["data"].min(), self.lat_bnds["data"].min(), - self.lon_bnds["data"].max(), self.lat_bnds["data"].max(), ) - - return bbox - - -# noinspection DuplicatedCode -def __spatial_join_nearest(self, ext_shp, info=False): - """ - Perform the spatial join using the nearest method. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - ext_shp : GeoDataFrame - External shapefile. - info : bool - Indicates if you want to print the information. - """ - - if self.master and info: - print("\tNearest spatial joint") - sys.stdout.flush() - grid_shp = self.get_centroids_from_coordinates() - - # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest - # TODO: Check if the projection 4328 does not distort the coordinates too much - # https://gis.stackexchange.com/questions/372564/ - # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas - # ext_shp = ext_shp.to_crs("EPSG:4328") - # grid_shp = grid_shp.to_crs("EPSG:4328") - - # Calculate spatial joint by distance - aux_grid = sjoin_nearest(grid_shp, ext_shp, distance_col="distance") - - # Get data from closest shapes to centroids - del aux_grid["geometry"], aux_grid["index_right"] - self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - - var_list = list(ext_shp.columns) - var_list.remove("geometry") - for var_name in var_list: - self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) - - return None - - -# noinspection DuplicatedCode -def __spatial_join_centroid(self, ext_shp, info=False): - """ - Perform the spatial join using the centroid method. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - ext_shp : GeoDataFrame - External shapefile. - info : bool - Indicates if you want to print the information. 
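Both the nearest and centroid joins described here reduce to single geopandas calls (sjoin_nearest needs geopandas >= 0.10); a sketch with grid_shp holding the cell centroids and ext_shp the external polygon layer:

    from geopandas import sjoin, sjoin_nearest

    nearest = sjoin_nearest(grid_shp, ext_shp, distance_col="distance")  # closest geometry per centroid
    inside = sjoin(grid_shp, ext_shp, predicate="within")                # only centroids inside a polygon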
- """ - - if self.master and info: - print("\tCentroid spatial join") - sys.stdout.flush() - if info and self.master: - print("\t\tCalculating centroids") - sys.stdout.flush() - - # Get centroids - grid_shp = self.get_centroids_from_coordinates() - - # Calculate spatial joint - if info and self.master: - print("\t\tCalculating centroid spatial join") - sys.stdout.flush() - aux_grid = sjoin(grid_shp, ext_shp, predicate="within") - - # Get data from shapes where there are centroids, rest will be NaN - del aux_grid["geometry"], aux_grid["index_right"] - self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - - var_list = list(ext_shp.columns) - var_list.remove("geometry") - for var_name in var_list: - self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) - - return None - - -def __spatial_join_intersection(self, ext_shp, info=False): - """ - Perform the spatial join using the intersection method. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - ext_shp : GeoDataFrame - External shapefile. - info : bool - Indicates if you want to print the information. - """ - - var_list = list(ext_shp.columns) - var_list.remove("geometry") - - grid_shp = self.shapefile - grid_shp["FID_grid"] = grid_shp.index - grid_shp = grid_shp.reset_index() - - # Get intersected areas - # inp, res = ext_shp.sindex.query(grid_shp.geometry, predicate="intersects") - inp, res = grid_shp.sindex.query(ext_shp.geometry, predicate="intersects") - - if info: - print("\t\tRank {0:03d}: {1} intersected areas found".format(self.rank, len(inp))) - sys.stdout.flush() - - # Calculate intersected areas and fractions - intersection = DataFrame(columns=["FID", "ext_shp_id", "weight"]) - intersection["FID"] = array(grid_shp.loc[res, "FID_grid"], dtype=uint32) - intersection["ext_shp_id"] = array(inp, dtype=uint32) - - if len(intersection) > 0: - if True: - # No Warnings Zone - counts = intersection["FID"].value_counts() - filterwarnings("ignore") - intersection.loc[:, "weight"] = 1. 
- - for i, row in intersection.iterrows(): - if isinstance(i, int) and i % 1000 == 0 and info: - print("\t\t\tRank {0:03d}: {1:.3f} %".format(self.rank, i * 100 / len(intersection))) - sys.stdout.flush() - # Only recalculate the weight for cells that intersect more than one geometry; single-match cells keep weight 1 - if counts[row["FID"]] > 1: - try: - intersection.loc[i, "weight"] = grid_shp.loc[res[i], "geometry"].intersection( - ext_shp.loc[inp[i], "geometry"]).area / grid_shp.loc[res[i], "geometry"].area - except TopologicalError: - # If for some reason the geometry is corrupted it should work with the buffer function - ext_shp.loc[[inp[i]], "geometry"] = ext_shp.loc[[inp[i]], "geometry"].buffer(0) - intersection.loc[i, "weight"] = grid_shp.loc[res[i], "geometry"].intersection( - ext_shp.loc[inp[i], "geometry"]).area / grid_shp.loc[res[i], "geometry"].area - # intersection["intersect_area"] = intersection.apply( - # lambda x: x["geometry_grid"].intersection(x["geometry_ext"]).area, axis=1) - intersection.drop(intersection[intersection["weight"] <= 0].index, inplace=True) - - filterwarnings("default") - - # Choose the biggest area from intersected areas with multiple options - intersection.sort_values("weight", ascending=False, inplace=True) - intersection = intersection.drop_duplicates(subset="FID", keep="first") - intersection = intersection.sort_values("FID").set_index("FID") - - for var_name in var_list: - self.shapefile.loc[intersection.index, var_name] = array( - ext_shp.loc[intersection["ext_shp_id"], var_name]) - - else: - for var_name in var_list: - self.shapefile.loc[:, var_name] = nan - - for var_name in var_list: - self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) - - return None diff --git a/build/lib/nes/methods/vertical_interpolation.py b/build/lib/nes/methods/vertical_interpolation.py deleted file mode 100644 index 23ca712..0000000 --- a/build/lib/nes/methods/vertical_interpolation.py +++ /dev/null @@ -1,335 +0,0 @@ -#!/usr/bin/env python - -import sys -from numpy import nan, flip, cumsum, nanmean, empty, ndarray, ma, float64, array, interp, where -from scipy.interpolate import interp1d -from copy import copy - - -def add_4d_vertical_info(self, info_to_add): - """ - To add the vertical information from another source. - - Parameters - ---------- - self : nes.Nes - Source Nes object. - info_to_add : nes.Nes, str - Nes object with the vertical information as variable or str with the path to the NetCDF file that contains - the vertical data. - """ - - vertical_var = list(self.concatenate(info_to_add)) - self.vertical_var_name = vertical_var[0] - - return None - - -def __parse_extrapolate(extrapolate) -> tuple: - """ - Parses the "extrapolate" parameter and returns a tuple representing the extrapolation options. - - Parameters - ---------- - extrapolate : bool or tuple or None or number or NaN - If bool: - - If True, both extrapolation options are set to "extrapolate". - - If False, extrapolation options are set to ("bottom", "top"). - If tuple: - - The first element represents the extrapolation option for the lower bound. - - The second element represents the extrapolation option for the upper bound. - - If any element is bool: - - If True, it represents "extrapolate". - - If False: - - If it's the first element, it represents "bottom". - - If it's the second element, it represents "top". - - If any element is None, it is replaced with numpy.nan. - - Other numeric values are kept as they are. - - If any element is NaN, it is kept as NaN.
- If None: - Both extrapolation options are set to ("bottom", "top"). - If number: - Both extrapolation options are set to the provided number. - If NaN: - Both extrapolation options are set to NaN. - - Returns - ------- - tuple - A tuple representing the extrapolation options. If the input is invalid, it returns - ("extrapolate", "extrapolate"). - """ - if isinstance(extrapolate, bool): - if extrapolate: - extrapolate_options = ("extrapolate", "extrapolate") - else: - extrapolate_options = ("bottom", "top") - elif isinstance(extrapolate, tuple): - extrapolate_options = [None, None] - for i in range(len(extrapolate)): - if isinstance(extrapolate[i], bool): - if extrapolate[i]: - extrapolate_options[i] = "extrapolate" - else: - if i == 0: - extrapolate_options[i] = "bottom" - else: - extrapolate_options[i] = "top" - elif extrapolate[i] is None: - extrapolate_options[i] = nan - else: - extrapolate_options[i] = extrapolate[i] - extrapolate_options = tuple(extrapolate_options) - elif extrapolate is None: - extrapolate_options = ("bottom", "top") - else: - extrapolate_options = (extrapolate, extrapolate) - - return extrapolate_options - - -def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", extrapolate_options=False, info=None, - overwrite=False): - """ - Vertical interpolation. - - Parameters - ---------- - self : Nes - Source Nes object. - new_levels : List - A List of new vertical levels. - new_src_vertical : nes.Nes, str - Nes object with the vertical information as variable or str with the path to the NetCDF file that contains - the vertical data. - kind : str - Vertical methods type. - extrapolate_options : bool or tuple or None or number or NaN - If bool: - - If True, both extrapolation options are set to "extrapolate". - - If False, extrapolation options are set to ("bottom", "top"). - If tuple: - - The first element represents the extrapolation option for the lower bound. - - The second element represents the extrapolation option for the upper bound. - - If any element is bool: - - If True, it represents "extrapolate". - - If False: - - If it's the first element, it represents "bottom". - - If it's the second element, it represents "top". - - If any element is None, it is replaced with numpy.nan. - - Other numeric values are kept as they are. - - If any element is NaN, it is kept as NaN. - If None: - - Both extrapolation options are set to ("bottom", "top"). - If number: - - Both extrapolation options are set to the provided number. - If NaN: - - Both extrapolation options are set to NaN. - info: None, bool - Indicates if you want to print extra information. - overwrite: bool - Indicates if you want to compute the vertical interpolation in the same object or not.
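To make the extrapolate_options contract concrete, a small usage sketch (source is assumed to be a loaded Nes object with 4D variables):

    # (lower, upper) fill options produced by __parse_extrapolate:
    #   True         -> ("extrapolate", "extrapolate")  extrapolate both ends
    #   False / None -> ("bottom", "top")               repeat the edge values
    #   0.0          -> (0.0, 0.0)                      constant fill outside the range
    #   (None, True) -> (nan, "extrapolate")            NaN below, extrapolate above
    interpolated = source.interpolate_vertical([50., 250., 1000., 3000.], kind="linear",
                                               extrapolate_options=(None, True))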
- """ - src_levels_aux = None - fill_value = None - - extrapolate_options = __parse_extrapolate(extrapolate_options) - do_extrapolation = "extrapolate" in extrapolate_options - - if len(self.lev) == 1: - raise RuntimeError("1D data cannot be vertically interpolated.") - if not overwrite: - self = self.copy(copy_vars=True) - if info is None: - info = self.info - - if new_src_vertical is not None: - self.add_4d_vertical_info(new_src_vertical) - if new_levels[0] > new_levels[-1]: - ascendant = False - else: - ascendant = True - - nz_new = len(new_levels) - - if self.vertical_var_name is None: - # To use current level data - current_level = True - # Checking old order - src_levels = self.lev["data"] - if src_levels[0] > src_levels[-1]: - if not ascendant: - do_flip = False - else: - do_flip = True - src_levels = flip(src_levels) - else: - if ascendant: - do_flip = False - else: - do_flip = True - src_levels = flip(src_levels) - else: - current_level = False - src_levels = self.variables[self.vertical_var_name]["data"] - if self.vertical_var_name == "layer_thickness": - src_levels = flip(cumsum(flip(src_levels, axis=1), axis=1)) - else: - # src_levels = flip(src_levels, axis=1) - pass - # Checking old order - if nanmean(src_levels[:, 0, :, :]) > nanmean(src_levels[:, -1, :, :]): - if not ascendant: - do_flip = False - else: - do_flip = True - src_levels = flip(src_levels, axis=1) - else: - if ascendant: - do_flip = False - else: - do_flip = True - src_levels = flip(src_levels, axis=1) - - # Loop over variables - for var_name in self.variables.keys(): - if self.variables[var_name]["data"] is None: - # Load data if it is not loaded yet - self.load(var_name) - - if var_name != self.vertical_var_name: - if do_flip: - self.variables[var_name]["data"] = flip(self.variables[var_name]["data"], axis=1) - if info and self.master: - print("\t{var} vertical methods".format(var=var_name)) - sys.stdout.flush() - nt, nz, ny, nx = self.variables[var_name]["data"].shape - dst_data = empty((nt, nz_new, ny, nx), dtype=self.variables[var_name]["data"].dtype) - for t in range(nt): - # if info and self.rank == self.size - 1: - if self.info and self.master: - print("\t\t{3} time step {0} ({1}/{2}))".format(self.time[t], t + 1, nt, var_name)) - sys.stdout.flush() - for j in range(ny): - for i in range(nx): - if len(src_levels.shape) == 1: - # To use 1D level information - curr_level_values = src_levels - else: - # To use 4D level data - curr_level_values = src_levels[t, :, j, i] - try: - # Check if all values are identical or masked - if ((isinstance(curr_level_values, ndarray) and - (curr_level_values == curr_level_values[0]).all()) or - (isinstance(curr_level_values, ma.core.MaskedArray) and - curr_level_values.mask.all())): - kind = "slinear" - else: - kind = kind # "cubic" - - # Filtering filling values to extrapolation - fill_value = [nan, nan] - if "bottom" in extrapolate_options: - if ascendant: - fill_value[0] = float64(self.variables[var_name]["data"][t, 0, j, i]) - else: - fill_value[0] = float64(self.variables[var_name]["data"][t, -1, j, i]) - else: - fill_value[0] = extrapolate_options[0] - if "top" in extrapolate_options: - if ascendant: - fill_value[1] = float64(self.variables[var_name]["data"][t, -1, j, i]) - else: - fill_value[1] = float64(self.variables[var_name]["data"][t, 0, j, i]) - else: - fill_value[1] = extrapolate_options[1] - fill_value = tuple(fill_value) - - # We force the methods with float64 to avoid negative values - # We don"t know why the negatives appears with float34 - if 
current_level: - # 1D vertical component - src_levels_aux = src_levels - else: - # 4D vertical component - src_levels_aux = src_levels[t, :, j, i] - - if kind == "linear" and ascendant and not do_extrapolation: - dst_data[t, :, j, i] = array( - interp(new_levels, - array(src_levels_aux, dtype=float64), - array(self.variables[var_name]["data"][t, :, j, i], dtype=float64), - left=fill_value[0], right=fill_value[1]), - dtype=self.variables[var_name]["data"].dtype) - else: - if not do_extrapolation: - dst_data[t, :, j, i] = array( - interp1d(array(src_levels_aux, dtype=float64), - array(self.variables[var_name]["data"][t, :, j, i], dtype=float64), - kind=kind, - bounds_error=False, - fill_value=fill_value)(new_levels), - dtype=self.variables[var_name]["data"].dtype) - else: - # If extrapolating, we first need to extrapolate everything (below & above) - dst_data[t, :, j, i] = array( - interp1d(array(src_levels_aux, dtype=float64), - array(self.variables[var_name]["data"][t, :, j, i], - dtype=float64), - kind=kind, - bounds_error=False, - fill_value="extrapolate")(new_levels), - dtype=self.variables[var_name]["data"].dtype) - # Check values below the lower vertical level - if fill_value[0] != "extrapolate": - if ascendant: - idx_below = where(new_levels < src_levels_aux[0]) - else: - idx_below = where(new_levels > src_levels_aux[0]) - dst_data[t, idx_below, j, i] = fill_value[0] - # Check values above the upper vertical level - if fill_value[1] != "extrapolate": - if ascendant: - idx_above = where(new_levels > src_levels_aux[-1]) - else: - idx_above = where(new_levels < src_levels_aux[-1]) - dst_data[t, idx_above, j, i] = fill_value[1] - except Exception as e: - print("time lat lon", t, j, i) - print("***********************") - print("LEVELS", src_levels_aux) - print("DATA", array(self.variables[var_name]["data"][t, :, j, i], dtype=float64)) - print("METHOD", kind) - print("FILL_VALUE", fill_value) - print("+++++++++++++++++++++++") - raise Exception(str(e)) - # if level_array is not None: - # dst_data[t, :, j, i] = array(f(level_array), dtype=float32) - - self.variables[var_name]["data"] = copy(dst_data) - # print(self.variables[var_name]["data"]) - - # Update level information - new_lev_info = {"data": array(new_levels)} - if "positive" in self.lev.keys(): - # Vertical level direction - if do_flip: - self.reverse_level_direction() - new_lev_info["positive"] = self.lev["positive"] - - if self.vertical_var_name is not None: - for var_attr, attr_info in self.variables[self.vertical_var_name].items(): - if var_attr not in ["data", "dimensions", "crs", "grid_mapping"]: - new_lev_info[var_attr] = copy(attr_info) - self.free_vars(self.vertical_var_name) - self.vertical_var_name = None - - self.set_levels(new_lev_info) - - # Remove original file information - self.__ini_path = None - self.dataset = None - - return self diff --git a/build/lib/nes/nc_projections/__init__.py b/build/lib/nes/nc_projections/__init__.py deleted file mode 100644 index 4839ec5..0000000 --- a/build/lib/nes/nc_projections/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from .default_nes import Nes -from .latlon_nes import LatLonNes -from .rotated_nes import RotatedNes -from .rotated_nested_nes import RotatedNestedNes -from .points_nes import PointsNes -from .points_nes_ghost import PointsNesGHOST -from .points_nes_providentia import PointsNesProvidentia -from .lcc_nes import LCCNes -from .mercator_nes import MercatorNes -# from .raster_nes import RasterNes - -__all__ = [ - 'MercatorNes', 'Nes', 'LatLonNes',
'RotatedNes', 'RotatedNestedNes', 'PointsNes', 'PointsNesGHOST', - 'PointsNesProvidentia', 'LCCNes', -] diff --git a/build/lib/nes/nc_projections/default_nes.py b/build/lib/nes/nc_projections/default_nes.py deleted file mode 100644 index d7c28c9..0000000 --- a/build/lib/nes/nc_projections/default_nes.py +++ /dev/null @@ -1,4252 +0,0 @@ -#!/usr/bin/env python - -import sys -from gc import collect -from warnings import warn -from numpy import (array, ndarray, abs, mean, diff, dstack, append, tile, empty, unique, stack, vstack, full, isnan, - flipud, nan, float32, float64, ma, generic, character, issubdtype, arange, newaxis, concatenate, - split, cumsum, zeros, column_stack) -from pandas import Index, concat -from geopandas import GeoDataFrame -from datetime import timedelta, datetime -from netCDF4 import Dataset, num2date, date2num, stringtochar -from mpi4py import MPI -from shapely.geometry import Polygon, Point -from copy import deepcopy, copy -from dateutil.relativedelta import relativedelta -from typing import Union, List, Dict, Any -from pyproj import Proj, Transformer -from ..methods import vertical_interpolation, horizontal_interpolation, cell_measures, spatial_join -from ..nes_formats import to_netcdf_cams_ra, to_netcdf_monarch, to_monarch_units, to_netcdf_cmaq, to_cmaq_units, \ - to_netcdf_wrf_chem, to_wrf_chem_units - - -class Nes(object): - """ - A class to handle netCDF data with parallel processing capabilities using MPI. - - Attributes - ---------- - comm : MPI.Comm - MPI communicator. - rank : int - MPI rank. - master : bool - True when rank == 0. - size : int - Size of the communicator. - info : bool - Indicates if you want to print reading/writing info. - __ini_path : str - Path to the original file to read when open_netcdf is called. - hours_start : int - Number of hours to avoid from the first original values. - hours_end : int - Number of hours to avoid from the last original values. - dataset : Dataset - netcdf4-python Dataset. - variables : Dict[str, Dict[str, Any]] - Variables information. The dictionary structure is: - { - var_name: { - "data": ndarray or None, # Array values or None if the variable is not loaded. - attr_name: attr_value, # Variable attributes. - ... - }, - ... - } - _full_time : List[datetime] - Complete list of original time step values. - _full_lev : Dict[str, array] - Vertical level dictionary with the complete "data" key for all the values and the rest of the attributes. - { - "data": ndarray, # Array of vertical level values. - attr_name: attr_value, # Vertical level attributes. - ... - } - _full_lat : dict - Latitudes dictionary with the complete "data" key for all the values and the rest of the attributes. - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - _full_lon : dict - Longitudes dictionary with the complete "data" key for all the values and the rest of the attributes. - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - _full_lat_bnds : dict - Latitude bounds dictionary with the complete "data" key for the latitudinal boundaries of each grid and the - rest of the attributes. - { - "data": ndarray, # Array of latitude bounds. - attr_name: attr_value, # Latitude bounds attributes. - ... - } - _full_lon_bnds : dict - Longitude bounds dictionary with the complete "data" key for the longitudinal boundaries of each grid and the - rest of the attributes. - { - "data": ndarray, # Array of longitude bounds. 
- attr_name: attr_value, # Longitude bounds attributes. - ... - } - parallel_method : str - Parallel method to read/write. Can be chosen from any of the following axes to parallelize: "T", "Y", or "X". - read_axis_limits : dict - Dictionary with the 4D limits of the rank data to read. Structure: - { - "t_min": int, "t_max": int, # Time axis limits. - "z_min": int, "z_max": int, # Vertical axis limits. - "y_min": int, "y_max": int, # Latitudinal axis limits. - "x_min": int, "x_max": int, # Longitudinal axis limits. - } - write_axis_limits : dict - Dictionary with the 4D limits of the rank data to write. Structure: - { - "t_min": int, "t_max": int, # Time axis limits. - "z_min": int, "z_max": int, # Vertical axis limits. - "y_min": int, "y_max": int, # Latitudinal axis limits. - "x_min": int, "x_max": int, # Longitudinal axis limits. - } - time : List[datetime] - List of time steps of the rank data. - lev : dict - Vertical levels dictionary with the portion of "data" corresponding to the rank values. Structure: - { - "data": ndarray, # Array of vertical level values for the rank. - attr_name: attr_value, # Vertical level attributes. - ... - } - lat : dict - Latitudes dictionary with the portion of "data" corresponding to the rank values. Structure: - { - "data": ndarray, # Array of latitude values for the rank. - attr_name: attr_value, # Latitude attributes. - ... - } - lon : dict - Longitudes dictionary with the portion of "data" corresponding to the rank values. Structure: - { - "data": ndarray, # Array of longitude values for the rank. - attr_name: attr_value, # Longitude attributes. - ... - } - lat_bnds : dict - Latitude bounds dictionary with the portion of "data" for the latitudinal boundaries corresponding to the rank - values. - Structure: - { - "data": ndarray, # Array of latitude bounds for the rank. - attr_name: attr_value, # Latitude bounds attributes. - ... - } - lon_bnds : dict - Longitude bounds dictionary with the portion of "data" for the longitudinal boundaries corresponding to the - rank values. - Structure: - { - "data": ndarray, # Array of longitude bounds for the rank. - attr_name: attr_value, # Longitude bounds attributes. - ... - } - global_attrs : dict - Global attributes with the attribute name as key and data as values. Structure: - { - attr_name: attr_value, # Global attribute name and value. - ... - } - _var_dim : tuple - Name of the Y and X dimensions for the variables. - _lat_dim : tuple - Name of the dimensions of the Latitude values. - _lon_dim : tuple - Name of the dimensions of the Longitude values. - projection : Proj - Grid projection. - projection_data : dict - Dictionary with the projection information. Structure: - { - proj_param: proj_value, # Projection parameters. - ... - } - """ - def __init__(self, comm: Union[MPI.Comm, None] = None, path: Union[str, None] = None, info: bool = False, - dataset: Union[Dataset, None] = None, parallel_method: str = "Y", avoid_first_hours: int = 0, - avoid_last_hours: int = 0, first_level: int = 0, last_level: Union[int, None] = None, - create_nes: bool = False, balanced: bool = False, times: Union[List[datetime], None] = None, - **kwargs) -> None: - """ - Initialize the Nes class - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset or None - NetCDF4-python Dataset to initialize the class. 
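The read_axis_limits and write_axis_limits dictionaries described above are plain slice bounds over the four (time, lev, lat, lon) axes. A toy example of how such a dictionary selects one rank's portion of a 4D array (shapes and limits are made up for illustration):

from numpy import arange

data = arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))   # (time, lev, lat, lon)
limits = {"t_min": 0, "t_max": 2, "z_min": 0, "z_max": 3,
          "y_min": 1, "y_max": 3, "x_min": 0, "x_max": 5}

rank_data = data[limits["t_min"]:limits["t_max"],
                 limits["z_min"]:limits["z_max"],
                 limits["y_min"]:limits["y_max"],
                 limits["x_min"]:limits["x_max"]]
print(rank_data.shape)  # (2, 3, 2, 5): this rank owns Y rows 1 and 2 only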
- parallel_method : str - Indicates the parallelization method that you want. Default over the Y axis; - accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int or None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or from an existing file (False). - balanced : bool - Indicates if you want a balanced parallelization or not. - A balanced dataset cannot be written in chunking mode. - times : List[datetime] or None - List of times to substitute the current ones during creation. - """ - - # MPI Initialization - if comm is None: - self.comm = MPI.COMM_WORLD - else: - self.comm = comm - self.rank = self.comm.Get_rank() - self.master = self.rank == 0 - self.size = self.comm.Get_size() - - # General info - self.info = info - self.__ini_path = path - self.shapefile = None - - # Selecting info - self.hours_start = avoid_first_hours - self.hours_end = avoid_last_hours - self.first_level = first_level - self.last_level = last_level - self.lat_min = None - self.lat_max = None - self.lon_min = None - self.lon_max = None - self.balanced = balanced - - # Define parallel method - self.parallel_method = parallel_method - self.serial_nc = None # Place to store temporarily the serial Nes instance - - # Get minor and major axes of Earth - self.earth_radius = self.get_earth_radius("WGS84") - - # Time resolution and climatology will be modified, if needed, during the time variable reading - self._time_resolution = "hours" - self._climatology = False - self._climatology_var_name = "climatology_bounds" # Default var_name but can be changed if the input is different - - # NetCDF object - if create_nes: - - self.dataset = None - - # Set string length - self.strlen = None - - # Initialize variables - self.variables = {} - - # Projection data. This is duplicated because, when creating the object, NES needs that info to - # build the coordinate data.
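The MPI bootstrap above follows the standard mpi4py pattern: every process learns its rank within the communicator, and rank 0 is flagged as the master. A minimal standalone version of that convention:

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
master = rank == 0          # rank 0 acts as the master process
size = comm.Get_size()
print(f"rank {rank} of {size}, master={master}")

Run with, for example, mpirun -np 4 python script.py to see one line per rank.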
- self.projection_data = self._get_projection_data(create_nes, **kwargs) - self.projection = self._get_pyproj_projection() - - # Complete dimensions - self._full_time = times - - self._full_time_bnds = self.__get_time_bnds(create_nes) - self._full_lat_bnds, self._full_lon_bnds = self.__get_coordinates_bnds(create_nes) - self._full_lev = {"data": array([0]), "units": "", "positive": "up"} - self._full_lat, self._full_lon = self._create_centre_coordinates(**kwargs) - - # Set axis limits for parallel reading - self.read_axis_limits = self._get_read_axis_limits() - self.write_axis_limits = self._get_write_axis_limits() - - # Dimensions screening - self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - self.time_bnds = self.get_full_time_bnds() - self.lev = self.get_full_levels() - self.lat_bnds = self.get_full_latitudes_boundaries() - self.lon_bnds = self.get_full_longitudes_boundaries() - - # Cell measures screening - self.cell_measures = self.__get_cell_measures(create_nes) - - # Set NetCDF attributes - self.global_attrs = self.__get_global_attributes(create_nes) - - else: - if dataset is not None: - self.dataset = dataset - elif self.__ini_path is not None: - self._open() - - # Get string length - self.strlen = self._get_strlen() - - # Lazy variables - self.variables = self._get_lazy_variables() - - # Complete dimensions - self._full_time = self.__get_time() - self._full_time_bnds = self.__get_time_bnds() - self._full_lev = self._get_coordinate_dimension(["lev", "level", "lm", "plev"]) - self._full_lat = self._get_coordinate_dimension(["lat", "latitude", "latitudes"]) - self._full_lon = self._get_coordinate_dimension(["lon", "longitude", "longitudes"]) - self._full_lat_bnds, self._full_lon_bnds = self.__get_coordinates_bnds() - - # Complete cell measures - self._cell_measures = self.__get_cell_measures() - - # Set axis limits for parallel reading - self.read_axis_limits = self._get_read_axis_limits() - # Set axis limits for parallel writing - self.write_axis_limits = self._get_write_axis_limits() - - # Dimensions screening - self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - self.time_bnds = self.get_full_time_bnds() - self.lev = self._get_coordinate_values(self.get_full_levels(), "Z") - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - self.lat_bnds = self._get_coordinate_values(self.get_full_latitudes_boundaries(), "Y", bounds=True) - self.lon_bnds = self._get_coordinate_values(self.get_full_longitudes_boundaries(), "X", bounds=True) - - # Cell measures screening - self.cell_measures = self._get_cell_measures_values(self._cell_measures) - - # Set NetCDF attributes - self.global_attrs = self.__get_global_attributes() - - # Projection data - self.projection_data = self._get_projection_data(create_nes, **kwargs) - self.projection = self._get_pyproj_projection() - - # Writing options - self.zip_lvl = 0 - - # Dimensions information - self._var_dim = None - self._lat_dim = None - self._lon_dim = None - - self.vertical_var_name = None - - # Filtering (portion of the filter coordinates function) - idx = self._get_idx_intervals() - if self.master: - self.set_full_times(self._full_time[idx["idx_t_min"]:idx["idx_t_max"]]) - self._full_lev["data"] = self._full_lev["data"][idx["idx_z_min"]:idx["idx_z_max"]] - - self.hours_start = 0 - self.hours_end = 0 - self.last_level = None - self.first_level = None - - def 
__test_mpi__(self, num_test=None): - print(f"{self.rank} Barrier {num_test}") - sys.stdout.flush() - self.comm.Barrier() - if self.master: - data = 1 - else: - data = 0 - data = self.comm.bcast(data, root=0) - print(f"{self.rank} data {data}") - sys.stdout.flush() - return None - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the Nes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default over the Y axis; - accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int or None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or from an existing file (False). - balanced : bool - Indicates if you want a balanced parallelization or not. - A balanced dataset cannot be written in chunking mode. - times : List[datetime] or None - List of times to substitute the current ones during creation. - """ - - new = Nes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, - last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - def _get_strlen(self): - """ - Get the strlen - - Returns - ------- - int - Max length of the string data - """ - - if "strlen" in self.dataset.dimensions: - strlen = self.dataset.dimensions["strlen"].size - else: - return None - - return strlen - - def set_strlen(self, strlen=75): - """ - Set the strlen - - 75 is the standard value used in GHOST data - - Parameters - ---------- - strlen : int or None - Max length of the string - """ - - self.strlen = strlen - - return None - - def __del__(self): - """ - To delete the Nes object and close all the open datasets. - """ - - self.close() - try: - self.free_vars(list(self.variables.keys())) - del self.variables - del self.time - del self._full_time - del self.time_bnds - del self._full_time_bnds - del self.lev - del self._full_lev - del self.lat - del self._full_lat - del self.lon - del self._full_lon - del self._full_lat_bnds - del self.lat_bnds - del self._full_lon_bnds - del self.lon_bnds - del self.strlen - del self.shapefile - for cell_measure in self.cell_measures.keys(): - if self.cell_measures[cell_measure]["data"] is not None: - del self.cell_measures[cell_measure]["data"] - del self.cell_measures - except (AttributeError, KeyError): - pass - - del self - collect() - - return None - - def __getstate__(self): - """ - Get the state of the class for pickling, excluding the unpicklable attributes - (communicator, variables, dataset and cell measures). - - Returns - ------- - state : dict - Dictionary with the class parameters. - """ - - d = self.__dict__ - state = {k: d[k] for k in d if k not in ["comm", "variables", "dataset", "cell_measures"]} - - return state - - def __setstate__(self, state): - """ - Set the state of the class.
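__getstate__/__setstate__ above make the object picklable by dropping the members that cannot, or should not, travel through a pickle, such as the MPI communicator. A minimal sketch of the same pattern with a hypothetical class:

import pickle

class Carrier:
    def __init__(self, comm, payload):
        self.comm = comm        # e.g. an MPI communicator: not picklable
        self.payload = payload

    def __getstate__(self):
        # Drop the communicator, as Nes does for comm/variables/dataset.
        return {k: v for k, v in self.__dict__.items() if k != "comm"}

    def __setstate__(self, state):
        self.__dict__ = state

restored = pickle.loads(pickle.dumps(Carrier(comm=object(), payload=[1, 2])))
print(hasattr(restored, "comm"), restored.payload)  # False [1, 2]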
- - Parameters - ---------- - state: dict - Dictionary with the class parameters. - """ - - self.__dict__ = state - - return None - - def __add__(self, other): - """ - Sum two NES objects - - Parameters - ---------- - other : Nes - A Nes to be summed - - Returns - ------- - Nes - Summed Nes object - """ - nessy = self.copy(copy_vars=True) - for var_name in other.variables.keys(): - if var_name not in nessy.variables.keys(): - # Create New variable - nessy.variables[var_name] = deepcopy(other.variables[var_name]) - else: - nessy.variables[var_name]["data"] += other.variables[var_name]["data"] - return nessy - - def __radd__(self, other): - if other == 0 or other is None: - return self - else: - return self.__add__(other) - - def __getitem__(self, key: str) -> Union[array, None]: - """ - Retrieve the data associated with the specified key. - - Parameters - ---------- - key : str - The key to retrieve the data for. - - Returns - ------- - Union[array, None] - The data associated with the specified key, or None if the key - does not exist. - - Notes - ----- - This method allows accessing data in the variables dictionary using - dictionary-like syntax, e.g., obj[key]["data"]. - - """ - return self.variables[key]["data"] - - def copy(self, copy_vars: bool = False): - """ - Copy the Nes object. - The copy will avoid to copy the communicator, dataset and variables by default. - - Parameters - ---------- - copy_vars: bool - Indicates if you want to copy the variables (in lazy mode). - - Returns - ------- - nessy : Nes - Copy of the Nes object. - """ - - nessy = deepcopy(self) - nessy.dataset = None - if copy_vars: - nessy.set_communicator(self.comm) - nessy.variables = deepcopy(self.variables) - nessy.cell_measures = deepcopy(self.cell_measures) - else: - nessy.variables = {} - nessy.cell_measures = {} - - return nessy - - def get_full_times(self) -> List[datetime]: - """ - Retrieve the complete list of original time step values. - - Returns - ------- - List[datetime] - The complete list of original time step values from the netCDF data. - """ - if self.master: - data = self._full_time - else: - data = None - data = self.comm.bcast(data, root=0) - - if not isinstance(data, list): - data = list(data) - return data - - def get_full_time_bnds(self) -> List[datetime]: - """ - Retrieve the complete list of original time step boundaries. - - Returns - ------- - List[datetime] - The complete list of original time step boundary values from the netCDF data. - """ - data = self.comm.bcast(self._full_time_bnds) - return data - - def get_full_levels(self) -> Dict[str, Any]: - """ - Retrieve the complete vertical level information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete vertical level data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of vertical level values. - attr_name: attr_value, # Vertical level attributes. - ... - } - """ - data = self.comm.bcast(self._full_lev) - return data - - def get_full_latitudes(self) -> Dict[str, Any]: - """ - Retrieve the complete latitude information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_lat) - - return data - - def get_full_longitudes(self) -> Dict[str, Any]: - """ - Retrieve the complete longitude information. 
- - Returns - ------- - Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_lon) - return data - - def get_full_latitudes_boundaries(self) -> Dict[str, Any]: - """ - Retrieve the complete latitude boundaries information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete latitude boundaries data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude boundaries values. - attr_name: attr_value, # Latitude boundaries attributes. - ... - } - """ - data = self.comm.bcast(self._full_lat_bnds) - return data - - def get_full_longitudes_boundaries(self) -> Dict[str, Any]: - """ - Retrieve the complete longitude boundaries information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete longitude boundaries data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude boundaries values. - attr_name: attr_value, # Longitude boundaries attributes. - ... - } - """ - data = self.comm.bcast(self._full_lon_bnds) - return data - - def set_full_times(self, data: List[datetime]) -> None: - """ - Set the complete list of original time step values. - - Parameters - ---------- - data : List[datetime] - The complete list of original time step values to set. - """ - if self.master: - self._full_time = data - return None - - def set_full_time_bnds(self, data: List[datetime]) -> None: - """ - Set the complete list of original time step boundaries. - - Parameters - ---------- - data : List[datetime] - The complete list of original time step boundary values to set. - """ - if self.master: - self._full_time_bnds = data - return None - - def set_full_levels(self, data: Dict[str, Any]) -> None: - """ - Set the complete vertical level information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete vertical level data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of vertical level values. - attr_name: attr_value, # Vertical level attributes. - ... - } - """ - if self.master: - self._full_lev = data - return None - - def set_full_latitudes(self, data: Dict[str, Any]) -> None: - """ - Set the complete latitude information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - if self.master: - self._full_lat = data - return None - - def set_full_longitudes(self, data: Dict[str, Any]) -> None: - """ - Set the complete longitude information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - if self.master: - self._full_lon = data - return None - - def set_full_latitudes_boundaries(self, data: Dict[str, Any]) -> None: - """ - Set the complete latitude boundaries information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete latitude boundaries data and its attributes. 
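All of the get_full_* accessors share one pattern: only the master rank keeps the full arrays (_full_lat, _full_lon, and so on) and every call broadcasts a copy to the remaining ranks. Stripped to its core, assuming mpi4py:

from mpi4py import MPI

comm = MPI.COMM_WORLD

# Only the master holds the full data; every rank receives a copy.
full_lat = {"data": [0.0, 1.0, 2.0]} if comm.Get_rank() == 0 else None
full_lat = comm.bcast(full_lat, root=0)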
- The dictionary structure is: - { - "data": ndarray, # Array of latitude boundaries values. - attr_name: attr_value, # Latitude boundaries attributes. - ... - } - """ - if self.master: - self._full_lat_bnds = data - return None - - def set_full_longitudes_boundaries(self, data: Dict[str, Any]) -> None: - """ - Set the complete longitude boundaries information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete longitude boundaries data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude boundaries values. - attr_name: attr_value, # Longitude boundaries attributes. - ... - } - """ - if self.master: - self._full_lon_bnds = data - - return None - - def get_fids(self, use_read=False): - """ - Obtain the FIDs in a 2D format. - - Parameters - ---------- - use_read : bool - Indicates if you want to use the read_axis_limits - - Returns - ------- - array - 2D array with the FID data. - """ - if self.master: - fids = arange(self._full_lat["data"].shape[0] * self._full_lon["data"].shape[-1]) - fids = fids.reshape((self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1])) - else: - fids = None - fids = self.comm.bcast(fids) - - if use_read: - fids = fids[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - else: - fids = fids[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] - return fids - - def get_full_shape(self): - """ - Obtain the full 2D shape of the data - - Returns - ------- - tuple - 2D shape of the data. - """ - if self.master: - shape = (self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1]) - else: - shape = None - shape = self.comm.bcast(shape) - - return shape - - def set_level_direction(self, new_direction): - """ - Set the direction of the vertical level values. - - Parameters - ---------- - new_direction : str - The new direction for the vertical levels. Must be either "up" or "down". - - Returns - ------- - bool - True if the direction was set successfully. - - Raises - ------ - ValueError - If `new_direction` is not "up" or "down". - """ - if new_direction not in ["up", "down"]: - raise ValueError(f"Level direction must be up or down. '{new_direction}' is not a valid option") - if self.master: - self._full_lev["positive"] = new_direction - self.lev["positive"] = new_direction - - return True - - def reverse_level_direction(self): - """ - Reverse the current direction of the vertical level values. - - Returns - ------- - bool - True if the direction was reversed successfully. - """ - if "positive" in self.lev.keys(): - if self.lev["positive"] == "up": - if self.master: - self._full_lev["positive"] = "down" - self.lev["positive"] = "down" - else: - if self.master: - self._full_lev["positive"] = "up" - self.lev["positive"] = "up" - return True - - def clear_communicator(self): - """ - Erase the communicator and the parallelization indexes. - """ - - self.comm = None - self.rank = 0 - self.master = 0 - self.size = 0 - - return None - - def set_communicator(self, comm): - """ - Set a new communicator and the correspondent parallelization indexes. - - Parameters - ---------- - comm: MPI.COMM - Communicator to be set.
- """ - - self.comm = comm - self.rank = self.comm.Get_rank() - self.master = self.rank == 0 - self.size = self.comm.Get_size() - - self.read_axis_limits = self._get_read_axis_limits() - self.write_axis_limits = self._get_write_axis_limits() - - return None - - def set_climatology(self, is_climatology): - """ - Set whether the dataset represents climatological data. - - Parameters - ---------- - is_climatology : bool - A boolean indicating if the dataset represents climatological data. - - Returns - ------- - None - - Raises - ------ - TypeError - If `is_climatology` is not a boolean. - """ - if not isinstance(is_climatology, bool): - raise TypeError("Only boolean values are accepted") - self._climatology = is_climatology - return None - - def get_climatology(self): - """ - Get whether the dataset represents climatological data. - - Returns - ------- - bool - True if the dataset represents climatological data, False otherwise. - """ - return self._climatology - - def set_levels(self, levels): - """ - Modify the original level values with new ones. - - Parameters - ---------- - levels : dict - Dictionary with the new level information to be set. - """ - self.set_full_levels(deepcopy(levels)) - self.lev = deepcopy(levels) - - return None - - def set_time(self, time_list): - """ - Modify the original level values with new ones. - - Parameters - ---------- - time_list : List[datetime] - List of time steps - """ - if self.parallel_method == "T": - raise TypeError("Cannot set time on a 'T' parallel method") - self.set_full_times(deepcopy(time_list)) - self.time = deepcopy(time_list) - - return None - - def set_time_bnds(self, time_bnds): - """ - Modify the original time bounds values with new ones. - - Parameters - ---------- - time_bnds : List - AList with the new time bounds information to be set. - """ - - correct_format = True - for time_bnd in array(time_bnds).flatten(): - if not isinstance(time_bnd, datetime): - print("{0} is not a datetime object".format(time_bnd)) - correct_format = False - if correct_format: - if len(self.get_full_times()) == len(time_bnds): - self.set_full_time_bnds(deepcopy(time_bnds)) - self.time_bnds = deepcopy(time_bnds) - else: - msg = "WARNING!!! " - msg += "The given time bounds list has a different length than the time array. " - msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self.time), len(time_bnds)) - warn(msg) - sys.stderr.flush() - else: - msg = "WARNING!!! " - msg += "There is at least one element in the time bounds to be set that is not a datetime object. " - msg += "Time bounds will not be set." - warn(msg) - sys.stderr.flush() - - return None - - def set_time_resolution(self, new_resolution): - """ - Set the time resolution for the dataset. - - Parameters - ---------- - new_resolution : str - The new time resolution. Accepted values are "second", "seconds", "minute", "minutes", - "hour", "hours", "day", "days". - - Returns - ------- - bool - True if the time resolution was set successfully. - - Raises - ------ - ValueError - If `new_resolution` is not one of the accepted values. - """ - accepted_resolutions = ["second", "seconds", "minute", "minutes", "hour", "hours", "day", "days"] - if new_resolution in accepted_resolutions: - self._time_resolution = new_resolution - else: - raise ValueError(f"Time resolution '{new_resolution}' is not accepted. 
" + - f"Use one of this: {accepted_resolutions}") - return True - - @staticmethod - def _create_single_spatial_bounds(coordinates, inc, spatial_nv=2, inverse=False): - """ - Calculate the vertices coordinates. - - Parameters - ---------- - coordinates : array - Coordinates in degrees (latitude or longitude). - inc : float - Increment between centre values. - spatial_nv : int - Non-mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. - inverse : bool - For some grid latitudes. - - Returns - ---------- - bounds : array - An Array with as many elements as vertices for each value of coords. - """ - - # Create new arrays moving the centres half increment less and more. - coords_left = coordinates - inc / 2 - coords_right = coordinates + inc / 2 - - # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. - if spatial_nv == 2: - # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell - bounds = dstack((coords_left, coords_right)) - bounds = bounds.reshape((len(coordinates), spatial_nv)) - elif spatial_nv == 4: - # Create an array of N arrays of 4 elements to store the corner values for each cell - # It can be stored in clockwise starting form the left-top element, or in inverse mode. - if inverse: - bounds = dstack((coords_left, coords_left, coords_right, coords_right)) - else: - bounds = dstack((coords_left, coords_right, coords_right, coords_left)) - else: - raise ValueError("The number of vertices of the boundaries must be 2 or 4.") - - return bounds - - def create_spatial_bounds(self): - """ - Calculate longitude and latitude bounds and set them. - """ - # Latitudes - full_lat = self.get_full_latitudes() - inc_lat = abs(mean(diff(full_lat["data"]))) - lat_bnds = self._create_single_spatial_bounds(full_lat["data"], inc_lat, spatial_nv=2) - - self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) - self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], :]} - - # Longitudes - full_lon = self.get_full_longitudes() - inc_lon = abs(mean(diff(full_lon["data"]))) - lon_bnds = self._create_single_spatial_bounds(full_lon["data"], inc_lon, spatial_nv=2) - - self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) - self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} - - return None - - def get_spatial_bounds_mesh_format(self): - """ - Get the spatial bounds in the pcolormesh format: - - see: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.pcolormesh.html - - Returns - ------- - lon_bnds_mesh : numpy.ndarray - Longitude boundaries in the mesh format - lat_bnds_mesh : numpy.ndarray - Latitude boundaries in the mesh format - """ - if self.size > 1: - raise RuntimeError("NES.get_spatial_bounds_mesh_format() function only works in serial mode.") - if self.lat_bnds is None: - self.create_spatial_bounds() - - if self.lat_bnds["data"].shape[-1] == 2: - # get the lat_b and lon_b first rows - lat_b_0 = append(self.lat_bnds["data"][:, 0], self.lat_bnds["data"][-1, -1]) - lon_b_0 = append(self.lon_bnds["data"][:, 0], self.lon_bnds["data"][-1, -1]) - # expand lat_band lon_b in 2D - lat_bnds_mesh = tile(lat_b_0, (len(self.lon["data"]) + 1, 1)).transpose() - lon_bnds_mesh = tile(lon_b_0, (len(self.lat["data"]) + 1, 1)) - - elif self.lat_bnds["data"].shape[-1] == 4: - # Irregular quadrilateral polygon cell definition - lat_bnds_mesh = empty((self.lat["data"].shape[0] + 1, 
self.lat["data"].shape[1] + 1)) - lat_bnds_mesh[:-1, :-1] = self.lat_bnds["data"][:, :, 0] - lat_bnds_mesh[:-1, 1:] = self.lat_bnds["data"][:, :, 1] - lat_bnds_mesh[1:, 1:] = self.lat_bnds["data"][:, :, 2] - lat_bnds_mesh[1:, :-1] = self.lat_bnds["data"][:, :, 3] - - lon_bnds_mesh = empty((self.lat["data"].shape[0] + 1, self.lat["data"].shape[1] + 1)) - lon_bnds_mesh[:-1, :-1] = self.lon_bnds["data"][:, :, 0] - lon_bnds_mesh[:-1, 1:] = self.lon_bnds["data"][:, :, 1] - lon_bnds_mesh[1:, 1:] = self.lon_bnds["data"][:, :, 2] - lon_bnds_mesh[1:, :-1] = self.lon_bnds["data"][:, :, 3] - else: - raise RuntimeError("Invalid number of vertices: {0}".format(self.lat_bnds["data"].shape[-1])) - - return lon_bnds_mesh, lat_bnds_mesh - - def free_vars(self, var_list): - """ - Erase the selected variables from the variables' information. - - Parameters - ---------- - var_list : List or str - List (or single string) of the variables to be loaded. - """ - - if isinstance(var_list, str): - var_list = [var_list] - - if self.variables is not None: - for var_name in var_list: - if var_name in self.variables: - if "data" in self.variables[var_name].keys(): - del self.variables[var_name]["data"] - del self.variables[var_name] - collect() - - return None - - def keep_vars(self, var_list): - """ - Keep the selected variables and erases the rest. - - Parameters - ---------- - var_list : List or str - List (or single string) of the variables to be loaded. - """ - - if isinstance(var_list, str): - var_list = [var_list] - - to_remove = list(set(self.variables.keys()).difference(set(var_list))) - - self.free_vars(to_remove) - - return None - - @property - def get_time_interval(self): - """ - Calculate the interrval of hours between time steps. - - Returns - ------- - int - Number of hours between time steps. - """ - if self.master: - time_interval = self._full_time[1] - self._full_time[0] - time_interval = int(time_interval.seconds // 3600) - else: - time_interval = None - - return self.comm.bcast(time_interval) - - def sel_time(self, time, inplace=True): - """ - To select only one time step. - - Parameters - ---------- - time : datetime - Time stamp to select. - inplace : bool - Indicates if you want a copy with the selected time step (False) or to modify te existing one (True). - - Returns - ------- - Nes - A Nes object with the data (and metadata) of the selected time step. - """ - - if not inplace: - aux_nessy = self.copy(copy_vars=False) - aux_nessy.comm = self.comm - else: - aux_nessy = self - - aux_nessy.hours_start = 0 - aux_nessy.hours_end = 0 - - idx_time = aux_nessy.time.index(time) - - aux_nessy.time = [self.time[idx_time]] - aux_nessy._full_time = aux_nessy.time - for var_name, var_info in self.variables.items(): - if copy: - aux_nessy.variables[var_name] = {} - for att_name, att_value in var_info.items(): - if att_name == "data": - if att_value is None: - raise ValueError("{} data not loaded".format(var_name)) - aux_nessy.variables[var_name][att_name] = att_value[[idx_time]] - else: - aux_nessy.variables[var_name][att_name] = att_value - else: - aux_nessy.variables[var_name]["data"] = aux_nessy.variables[var_name]["data"][[idx_time]] - - return aux_nessy - - def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, - lat_min=None, lat_max=None, lon_min=None, lon_max=None): - """ - Select a slice of time, vertical level, latitude, or longitude given minimum and maximum limits. 
- - Parameters - ---------- - hours_start : int, optional - The number of hours from the start to begin the selection. - time_min : datetime, optional - The minimum datetime for the time selection. Mutually exclusive with `hours_start`. - hours_end : int, optional - The number of hours from the end to end the selection. - time_max : datetime, optional - The maximum datetime for the time selection. Mutually exclusive with `hours_end`. - lev_min : int, optional - The minimum vertical level index for the selection. - lev_max : int, optional - The maximum vertical level index for the selection. - lat_min : float, optional - The minimum latitude for the selection. - lat_max : float, optional - The maximum latitude for the selection. - lon_min : float, optional - The minimum longitude for the selection. - lon_max : float, optional - The maximum longitude for the selection. - - Returns - ------- - None - - Raises - ------ - ValueError - If any variables are already loaded or if mutually exclusive parameters are both provided. - - Notes - ----- - This method updates the selection criteria for the dataset and recalculates the read and write axis limits - accordingly. It also updates the time, level, latitude, and longitude slices based on the new criteria. - """ - full_time = self.get_full_times() - loaded_vars = False - for var_info in self.variables.values(): - if var_info["data"] is not None: - loaded_vars = True - if loaded_vars: - raise ValueError("Some variables have been loaded. Use select function before load.") - - # First time filter - if hours_start is not None: - if time_min is not None: - raise ValueError("Choose to select by hours_start or time_min but not both") - self.hours_start = hours_start - elif time_min is not None: - if time_min <= full_time[0]: - self.hours_start = 0 - else: - self.hours_start = int((time_min - full_time[0]).total_seconds() // 3600) - - # Last time filter - if hours_end is not None: - if time_max is not None: - raise ValueError("Choose to select by hours_end or time_max but not both") - self.hours_end = hours_end - elif time_max is not None: - if time_max >= full_time[-1]: - self.hours_end = 0 - else: - self.hours_end = int((full_time[-1] - time_max).total_seconds() // 3600) - - # Level filter - self.first_level = lev_min - self.last_level = lev_max - - # Coordinate filter - self.lat_min = lat_min - self.lat_max = lat_max - self.lon_min = lon_min - self.lon_max = lon_max - - # New axis limits - self.read_axis_limits = self._get_read_axis_limits() - - # Dimensions screening - self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - self.time_bnds = self.get_full_time_bnds() - self.lev = self._get_coordinate_values(self.get_full_levels(), "Z") - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - - self.lat_bnds = self._get_coordinate_values(self.get_full_latitudes_boundaries(), "Y", bounds=True) - self.lon_bnds = self._get_coordinate_values(self.get_full_longitudes_boundaries(), "X", bounds=True) - - # Filter dimensions - self._filter_coordinates_selection() - - # Removing complete coordinates - self.write_axis_limits = self._get_write_axis_limits() - - return None - - def _filter_coordinates_selection(self): - """ - Use the selection limits to filter time, lev, lat, lon, lon_bnds and lat_bnds. 
- """ - - idx = self._get_idx_intervals() - - if self.master: - self._full_time = self._full_time[idx["idx_t_min"]:idx["idx_t_max"]] - self._full_lev["data"] = self._full_lev["data"][idx["idx_z_min"]:idx["idx_z_max"]] - - if len(self._full_lat["data"].shape) == 1: - # Regular projection - self._full_lat["data"] = self._full_lat["data"][idx["idx_y_min"]:idx["idx_y_max"]] - self._full_lon["data"] = self._full_lon["data"][idx["idx_x_min"]:idx["idx_x_max"]] - - if self._full_lat_bnds is not None: - self._full_lat_bnds["data"] = self._full_lat_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], :] - if self._full_lon_bnds is not None: - self._full_lon_bnds["data"] = self._full_lon_bnds["data"][idx["idx_x_min"]:idx["idx_x_max"], :] - else: - # Irregular projections - self._full_lat["data"] = self._full_lat["data"][idx["idx_y_min"]:idx["idx_y_max"], - idx["idx_x_min"]:idx["idx_x_max"]] - self._full_lon["data"] = self._full_lon["data"][idx["idx_y_min"]:idx["idx_y_max"], - idx["idx_x_min"]:idx["idx_x_max"]] - - if self._full_lat_bnds is not None: - self._full_lat_bnds["data"] = self._full_lat_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], - idx["idx_x_min"]:idx["idx_x_max"], :] - if self._full_lon_bnds is not None: - self._full_lon_bnds["data"] = self._full_lon_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], - idx["idx_x_min"]:idx["idx_x_max"], :] - - self.hours_start = 0 - self.hours_end = 0 - self.last_level = None - self.first_level = None - self.lat_min = None - self.lat_max = None - self.lon_max = None - self.lon_min = None - - return None - - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. - """ - - raise NotImplementedError("Must be implemented on inner class.") - - @staticmethod - def _get_pyproj_projection(): - """ - Retrieves Pyproj projection data based on grid details. 
- - """ - - raise NotImplementedError("Must be implemented on inner class.") - - def _get_idx_intervals(self): - """ - Calculate the index intervals - - Returns - ------- - dict - Dictionary with the index intervals - """ - full_lat = self.get_full_latitudes() - full_lon = self.get_full_longitudes() - idx = {"idx_t_min": self._get_time_id(self.hours_start, first=True), - "idx_t_max": self._get_time_id(self.hours_end, first=False), - "idx_z_min": self.first_level, - "idx_z_max": self.last_level} - - # Axis Y - if self.lat_min is None: - idx["idx_y_min"] = 0 - else: - idx["idx_y_min"] = self._get_coordinate_id(full_lat["data"], self.lat_min, axis=0) - if self.lat_max is None: - idx["idx_y_max"] = full_lat["data"].shape[0] - else: - idx["idx_y_max"] = self._get_coordinate_id(full_lat["data"], self.lat_max, axis=0) + 1 - - if idx["idx_y_min"] > idx["idx_y_max"]: - idx_aux = copy(idx["idx_y_min"]) - idx["idx_y_min"] = idx["idx_y_max"] - idx["idx_y_max"] = idx_aux - - # Axis X - - if self.lon_min is None: - idx["idx_x_min"] = 0 - else: - if len(full_lon["data"].shape) == 1: - axis = 0 - else: - axis = 1 - idx["idx_x_min"] = self._get_coordinate_id(full_lon["data"], self.lon_min, axis=axis) - if self.lon_max is None: - idx["idx_x_max"] = full_lon["data"].shape[-1] - else: - if len(full_lon["data"].shape) == 1: - axis = 0 - else: - axis = 1 - idx["idx_x_max"] = self._get_coordinate_id(full_lon["data"], self.lon_max, axis=axis) + 1 - - if idx["idx_x_min"] > idx["idx_x_max"]: - idx_aux = copy(idx["idx_x_min"]) - idx["idx_x_min"] = idx["idx_x_max"] - idx["idx_x_max"] = idx_aux - return idx - - # ================================================================================================================== - # Statistics - # ================================================================================================================== - - def last_time_step(self): - """ - Modify variables to keep only the last time step. - """ - - if self.parallel_method == "T": - raise NotImplementedError("Statistics are not implemented on time axis parallelization method.") - aux_time = self.get_full_times()[0].replace(hour=0, minute=0, second=0, microsecond=0) - self.set_full_times([aux_time]) - self.time = [aux_time] - - for var_name, var_info in self.variables.items(): - if var_info["data"] is None: - self.load(var_name) - aux_data = var_info["data"][-1, :] - if len(aux_data.shape) == 3: - aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) - self.variables[var_name]["data"] = aux_data - self.hours_start = 0 - self.hours_end = 0 - - return None - - def daily_statistic(self, op, type_op="calendar"): - """ - Calculate daily statistic. - - Parameters - ---------- - op : str - Statistic to perform. Accepted values: "max", "mean" and "min". - type_op : str - Type of statistic to perform. Accepted values: "calendar", "alltsteps", and "withoutt0". - - "calendar": Calculate the statistic using the time metadata. It will avoid single time step by day - calculations - - "alltsteps": Calculate a single time statistic with all the time steps. - - "withoutt0": Calculate a single time statistic with all the time steps avoiding the first one. 
- """ - - if self.parallel_method == "T": - raise NotImplementedError("Statistics are not implemented on time axis parallel method.") - time_interval = self.get_time_interval - if type_op == "calendar": - aux_time_bounds = [] - aux_time = [] - day_list = [date_aux.day for date_aux in self.time] - for var_name, var_info in self.variables.items(): - if var_info["data"] is None: - self.load(var_name) - stat_data = None - for day in unique(day_list): - idx_first = next(i for i, val in enumerate(day_list, 0) if val == day) - idx_last = len(day_list) - next(i for i, val in enumerate(reversed(day_list), 1) if val == day) - if idx_first != idx_last: # To avoid single time step statistic - if idx_last != len(day_list): - if op == "mean": - data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].mean(axis=0) - elif op == "max": - data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].max(axis=0) - elif op == "min": - data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].min(axis=0) - else: - raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") - aux_time_bounds.append([self.time[idx_first], self.time[idx_last]]) - else: - if op == "mean": - data_aux = var_info["data"][idx_first:, :, :, :].mean(axis=0) - elif op == "max": - data_aux = var_info["data"][idx_first:, :, :, :].max(axis=0) - elif op == "min": - data_aux = var_info["data"][idx_first:, :, :, :].min(axis=0) - else: - raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") - aux_time_bounds.append([self.time[idx_first], self.time[-1]]) - - data_aux = data_aux.reshape((1, data_aux.shape[0], data_aux.shape[1], data_aux.shape[2])) - aux_time.append(self.time[idx_first].replace(hour=0, minute=0, second=0)) - # Append over time dimension - if stat_data is None: - stat_data = data_aux.copy() - else: - stat_data = vstack([stat_data, data_aux]) - self.variables[var_name]["data"] = stat_data - self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) - self.time = aux_time - self.set_full_times(self.time) - - self.set_time_bnds(aux_time_bounds) - - elif type_op == "alltsteps": - for var_name, var_info in self.variables.items(): - if var_info["data"] is None: - self.load(var_name) - if op == "mean": - aux_data = var_info["data"].mean(axis=0) - elif op == "max": - aux_data = var_info["data"].max(axis=0) - elif op == "min": - aux_data = var_info["data"].min(axis=0) - else: - raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") - if len(aux_data.shape) == 3: - aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) - self.variables[var_name]["data"] = aux_data - self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) - - aux_time = self.time[0].replace(hour=0, minute=0, second=0, microsecond=0) - aux_time_bounds = [[self.time[0], self.time[-1]]] - self.time = [aux_time] - self.set_full_times(self.time) - - self.set_time_bnds(aux_time_bounds) - - elif type_op == "withoutt0": - for var_name, var_info in self.variables.items(): - if var_info["data"] is None: - self.load(var_name) - if op == "mean": - aux_data = var_info["data"][1:, :].mean(axis=0) - elif op == "max": - aux_data = var_info["data"][1:, :].max(axis=0) - elif op == "min": - aux_data = var_info["data"][1:, :].min(axis=0) - else: - raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") - if len(aux_data.shape) == 3: - aux_data = aux_data.reshape((1, aux_data.shape[0], 
aux_data.shape[1], aux_data.shape[2])) - self.variables[var_name]["data"] = aux_data - self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) - full_time = self.get_full_times() - aux_time = full_time[1].replace(hour=0, minute=0, second=0, microsecond=0) - aux_time_bounds = [[full_time[1], full_time[-1]]] - self.time = [aux_time] - self.set_full_times(self.time) - - self.set_time_bnds(aux_time_bounds) - else: - raise NotImplementedError(f"Statistic operation type '{type_op}' is not implemented.") - self.hours_start = 0 - self.hours_end = 0 - - return None - - @staticmethod - def _get_axis_index_(axis): - - if axis == "T": - value = 0 - elif axis == "Z": - value = 1 - elif axis == "Y": - value = 2 - elif axis == "X": - value = 3 - else: - raise ValueError("Unknown axis: {0}".format(axis)) - - return value - - def sum_axis(self, axis="Z"): - - if self.parallel_method == axis: - raise NotImplementedError( - f"It is not possible to sum over the axis used for parallelization ('{self.parallel_method}')") - - for var_name, var_info in self.variables.items(): - if var_info["data"] is not None: - self.variables[var_name]["data"] = self.variables[var_name]["data"].sum( - axis=self._get_axis_index_(axis), keepdims=True) - if axis == "T": - self.variables[var_name]["cell_methods"] = "time: sum (interval: {0}hr)".format( - (self.time[-1] - self.time[0]).total_seconds() // 3600) - - if axis == "T": - self.set_time_bnds([self.time[0], self.time[-1]]) - self.time = [self.time[0]] - self.set_full_times([self.time[0]]) - if axis == "Z": - self.lev["data"] = array([self.lev["data"][0]]) - self.set_full_levels(self.lev) - - return None - - def find_time_id(self, time): - """ - Find index of time in time array. - - Parameters - ---------- - time : datetime - Time element. - - Returns - ------- - int - Index of time element. - """ - - if time in self.time: - return self.time.index(time) - - def rolling_mean(self, var_list=None, hours=8): - """ - Calculate rolling mean for given hours - - Parameters - ---------- - var_list : List, str, None - List (or single string) of the variables to be loaded.
-    def rolling_mean(self, var_list=None, hours=8):
-        """
-        Calculate the rolling mean over the given number of hours.
-
-        Parameters
-        ----------
-        var_list : List, str, None
-            List (or single string) of the variables to be loaded.
-        hours : int, optional
-            Window, in hours, over which to calculate the rolling mean, by default 8.
-
-        Returns
-        -------
-        Nes
-            A Nes object.
-        """
-
-        if self.parallel_method == "T":
-            raise NotImplementedError("The rolling mean cannot be calculated using the time axis parallel method.")
-
-        aux_nessy = self.copy(copy_vars=False)
-        aux_nessy.set_communicator(self.comm)
-
-        if isinstance(var_list, str):
-            var_list = [var_list]
-        elif var_list is None:
-            var_list = list(self.variables.keys())
-
-        for var_name in var_list:
-            # Load variables if they have not been loaded previously
-            if self.variables[var_name]["data"] is None:
-                self.load(var_name)
-
-            # Get original file shape
-            nessy_shape = self.variables[var_name]["data"].shape
-
-            # Initialise array
-            aux_nessy.variables[var_name] = {}
-            aux_nessy.variables[var_name]["data"] = empty(shape=nessy_shape)
-            aux_nessy.variables[var_name]["dimensions"] = deepcopy(self.variables[var_name]["dimensions"])
-
-            for curr_time in self.time:
-                # Get previous time given a set of hours
-                prev_time = curr_time - timedelta(hours=(hours - 1))
-
-                # Get time indices
-                curr_time_id = self.find_time_id(curr_time)
-                prev_time_id = self.find_time_id(prev_time)
-
-                # Get mean if previous time is available
-                if prev_time_id is not None:
-                    if self.info:
-                        print(f"Calculating mean between {prev_time} and {curr_time}.")
-                    aux_nessy.variables[var_name]["data"][curr_time_id, :, :, :] = self.variables[var_name]["data"][
-                        prev_time_id:curr_time_id, :, :, :].mean(axis=0, keepdims=True)
-                # Fill with nan if previous time is not available
-                else:
-                    if self.info:
-                        msg = f"Mean between {prev_time} and {curr_time} cannot be calculated "
-                        msg += f"because data for {prev_time} is not available."
-                        print(msg)
-                    aux_nessy.variables[var_name]["data"][curr_time_id, :, :, :] = full(
-                        shape=(1, nessy_shape[1], nessy_shape[2], nessy_shape[3]), fill_value=nan)
-
-        return aux_nessy
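A usage sketch for the removed "rolling_mean"; note that it returns a new Nes object instead of modifying the original one. The variable name is hypothetical:

    rolled = nessy.rolling_mean(var_list="o3", hours=8)  # 8-hour rolling mean
    # Time steps without enough history are filled with NaN
    data = rolled.variables["o3"]["data"]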
- """ - - axis_limits = {"x_min": None, "x_max": None, - "y_min": None, "y_max": None, - "z_min": None, "z_max": None, - "t_min": None, "t_max": None} - - idx = self._get_idx_intervals() - if self.parallel_method == "Y": - y_len = idx["idx_y_max"] - idx["idx_y_min"] - if y_len < self.size: - raise IndexError("More processors (size={0}) selected than Y elements (size={1})".format( - self.size, y_len)) - axis_limits["y_min"] = ((y_len // self.size) * self.rank) + idx["idx_y_min"] - if self.rank + 1 < self.size: - axis_limits["y_max"] = ((y_len // self.size) * (self.rank + 1)) + idx["idx_y_min"] - else: - axis_limits["y_max"] = idx["idx_y_max"] - - # Non parallel filters - axis_limits["x_min"] = idx["idx_x_min"] - axis_limits["x_max"] = idx["idx_x_max"] - - axis_limits["t_min"] = idx["idx_t_min"] - axis_limits["t_max"] = idx["idx_t_max"] - - elif self.parallel_method == "X": - x_len = idx["idx_x_max"] - idx["idx_x_min"] - if x_len < self.size: - raise IndexError("More processors (size={0}) selected than X elements (size={1})".format( - self.size, x_len)) - axis_limits["x_min"] = ((x_len // self.size) * self.rank) + idx["idx_x_min"] - if self.rank + 1 < self.size: - axis_limits["x_max"] = ((x_len // self.size) * (self.rank + 1)) + idx["idx_x_min"] - else: - axis_limits["x_max"] = idx["idx_x_max"] - - # Non parallel filters - axis_limits["y_min"] = idx["idx_y_min"] - axis_limits["y_max"] = idx["idx_y_max"] - - axis_limits["t_min"] = idx["idx_t_min"] - axis_limits["t_max"] = idx["idx_t_max"] - - elif self.parallel_method == "T": - t_len = idx["idx_t_max"] - idx["idx_t_min"] - if t_len < self.size: - raise IndexError("More processors (size={0}) selected than T elements (size={1})".format( - self.size, t_len)) - axis_limits["t_min"] = ((t_len // self.size) * self.rank) + idx["idx_t_min"] - if self.rank + 1 < self.size: - axis_limits["t_max"] = ((t_len // self.size) * (self.rank + 1)) + idx["idx_t_min"] - - # Non parallel filters - axis_limits["y_min"] = idx["idx_y_min"] - axis_limits["y_max"] = idx["idx_y_max"] - - axis_limits["x_min"] = idx["idx_x_min"] - axis_limits["x_max"] = idx["idx_x_max"] - - else: - raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "Y", "T"])) - - # Vertical levels selection: - axis_limits["z_min"] = self.first_level - if self.last_level == -1 or self.last_level is None: - self.last_level = None - elif self.last_level + 1 == len(self.get_full_levels()["data"]): - self.last_level = None - else: - self.last_level += 1 - axis_limits["z_max"] = self.last_level - - return axis_limits - - def _get_read_axis_limits_balanced(self): - """ - Calculate the 4D reading balanced axis limits. - - Returns - ------- - dict - Dictionary with the 4D limits of the rank data to read. - t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
- """ - idx = self._get_idx_intervals() - - fid_dist = {} - if self.parallel_method == "Y": - len_to_split = idx["idx_y_max"] - idx["idx_y_min"] - if len_to_split < self.size: - raise IndexError("More processors (size={0}) selected than Y elements (size={1})".format( - self.size, len_to_split)) - min_axis = "y_min" - max_axis = "y_max" - to_add = idx["idx_y_min"] - - elif self.parallel_method == "X": - len_to_split = idx["idx_x_max"] - idx["idx_x_min"] - if len_to_split < self.size: - raise IndexError("More processors (size={0}) selected than X elements (size={1})".format( - self.size, len_to_split)) - min_axis = "x_min" - max_axis = "x_max" - to_add = idx["idx_x_min"] - elif self.parallel_method == "T": - len_to_split = idx["idx_t_max"] - idx["idx_t_min"] - if len_to_split < self.size: - raise IndexError(f"More processors (size={self.size}) selected than T elements (size={len_to_split})") - min_axis = "t_min" - max_axis = "t_max" - to_add = idx["idx_t_min"] - else: - raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "Y", "T"])) - - procs_len = len_to_split // self.size - procs_rows_extended = len_to_split - (procs_len * self.size) - - rows_sum = 0 - for proc in range(self.size): - fid_dist[proc] = {"x_min": 0, "x_max": None, - "y_min": 0, "y_max": None, - "z_min": 0, "z_max": None, - "t_min": 0, "t_max": None} - if proc < procs_rows_extended: - aux_rows = procs_len + 1 - else: - aux_rows = procs_len - - len_to_split -= aux_rows - if len_to_split < 0: - rows = len_to_split + aux_rows - else: - rows = aux_rows - - fid_dist[proc][min_axis] = rows_sum - fid_dist[proc][max_axis] = rows_sum + rows - - if to_add is not None: - fid_dist[proc][min_axis] += to_add - fid_dist[proc][max_axis] += to_add - - # # Last element - # if len_to_split == 0 and to_add == 0: - # fid_dist[proc][max_axis] = None - - rows_sum += rows - - axis_limits = fid_dist[self.rank] - - # Non parallel filters - if self.parallel_method != "T": - axis_limits["t_min"] = idx["idx_t_min"] - axis_limits["t_max"] = idx["idx_t_max"] - if self.parallel_method != "X": - axis_limits["x_min"] = idx["idx_x_min"] - axis_limits["x_max"] = idx["idx_x_max"] - if self.parallel_method != "Y": - axis_limits["y_min"] = idx["idx_y_min"] - axis_limits["y_max"] = idx["idx_y_max"] - - # Vertical levels selection: - axis_limits["z_min"] = self.first_level - if self.last_level == -1 or self.last_level is None: - self.last_level = None - elif self.last_level + 1 == len(self.get_full_levels()["data"]): - self.last_level = None - else: - self.last_level += 1 - axis_limits["z_max"] = self.last_level - - return axis_limits - - def _get_time_id(self, hours, first=True): - """ - Get the index of the corresponding time value. - - Parameters - ---------- - hours : int - Number of hours to avoid. - first : bool - Indicates if you want to avoid from the first hours (True) or from the last (False). - Default: True. - - Returns - ------- - int - Index of the time array. - """ - full_time = self.get_full_times() - - if first: - idx = full_time.index(full_time[0] + timedelta(hours=hours)) - else: - idx = full_time.index(full_time[-1] - timedelta(hours=hours)) + 1 - - return idx - - @staticmethod - def _get_coordinate_id(my_array, value, axis=0): - """ - Get the index of the corresponding coordinate value. - - Parameters - ---------- - my_array : array - An Array with the coordinate data - value : float - Coordinate value to search. 
-    @staticmethod
-    def _get_coordinate_id(my_array, value, axis=0):
-        """
-        Get the index of the corresponding coordinate value.
-
-        Parameters
-        ----------
-        my_array : array
-            An array with the coordinate data.
-        value : float
-            Coordinate value to search for.
-        axis : int
-            Axis along which to find the value.
-            Default: 0.
-
-        Returns
-        -------
-        int
-            Index of the coordinate array.
-        """
-        idx = (abs(my_array - value)).argmin(axis=axis).min()
-
-        return idx
-
-    def _open(self):
-        """
-        Open the NetCDF.
-        """
-
-        self.dataset = self.__open_netcdf4()
-
-        return None
-
-    def __open_netcdf4(self, mode="r"):
-        """
-        Open the NetCDF with netcdf4-python.
-
-        Parameters
-        ----------
-        mode : str
-            Inherited from the mode parameter of https://unidata.github.io/netcdf4-python/#Dataset.__init__
-            Default: "r" (read-only).
-
-        Returns
-        -------
-        netcdf : Dataset
-            Open dataset.
-        """
-
-        if self.size == 1:
-            netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=False)
-        else:
-            netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=True, comm=self.comm,
-                             info=MPI.Info())
-        self.dataset = netcdf
-
-        return netcdf
-
-    def close(self):
-        """
-        Close the NetCDF with netcdf4-python.
-        """
-        if (hasattr(self, "serial_nc")) and (self.serial_nc is not None):
-            if self.master:
-                self.serial_nc.close()
-            self.serial_nc = None
-        if (hasattr(self, "dataset")) and (self.dataset is not None):
-            self.dataset.close()
-            self.dataset = None
-
-        return None
-
-    @staticmethod
-    def __get_dates_from_months(time):
-        """
-        Calculate the number of days since the first date in the "time" list and store them in a new list.
-        This is useful when the units are "months since", which cannot be transformed to dates using "num2date".
-
-        Parameters
-        ----------
-        time: List[datetime]
-            Original time.
-
-        Returns
-        -------
-        time: List
-            CF compliant time.
-        """
-
-        start_date_str = time.units.split("since")[1].lstrip()
-        start_date = datetime(int(start_date_str[0:4]), int(start_date_str[5:7]), int(start_date_str[8:10]))
-
-        new_time_deltas = []
-
-        for month_delta in time[:]:
-            # Transform current_date into number of days since base date
-            current_date = start_date + relativedelta(months=month_delta)
-
-            # Calculate number of days between base date and the other dates
-            n_days = int((current_date - start_date).days)
-
-            # Store in list
-            new_time_deltas.append(n_days)
-
-        return new_time_deltas
-
-    def __parse_time(self, time):
-        """
-        Parse the time to be CF compliant.
-
-        Parameters
-        ----------
-        time: Namespace
-            Original time.
-
-        Returns
-        -------
-        time : str
-            CF compliant time.
-        """
-
-        units = self.__parse_time_unit(time.units)
-
-        if not hasattr(time, "calendar"):
-            calendar = "standard"
-        else:
-            calendar = time.calendar
-
-        if "months since" in time.units:
-            units = "days since " + time.units.split("since")[1].lstrip()
-            time = self.__get_dates_from_months(time)
-
-        time_data = time[:]
-
-        if len(time_data) == 1 and isnan(time_data[0]):
-            time_data[0] = 0
-
-        return time_data, units, calendar
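The "months since" workaround above can be exercised in isolation with dateutil; the base date below is illustrative:

    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    start_date = datetime(2000, 1, 1)
    month_deltas = [0, 1, 2]
    # Convert each month offset to a day offset from the base date
    day_deltas = [(start_date + relativedelta(months=m) - start_date).days for m in month_deltas]
    print(day_deltas)  # [0, 31, 60] for a base date of 2000-01-01 (2000 is a leap year)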
- """ - - if "h @" in t_units: - t_units = "hours since {0}-{1}-{2} {3}:{4}:{5} UTC".format( - t_units[4:8], t_units[8:10], t_units[10:12], t_units[13:15], t_units[15:17], t_units[17:-4]) - - return t_units - - @staticmethod - def __get_time_resolution_from_units(units): - """ - Parses the time units to get the time resolution - - Parameters - ---------- - units : str - Time variable units - - Returns - ------- - str - Time variable resolution - """ - if "day" in units or "days" in units: - resolution = "days" - elif "hour" in units or "hours" in units: - resolution = "hours" - elif "minute" in units or "minutes" in units: - resolution = "minutes" - elif "second" in units or "seconds" in units: - resolution = "seconds" - else: - # Default resolution is "hours" - resolution = "hours" - return resolution - - def __get_time(self): - """ - Get the NetCDF file time values. - - Returns - ------- - time : List[datetime] - List of times (datetime) of the NetCDF data. - """ - if self.master: - nc_var = self.dataset.variables["time"] - time_data, units, calendar = self.__parse_time(nc_var) - # Extracting time resolution depending on the units - self._time_resolution = self.__get_time_resolution_from_units(units) - # Checking if it is a climatology dataset - if hasattr(nc_var, "climatology"): - self._climatology = True - self._climatology_var_name = nc_var.climatology - time = num2date(time_data, units, calendar=calendar) - time = [datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) for dt in time] - else: - time = None - self.free_vars("time") - - return time - - def __get_time_bnds(self, create_nes=False): - """ - Get the NetCDF time bounds values. - - Parameters - ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - - Returns - ------- - time_bnds : List - A List of time bounds (datetime) of the NetCDF data. - """ - - if not create_nes: - if self.master: - if "time_bnds" in self.dataset.variables.keys() or self._climatology: - time = self.dataset.variables["time"] - if self._climatology: - nc_var = self.dataset.variables[self._climatology_var_name] - else: - nc_var = self.dataset.variables["time_bnds"] - time_bnds = num2date(nc_var[:], self.__parse_time_unit(time.units), - calendar=time.calendar).tolist() - # Iterate over each inner list - for inner_list in time_bnds: - # Create a new list to store datetime objects - new_inner_list = [] - # Iterate over datetime objects within each inner list - for dt in inner_list: - # Access year, month, day, hour, and minute attributes of datetime objects - new_dt = datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) - # Append the new datetime object to the new inner list - new_inner_list.append(new_dt) - # Replace the old inner list with the new one - time_bnds[time_bnds.index(inner_list)] = new_inner_list - else: - time_bnds = None - else: - time_bnds = None - else: - time_bnds = None - - self.free_vars("time_bnds") - - return time_bnds - - def __get_coordinates_bnds(self, create_nes=False): - """ - Get the NetCDF coordinates bounds values. - - Parameters - ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - - Returns - ------- - lat_bnds : dict - Latitude bounds of the NetCDF data. - lon_bnds : dict - Longitude bounds of the NetCDF data. 
- """ - - if not create_nes: - if self.master: - if "lat_bnds" in self.dataset.variables.keys(): - lat_bnds = {"data": self._unmask_array(self.dataset.variables["lat_bnds"][:])} - else: - lat_bnds = None - - if "lon_bnds" in self.dataset.variables.keys(): - lon_bnds = {"data": self._unmask_array(self.dataset.variables["lon_bnds"][:])} - else: - lon_bnds = None - else: - lat_bnds = None - lon_bnds = None - else: - lat_bnds = None - lon_bnds = None - - self.free_vars(["lat_bnds", "lon_bnds"]) - - return lat_bnds, lon_bnds - - def __get_cell_measures(self, create_nes=False): - """ - Get the NetCDF cell measures values. - - Parameters - ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - - Returns - ------- - dict - Dictionary of cell measures of the NetCDF data. - """ - - c_measures = {} - if self.master: - if not create_nes: - if "cell_area" in self.dataset.variables.keys(): - c_measures["cell_area"] = {} - c_measures["cell_area"]["data"] = self._unmask_array(self.dataset.variables["cell_area"][:]) - c_measures = self.comm.bcast(c_measures, root=0) - - self.free_vars(["cell_area"]) - - return c_measures - - def _get_coordinate_dimension(self, possible_names): - """ - Read the coordinate dimension data. - - This will read the complete data of the coordinate. - - Parameters - ---------- - possible_names: List, str, list - A List (or single string) of the possible names of the coordinate (e.g. ["lat", "latitude"]). - - Returns - ------- - nc_var : dict - Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. - """ - - if isinstance(possible_names, str): - possible_names = [possible_names] - - try: - dimension_name = set(possible_names).intersection(set(self.variables.keys())).pop() - if self.master: - nc_var = self.variables[dimension_name].copy() - nc_var["data"] = self.dataset.variables[dimension_name][:] - if hasattr(nc_var, "units"): - if nc_var["units"] in ["unitless", "-"]: - nc_var["units"] = "" - else: - nc_var = None - self.free_vars(dimension_name) - except KeyError: - if self.master: - nc_var = {"data": array([0]), - "units": ""} - else: - nc_var = None - - return nc_var - - def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): - """ - Get the coordinate data of the current portion. - - Parameters - ---------- - coordinate_info : dict, list - Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. - coordinate_axis : str - Name of the coordinate to extract. Accepted values: ["Z", "Y", "X"]. - bounds : bool - Boolean variable to know if there are coordinate bounds. - Returns - ------- - values : dict - Dictionary with the portion of data corresponding to the rank. 
- """ - - if coordinate_info is None: - return None - - if not isinstance(coordinate_info, dict): - values = {"data": deepcopy(coordinate_info)} - else: - values = deepcopy(coordinate_info) - - coordinate_len = len(values["data"].shape) - if bounds: - coordinate_len -= 1 - - if coordinate_axis == "Y": - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"]] - elif coordinate_len == 2: - values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - elif coordinate_axis == "X": - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif coordinate_len == 2: - values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - elif coordinate_axis == "Z": - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"]] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - - return values - - def _get_cell_measures_values(self, cell_measures_info): - """ - Get the cell measures data of the current portion. - - Parameters - ---------- - cell_measures_info : dict, list - Dictionary with the "data" key with the cell measures variable values. and the attributes as other keys. - - Returns - ------- - values : dict - Dictionary with the portion of data corresponding to the rank. - """ - - if cell_measures_info is None: - return None - - cell_measures_values = {} - - for cell_measures_var in cell_measures_info.keys(): - - values = deepcopy(cell_measures_info[cell_measures_var]) - coordinate_len = len(values["data"].shape) - - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif coordinate_len == 2: - values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - - cell_measures_values[cell_measures_var] = values - - return cell_measures_values - - def _get_lazy_variables(self): - """ - Get all the variables' information. - - Returns - ------- - variables : dict - Dictionary with the variable name as key and another dictionary as value. - De value dictionary will have the "data" key with None as value and all the variable attributes as the - other keys. - e.g. 
- {"var_name_1": {"data": None, "attr_1": value_1_1, "attr_2": value_1_2, ...}, - "var_name_2": {"data": None, "attr_1": value_2_1, "attr_2": value_2_2, ...}, - ...} - """ - - if self.master: - variables = {} - # Initialise data - for var_name, var_info in self.dataset.variables.items(): - variables[var_name] = {} - variables[var_name]["data"] = None - variables[var_name]["dimensions"] = var_info.dimensions - variables[var_name]["dtype"] = var_info.dtype - if variables[var_name]["dtype"] in [str, object]: - if self.strlen is None: - self.set_strlen() - variables[var_name]["dtype"] = str - - # Avoid some attributes - for attrname in var_info.ncattrs(): - if attrname not in ["missing_value", "_FillValue", "add_offset", "scale_factor"]: - value = getattr(var_info, attrname) - if str(value) in ["unitless", "-"]: - value = "" - variables[var_name][attrname] = value - else: - variables = None - variables = self.comm.bcast(variables, root=0) - - return variables - - def _read_variable(self, var_name): - """ - Read the corresponding variable data according to the current rank. - - Parameters - ---------- - var_name : str - Name of the variable to read. - - Returns - ------- - data: array - Portion of the variable data corresponding to the rank. - """ - - nc_var = self.dataset.variables[var_name] - var_dims = nc_var.dimensions - - # Read data in 4 dimensions - if len(var_dims) < 2: - data = nc_var[:] - elif len(var_dims) == 2: - data = nc_var[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) - elif len(var_dims) == 3: - if "strlen" in var_dims: - data = nc_var[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - :] - data_aux = empty(shape=(data.shape[0], data.shape[1]), dtype=object) - for lat_n in range(data.shape[0]): - for lon_n in range(data.shape[1]): - data_aux[lat_n, lon_n] = "".join( - data[lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) - data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) - else: - data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) - elif len(var_dims) == 4: - data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"], - self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif len(var_dims) == 5: - if "strlen" in var_dims: - data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"], - self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - :] - data_aux = empty(shape=(data.shape[0], data.shape[1], data.shape[2], data.shape[3]), dtype=object) - for time_n in range(data.shape[0]): - for lev_n in range(data.shape[1]): - for lat_n in range(data.shape[2]): - for lon_n in range(data.shape[3]): - data_aux[time_n, lev_n, lat_n, lon_n] = "".join( - data[time_n, lev_n, lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) - data = data_aux - else: - raise NotImplementedError("Error with {0}. 
-    def load(self, var_list=None):
-        """
-        Load the selected variables.
-
-        This function will fill the variable "data" key with the corresponding values.
-
-        Parameters
-        ----------
-        var_list : List, str, None
-            List (or single string) of the variables to be loaded.
-        """
-
-        if (self.__ini_path is None) and (self.dataset is None):
-            raise RuntimeError("Only data from existing files can be loaded.")
-
-        if self.dataset is None:
-            self.__open_netcdf4()
-            close = True
-        else:
-            close = False
-
-        if isinstance(var_list, str):
-            var_list = [var_list]
-        elif var_list is None:
-            var_list = list(self.variables.keys())
-
-        for i, var_name in enumerate(var_list):
-            if self.info:
-                print("Rank {0:03d}: Loading {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(var_list)))
-            if self.variables[var_name]["data"] is None:
-                self.variables[var_name]["data"] = self._read_variable(var_name)
-                # Data type changes when joining characters in read_variable (S1 to S+strlen)
-                if "strlen" in self.variables[var_name]["dimensions"]:
-                    if self.strlen is None:
-                        self.set_strlen()
-                    self.variables[var_name]["dtype"] = str
-                    self.variables[var_name]["dimensions"] = tuple([x for x in self.variables[var_name]["dimensions"]
-                                                                    if x != "strlen"])
-            else:
-                if self.master:
-                    print("Data for {0} was previously loaded. Skipping variable.".format(var_name))
-            if self.info:
-                print("Rank {0:03d}: Loaded {1} var ({2})".format(
-                    self.rank, var_name, self.variables[var_name]["data"].shape))
-
-        if close:
-            self.close()
-
-        return None
-
-    @staticmethod
-    def _unmask_array(data):
-        """
-        Missing values to nan. This operation is done because sometimes the missing value is lost during the
-        calculation.
-
-        Parameters
-        ----------
-        data : array
-            Masked array to unmask.
-
-        Returns
-        -------
-        array
-            Unmasked array.
-        """
-
-        if isinstance(data, ma.MaskedArray):
-            try:
-                data = data.filled(nan)
-            except TypeError:
-                msg = "Data missing values cannot be converted to nan."
-                warn(msg)
-                sys.stderr.flush()
-
-        return data
-
-    def to_dtype(self, data_type="float32"):
-        """
-        Cast variables data into the selected data type.
-
-        Parameters
-        ----------
-        data_type : str or Type
-            Data type, by default "float32".
-        """
-
-        for var_name, var_info in self.variables.items():
-            if isinstance(var_info["data"], ndarray):
-                self.variables[var_name]["data"] = self.variables[var_name]["data"].astype(data_type)
-            self.variables[var_name]["dtype"] = data_type
-
-        return None
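Typical call order for the lazy-loading API above; a sketch assuming the same hypothetical file and variable names as before:

    nessy = nes.open_netcdf("example.nc")  # metadata only; all "data" keys are None
    nessy.load(["o3", "no2"])              # fill the rank-local portions
    nessy.to_dtype("float32")              # cast the loaded arrays in place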
- """ - - if isinstance(aux_nessy, str): - aux_nessy = self.new(path=aux_nessy, comm=self.comm, parallel_method=self.parallel_method, - avoid_first_hours=self.hours_start, avoid_last_hours=self.hours_end, - first_level=self.first_level, last_level=self.last_level) - new = True - else: - new = False - for var_name, var_info in aux_nessy.variables.items(): - if var_info["data"] is None: - aux_nessy.read_axis_limits = self.read_axis_limits - aux_nessy.load(var_name) - - new_vars_added = [] - for new_var_name, new_var_data in aux_nessy.variables.items(): - if new_var_name not in self.variables.keys(): - self.variables[new_var_name] = deepcopy(new_var_data) - new_vars_added.append(new_var_name) - - if new: - del aux_nessy - - return new_vars_added - - def __get_global_attributes(self, create_nes=False): - """ - Read the netcdf global attributes. - - Parameters - ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - - Returns - ------- - gl_attrs : dict - Dictionary with the netCDF global attributes. - """ - - gl_attrs = {} - - if not create_nes: - for attrname in self.dataset.ncattrs(): - gl_attrs[attrname] = getattr(self.dataset, attrname) - - return gl_attrs - - # ================================================================================================================== - # Writing - # ================================================================================================================== - - def _get_write_axis_limits(self): - """ - Calculate the 4D writing axis limits depending on if them have to balanced or not. - - Returns - ------- - dict - Dictionary with the 4D limits of the rank data to write. - t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. - """ - - if self.balanced: - return self._get_write_axis_limits_balanced() - else: - return self._get_write_axis_limits_unbalanced() - - def _get_write_axis_limits_unbalanced(self): - """ - Calculate the 4D writing axis limits. - - Returns - ------- - dict - Dictionary with the 4D limits of the rank data to write. - t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. - """ - - axis_limits = {"x_min": None, "x_max": None, - "y_min": None, "y_max": None, - "z_min": None, "z_max": None, - "t_min": None, "t_max": None} - my_shape = self.get_full_shape() - if self.parallel_method == "Y": - y_len = my_shape[0] - axis_limits["y_min"] = (y_len // self.size) * self.rank - if self.rank + 1 < self.size: - axis_limits["y_max"] = (y_len // self.size) * (self.rank + 1) - elif self.parallel_method == "X": - x_len = my_shape[-1] - axis_limits["x_min"] = (x_len // self.size) * self.rank - if self.rank + 1 < self.size: - axis_limits["x_max"] = (x_len // self.size) * (self.rank + 1) - elif self.parallel_method == "T": - t_len = len(self.get_full_times()) - axis_limits["t_min"] = ((t_len // self.size) * self.rank) - if self.rank + 1 < self.size: - axis_limits["t_max"] = (t_len // self.size) * (self.rank + 1) - else: - raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "Y", "T"])) - - return axis_limits - - def _get_write_axis_limits_balanced(self): - """ - Calculate the 4D reading balanced axis limits. - - Returns - ------- - dict - Dictionary with the 4D limits of the rank data to read. - t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
- """ - my_shape = self.get_full_shape() - fid_dist = {} - if self.parallel_method == "Y": - len_to_split = my_shape[0] - min_axis = "y_min" - max_axis = "y_max" - elif self.parallel_method == "X": - len_to_split = my_shape[-1] - min_axis = "x_min" - max_axis = "x_max" - elif self.parallel_method == "T": - len_to_split = len(self.get_full_times()) - min_axis = "t_min" - max_axis = "t_max" - else: - raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "Y", "T"])) - - procs_len = len_to_split // self.size - procs_rows_extended = len_to_split - (procs_len * self.size) - - rows_sum = 0 - for proc in range(self.size): - fid_dist[proc] = {"x_min": 0, "x_max": None, - "y_min": 0, "y_max": None, - "z_min": 0, "z_max": None, - "t_min": 0, "t_max": None} - if proc < procs_rows_extended: - aux_rows = procs_len + 1 - else: - aux_rows = procs_len - - len_to_split -= aux_rows - if len_to_split < 0: - rows = len_to_split + aux_rows - else: - rows = aux_rows - - fid_dist[proc][min_axis] = rows_sum - fid_dist[proc][max_axis] = rows_sum + rows - - # Last element - if len_to_split == 0: - fid_dist[proc][max_axis] = None - - rows_sum += rows - - axis_limits = fid_dist[self.rank] - - return axis_limits - - def _create_dimensions(self, netcdf): - """ - Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open dataset. - """ - - # Create time dimension - netcdf.createDimension("time", None) - - # Create time_nv (number of vertices) dimension - full_time_bnds = self.get_full_time_bnds() - if full_time_bnds is not None: - netcdf.createDimension("time_nv", 2) - - # Create lev, lon and lat dimensions - netcdf.createDimension("lev", len(self.lev["data"])) - - # Create string length dimension - if self.strlen is not None: - netcdf.createDimension("strlen", self.strlen) - - return None - - def _create_dimension_variables(self, netcdf): - """ - Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open dataset. - """ - - self._create_dimension_variables_64(netcdf) - - return None - - def _create_dimension_variables_32(self, netcdf): - """ - Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open dataset. 
- """ - - # TIMES - full_time = self.get_full_times() - full_time_bnds = self.get_full_time_bnds() - time_var = netcdf.createVariable("time", float32, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - time_var.units = "{0} since {1}".format(self._time_resolution, full_time[0].strftime("%Y-%m-%d %H:%M:%S")) - time_var.standard_name = "time" - time_var.calendar = "standard" - time_var.long_name = "time" - if full_time_bnds is not None: - if self._climatology: - time_var.climatology = self._climatology_var_name - else: - time_var.bounds = "time_bnds" - if self.size > 1: - time_var.set_collective(True) - time_var[:] = date2num(full_time[:], time_var.units, time_var.calendar) - - # TIME BOUNDS - if full_time_bnds is not None: - if self._climatology: - time_bnds_var = netcdf.createVariable(self._climatology_var_name, float64, ("time", "time_nv",), - zlib=self.zip_lvl, complevel=self.zip_lvl) - else: - time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), - zlib=self.zip_lvl, complevel=self.zip_lvl) - if self.size > 1: - time_bnds_var.set_collective(True) - time_bnds_var[:] = date2num(full_time_bnds, time_var.units, calendar="standard") - - # LEVELS - full_lev = self.get_full_levels() - lev = netcdf.createVariable("lev", float32, ("lev",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if "units" in full_lev.keys(): - lev.units = full_lev["units"] - else: - lev.units = "" - if "positive" in full_lev.keys(): - lev.positive = full_lev["positive"] - - if self.size > 1: - lev.set_collective(True) - lev[:] = array(full_lev["data"], dtype=float32) - - # LATITUDES - full_lat = self.get_full_latitudes() - full_lat_bnds = self.get_full_latitudes_boundaries() - lat = netcdf.createVariable("lat", float32, self._lat_dim, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lat.units = "degrees_north" - lat.axis = "Y" - lat.long_name = "latitude coordinate" - lat.standard_name = "latitude" - if full_lat_bnds is not None: - lat.bounds = "lat_bnds" - if self.size > 1: - lat.set_collective(True) - lat[:] = array(full_lat["data"], dtype=float32) - - # LATITUDES BOUNDS - if full_lat_bnds is not None: - lat_bnds_var = netcdf.createVariable("lat_bnds", float32, - self._lat_dim + ("spatial_nv",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if self.size > 1: - lat_bnds_var.set_collective(True) - lat_bnds_var[:] = array(full_lat_bnds["data"], dtype=float32) - - # LONGITUDES - full_lon = self.get_full_longitudes() - full_lon_bnds = self.get_full_longitudes_boundaries() - lon = netcdf.createVariable("lon", float32, self._lon_dim, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lon.units = "degrees_east" - lon.axis = "X" - lon.long_name = "longitude coordinate" - lon.standard_name = "longitude" - if full_lon_bnds is not None: - lon.bounds = "lon_bnds" - if self.size > 1: - lon.set_collective(True) - lon[:] = array(full_lon["data"], dtype=float32) - - # LONGITUDES BOUNDS - if full_lon_bnds is not None: - lon_bnds_var = netcdf.createVariable("lon_bnds", float32, - self._lon_dim + ("spatial_nv",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if self.size > 1: - lon_bnds_var.set_collective(True) - lon_bnds_var[:] = array(full_lon_bnds["data"], dtype=float32) - - return None - - def _create_dimension_variables_64(self, netcdf): - """ - Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open dataset. 
- """ - - # TIMES - full_time = self.get_full_times() - full_time_bnds = self.get_full_time_bnds() - time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - time_var.units = "{0} since {1}".format(self._time_resolution, full_time[0].strftime("%Y-%m-%d %H:%M:%S")) - time_var.standard_name = "time" - time_var.calendar = "standard" - time_var.long_name = "time" - if full_time_bnds is not None: - if self._climatology: - time_var.climatology = self._climatology_var_name - else: - time_var.bounds = "time_bnds" - if self.size > 1: - time_var.set_collective(True) - time_var[:] = date2num(full_time[:], time_var.units, time_var.calendar) - - # TIME BOUNDS - if full_time_bnds is not None: - if self._climatology: - time_bnds_var = netcdf.createVariable(self._climatology_var_name, float64, ("time", "time_nv",), - zlib=self.zip_lvl, complevel=self.zip_lvl) - else: - time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), - zlib=self.zip_lvl, complevel=self.zip_lvl) - if self.size > 1: - time_bnds_var.set_collective(True) - time_bnds_var[:] = date2num(full_time_bnds, time_var.units, calendar="standard") - - # LEVELS - full_lev = self.get_full_levels() - lev = netcdf.createVariable("lev", full_lev["data"].dtype, ("lev",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if "units" in full_lev.keys(): - lev.units = full_lev["units"] - else: - lev.units = "" - if "positive" in full_lev.keys(): - lev.positive = full_lev["positive"] - - if self.size > 1: - lev.set_collective(True) - lev[:] = full_lev["data"] - - # LATITUDES - full_lat = self.get_full_latitudes() - full_lat_bnds = self.get_full_latitudes_boundaries() - lat = netcdf.createVariable("lat", full_lat["data"].dtype, self._lat_dim, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lat.units = "degrees_north" - lat.axis = "Y" - lat.long_name = "latitude coordinate" - lat.standard_name = "latitude" - if full_lat_bnds is not None: - lat.bounds = "lat_bnds" - if self.size > 1: - lat.set_collective(True) - lat[:] = full_lat["data"] - - # LATITUDES BOUNDS - if full_lat_bnds is not None: - lat_bnds_var = netcdf.createVariable("lat_bnds", full_lat_bnds["data"].dtype, - self._lat_dim + ("spatial_nv",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if self.size > 1: - lat_bnds_var.set_collective(True) - lat_bnds_var[:] = full_lat_bnds["data"] - - # LONGITUDES - full_lon = self.get_full_longitudes() - full_lon_bnds = self.get_full_longitudes_boundaries() - lon = netcdf.createVariable("lon", full_lon["data"].dtype, self._lon_dim, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lon.units = "degrees_east" - lon.axis = "X" - lon.long_name = "longitude coordinate" - lon.standard_name = "longitude" - if full_lon_bnds is not None: - lon.bounds = "lon_bnds" - if self.size > 1: - lon.set_collective(True) - lon[:] = full_lon["data"] - - # LONGITUDES BOUNDS - if full_lon_bnds is not None: - lon_bnds_var = netcdf.createVariable("lon_bnds", full_lon_bnds["data"].dtype, - self._lon_dim + ("spatial_nv",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if self.size > 1: - lon_bnds_var.set_collective(True) - lon_bnds_var[:] = full_lon_bnds["data"] - - return None - - def _create_cell_measures(self, netcdf): - - # CELL AREA - if "cell_area" in self.cell_measures.keys(): - cell_area = netcdf.createVariable("cell_area", self.cell_measures["cell_area"]["data"].dtype, self._var_dim, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - if self.size > 1: - cell_area.set_collective(True) - 
-    def _create_cell_measures(self, netcdf):
-
-        # CELL AREA
-        if "cell_area" in self.cell_measures.keys():
-            cell_area = netcdf.createVariable("cell_area", self.cell_measures["cell_area"]["data"].dtype, self._var_dim,
-                                              zlib=self.zip_lvl > 0, complevel=self.zip_lvl)
-            if self.size > 1:
-                cell_area.set_collective(True)
-            cell_area[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"],
-                      self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] = \
-                self.cell_measures["cell_area"]["data"]
-
-            cell_area.long_name = "area of grid cell"
-            cell_area.standard_name = "cell_area"
-            cell_area.units = "m2"
-
-            for var_name in self.variables.keys():
-                self.variables[var_name]["cell_measures"] = "area: cell_area"
-
-        if self.info:
-            print("Rank {0:03d}: Cell measures done".format(self.rank))
-        return None
-
-    def _str2char(self, data):
-
-        if self.strlen is None:
-            msg = "String data could not be converted into chars while writing."
-            msg += " Please, set the maximum string length (set_strlen) before writing."
-            raise RuntimeError(msg)
-
-        # Get final shape by adding strlen at the end
-        data_new_shape = data.shape + (self.strlen, )
-
-        # nD (2D, 3D, 4D) data as 1D string array
-        data = data.flatten()
-
-        # Split strings into chars (S1)
-        data_aux = stringtochar(array([v.encode("ascii", "ignore") for v in data]).astype("S" + str(self.strlen)))
-        data_aux = data_aux.reshape(data_new_shape)
-
-        return data_aux
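The "_str2char" conversion can be reproduced with plain netCDF4 and numpy calls; a sketch with strlen=4 (the values are illustrative):

    from numpy import array
    from netCDF4 import stringtochar

    strlen = 4
    data = array(["NO2", "O3"])
    # Pad to fixed-width S4 byte strings, then split into S1 characters
    chars = stringtochar(array([v.encode("ascii", "ignore") for v in data]).astype("S" + str(strlen)))
    print(chars.shape)  # (2, 4): one S1 character per cell, NUL-padded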
-    def _create_variables(self, netcdf, chunking=False):
-        """
-        Create the netCDF file variables.
-
-        Parameters
-        ----------
-        netcdf : Dataset
-            netcdf4-python open dataset.
-        chunking : bool
-            Indicates if you want to chunk the output netCDF.
-        """
-
-        for i, (var_name, var_dict) in enumerate(self.variables.items()):
-            if isinstance(var_dict["data"], int) and var_dict["data"] == 0:
-                var_dims = ("time", "lev",) + self._var_dim
-                var_dtype = float32
-            else:
-                # Get dimensions
-                if (var_dict["data"] is None) or (len(var_dict["data"].shape) == 4):
-                    var_dims = ("time", "lev",) + self._var_dim
-                else:
-                    var_dims = self._var_dim
-
-                # Get data type
-                if "dtype" in var_dict.keys():
-                    var_dtype = var_dict["dtype"]
-                    if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype):
-                        msg = "WARNING!!! "
-                        msg += "Different data types for variable {0}. ".format(var_name)
-                        msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype)
-                        warn(msg)
-                        sys.stderr.flush()
-                        try:
-                            var_dict["data"] = var_dict["data"].astype(var_dtype)
-                        except Exception as e:  # TODO: Detect exception
-                            print(e)
-                            raise TypeError("It was not possible to cast the data to the input dtype.")
-                else:
-                    var_dtype = var_dict["data"].dtype
-                    if var_dtype is object:
-                        raise TypeError("Data dtype is object. Define the dtype explicitly with the dictionary key 'dtype'.")
-
-                if var_dict["data"] is not None:
-
-                    # Ensure data is of type numpy array (to create NES)
-                    if not isinstance(var_dict["data"], (ndarray, generic)):
-                        try:
-                            var_dict["data"] = array(var_dict["data"])
-                        except AttributeError:
-                            raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name))
-
-                    # Convert list of strings to chars for parallelization
-                    if issubdtype(var_dtype, character):
-                        var_dict["data_aux"] = self._str2char(var_dict["data"])
-                        var_dims += ("strlen",)
-                        var_dtype = "S1"
-
-            if self.info:
-                print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(
-                    self.rank, var_name, i + 1, len(self.variables)))
-
-            if not chunking:
-                var = netcdf.createVariable(var_name, var_dtype, var_dims,
-                                            zlib=self.zip_lvl > 0, complevel=self.zip_lvl)
-            else:
-                if self.balanced:
-                    raise NotImplementedError("Balanced data cannot be chunked.")
-                if self.master:
-                    chunk_size = var_dict["data"].shape
-                else:
-                    chunk_size = None
-                chunk_size = self.comm.bcast(chunk_size, root=0)
-                var = netcdf.createVariable(var_name, var_dtype, var_dims,
-                                            zlib=self.zip_lvl > 0, complevel=self.zip_lvl,
-                                            chunksizes=chunk_size)
-            if self.info:
-                print("Rank {0:03d}: Var {1} created ({2}/{3})".format(
-                    self.rank, var_name, i + 1, len(self.variables)))
-            if self.size > 1:
-                var.set_collective(True)
-                if self.info:
-                    print("Rank {0:03d}: Var {1} collective ({2}/{3})".format(
-                        self.rank, var_name, i + 1, len(self.variables)))
-
-            for att_name, att_value in var_dict.items():
-                if att_name == "data":
-                    if att_value is not None:
-                        if self.info:
-                            print("Rank {0:03d}: Filling {1}".format(self.rank, var_name))
-                        if "data_aux" in var_dict.keys():
-                            att_value = var_dict["data_aux"]
-                        if isinstance(att_value, int) and att_value == 0:
-                            var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"],
-                                self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                                self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                                self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0
-
-                        elif len(att_value.shape) == 5:
-                            if "strlen" in var_dims:
-                                var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"],
-                                    self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                                    self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                                    self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"],
-                                    :] = att_value
-                            else:
-                                raise NotImplementedError("It is not possible to write 5D variables.")
-
-                        elif len(att_value.shape) == 4:
-                            var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"],
-                                self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                                self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                                self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value
-
-                        elif len(att_value.shape) == 3:
-                            if "strlen" in var_dims:
-                                var[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                                    self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"],
-                                    :] = att_value
-                            else:
-                                raise NotImplementedError("It is not possible to write 3D variables.")
-
-                        if self.info:
-                            print("Rank {0:03d}: Var {1} data ({2}/{3})".format(
-                                self.rank, var_name, i + 1, len(self.variables)))
-
-                elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]:
-                    var.setncattr(att_name, att_value)
-
-            if "data_aux" in var_dict.keys():
-                del var_dict["data_aux"]
-
-            self._set_var_crs(var)
-            if self.info:
-                print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(
-                    self.rank, var_name, i + 1, len(self.variables)))
-
-        return None
-
-    def append_time_step_data(self, i_time, out_format="DEFAULT"):
-        """
-        Fill the netCDF data for the indicated time index.
-
-        Parameters
-        ----------
-        i_time : int
-            Index of the time step to write.
-        out_format : str
-            Indicates the output format type to change the units (if needed).
-        """
-        if self.serial_nc is not None:
-            try:
-                data = self._gather_data(self.variables)
-            except KeyError:
-                # A KeyError means string data
-                data = self.__gather_data_py_object(self.variables)
-            if self.master:
-                self.serial_nc.variables = data
-                self.serial_nc.append_time_step_data(i_time, out_format=out_format)
-            self.comm.Barrier()
-        else:
-            if out_format == "MONARCH":
-                self.variables = to_monarch_units(self)
-            elif out_format == "CMAQ":
-                self.variables = to_cmaq_units(self)
-            elif out_format == "WRF_CHEM":
-                self.variables = to_wrf_chem_units(self)
-            for i, (var_name, var_dict) in enumerate(self.variables.items()):
-                for att_name, att_value in var_dict.items():
-                    if att_name == "data":
-
-                        if att_value is not None:
-                            if self.info:
-                                print("Rank {0:03d}: Filling {1}".format(self.rank, var_name))
-                            var = self.dataset.variables[var_name]
-                            if isinstance(att_value, int) and att_value == 0:
-                                var[i_time,
-                                    self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                                    self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                                    self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0
-                            elif len(att_value.shape) == 4:
-                                var[i_time,
-                                    self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                                    self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                                    self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value
-
-                            elif len(att_value.shape) == 3:
-                                raise NotImplementedError("It is not possible to write 3D variables.")
-                            else:
-                                raise NotImplementedError("SHAPE APPEND ERROR: {0}".format(att_value.shape))
-                            if self.info:
-                                print("Rank {0:03d}: Var {1} data ({2}/{3})".format(
-                                    self.rank, var_name, i + 1, len(self.variables)))
-                        else:
-                            raise ValueError("Cannot append None data for {0}".format(var_name))
-                    else:
-                        # Metadata already written
-                        pass
-
-        return None
-
-    def _create_centre_coordinates(self, **kwargs):
-        """
-        Calculate centre latitudes and longitudes from grid details.
-
-        Must be implemented on inner classes.
-
-        Returns
-        -------
-        centre_lat : dict
-            Dictionary with data of centre latitudes in 1D.
-        centre_lon : dict
-            Dictionary with data of centre longitudes in 1D.
-        """
-
-        return None
-
-    def _create_metadata(self, netcdf):
-        """
-        Must be implemented on inner class.
-        """
-
-        return None
-
-    @staticmethod
-    def _set_var_crs(var):
-        """
-        Must be implemented on inner class.
-
-        Parameters
-        ----------
-        var : Variable
-            netCDF4-python variable object.
-        """
-
-        return None
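The time-step appending path above pairs with "to_netcdf(..., keep_open=True)": the file is created once with all metadata and each time index is then filled separately. A hedged sketch, assuming this pairing (the path is hypothetical):

    nessy.to_netcdf("out.nc", keep_open=True)  # create the file and keep the dataset open
    for i_time in range(len(nessy.time)):
        nessy.append_time_step_data(i_time)    # write one time slice at a time
    nessy.close()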
-    def __to_netcdf_py(self, path, chunking=False, keep_open=False):
-        """
-        Create the NetCDF using netcdf4-python methods.
-
-        Parameters
-        ----------
-        path : str
-            Path to the output netCDF file.
-        chunking : bool
-            Indicates if you want to chunk the output netCDF.
-        keep_open : bool
-            Indicates if you want to keep the NetCDF open to fill the data by time step.
-        """
-
-        # Open NetCDF
-        if self.info:
-            print("Rank {0:03d}: Creating {1}".format(self.rank, path))
-        if self.size > 1:
-            netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info())
-        else:
-            netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False)
-        if self.info:
-            print("Rank {0:03d}: NetCDF ready to write".format(self.rank))
-
-        # Create dimensions
-        self._create_dimensions(netcdf)
-
-        # Create dimension variables
-        self._create_dimension_variables(netcdf)
-        if self.info:
-            print("Rank {0:03d}: Dimensions done".format(self.rank))
-
-        # Create cell measures
-        self._create_cell_measures(netcdf)
-
-        # Create variables
-        self._create_variables(netcdf, chunking=chunking)
-
-        # Create metadata
-        self._create_metadata(netcdf)
-
-        # Close NetCDF
-        if self.global_attrs is not None:
-            for att_name, att_value in self.global_attrs.items():
-                netcdf.setncattr(att_name, att_value)
-        netcdf.setncattr("Conventions", "CF-1.7")
-
-        if keep_open:
-            self.dataset = netcdf
-        else:
-            netcdf.close()
-
-        return None
-
-    def __to_netcdf_cams_ra(self, path):
-        return to_netcdf_cams_ra(self, path)
-
-    def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES",
-                  keep_open=False):
-        """
-        Write the netCDF output file.
-
-        Parameters
-        ----------
-        path : str
-            Path to the output netCDF file.
-        compression_level : int
-            Level of compression (0 to 9). Default: 0 (no compression).
-        serial : bool
-            Indicates if you want to write in serial or not. Default: False.
-        info : bool
-            Indicates if you want to print the information of each writing step by stdout. Default: False.
-        chunking : bool
-            Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False.
-        nc_type : str
-            Type of netCDF to write: "NES" (or "DEFAULT"), "CAMS_RA", "MONARCH", "CMAQ" or "WRF_CHEM".
-        keep_open : bool
-            Indicates if you want to keep the NetCDF open to fill the data by time step.
-        """
-        old_info = self.info
-        self.info = info
-        self.serial_nc = None
-        self.zip_lvl = compression_level
-
-        # If serial
-        if serial and self.size > 1:
-            try:
-                data = self._gather_data(self.variables)
-            except KeyError:
-                data = self.__gather_data_py_object(self.variables)
-            try:
-                c_measures = self._gather_data(self.cell_measures)
-            except KeyError:
-                c_measures = self.__gather_data_py_object(self.cell_measures)
-            if self.master:
-                new_nc = self.copy(copy_vars=False)
-                new_nc.set_communicator(MPI.COMM_SELF)
-                new_nc.variables = data
-                new_nc.cell_measures = c_measures
-                if nc_type in ["NES", "DEFAULT"]:
-                    new_nc.__to_netcdf_py(path, keep_open=keep_open)
-                elif nc_type == "CAMS_RA":
-                    new_nc.__to_netcdf_cams_ra(path)
-                elif nc_type == "MONARCH":
-                    to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open)
-                elif nc_type == "CMAQ":
-                    to_netcdf_cmaq(new_nc, path, keep_open=keep_open)
-                elif nc_type == "WRF_CHEM":
-                    to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open)
-                else:
-                    msg = f"Unknown NetCDF type '{nc_type}'. "
" - msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" - raise ValueError(msg) - self.serial_nc = new_nc - else: - self.serial_nc = True - else: - if nc_type in ["NES", "DEFAULT"]: - self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open) - elif nc_type == "CAMS_RA": - self.__to_netcdf_cams_ra(path) - elif nc_type == "MONARCH": - to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open) - elif nc_type == "CMAQ": - to_netcdf_cmaq(self, path, keep_open=keep_open) - elif nc_type == "WRF_CHEM": - to_netcdf_wrf_chem(self, path, keep_open=keep_open) - else: - msg = f"Unknown NetCDF type '{nc_type}''. " - msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" - raise ValueError(msg) - - self.info = old_info - - return None - - def __to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False): - """ - Private method to write output file with grib2 format. - - Parameters - ---------- - path : str - Path to the output file. - grib_keys : dict - Dictionary with the grib2 keys. - grib_template_path : str - Path to the grib2 file to use as template. - info : bool - Indicates if you want to print extra information during the process. - """ - - from eccodes import codes_grib_new_from_file - from eccodes import codes_keys_iterator_new - from eccodes import codes_keys_iterator_next - from eccodes import codes_keys_iterator_get_name - from eccodes import codes_get_string - from eccodes import codes_keys_iterator_delete - from eccodes import codes_clone - from eccodes import codes_set - from eccodes import codes_set_values - from eccodes import codes_write - from eccodes import codes_release - - fout = open(path, "wb") - - # read template - fin = open(grib_template_path, "rb") - - gid = codes_grib_new_from_file(fin) - if gid is None: - sys.exit(1) - - iterid = codes_keys_iterator_new(gid, "ls") - while codes_keys_iterator_next(iterid): - keyname = codes_keys_iterator_get_name(iterid) - keyval = codes_get_string(gid, keyname) - if info: - print("%s = %s" % (keyname, keyval)) - - codes_keys_iterator_delete(iterid) - for var_name, var_info in self.variables.items(): - for i_time, time in enumerate(self.time): - for i_lev, lev in enumerate(self.lev["data"]): - clone_id = codes_clone(gid) - - # Adding grib2 keys to file - for key, value in grib_keys.items(): - if value not in ["", "None", None, nan]: - try: - codes_set(clone_id, key, value) - except Exception as e: - print(f"Something went wrong while writing the Grib key '{key}': {value}") - raise e - - # Time dependent keys - if "dataTime" in grib_keys.keys() and grib_keys["dataTime"] in ["", "None", None, nan]: - codes_set(clone_id, "dataTime", int(i_time * 100)) - if "stepRange" in grib_keys.keys() and grib_keys["stepRange"] in ["", "None", None, nan]: - n_secs = (time - self.get_full_times()[0]).total_seconds() - codes_set(clone_id, "stepRange", int(n_secs // 3600)) - if "forecastTime" in grib_keys.keys() and grib_keys["forecastTime"] in ["", "None", None, nan]: - n_secs = (time - self.get_full_times()[0]).total_seconds() - codes_set(clone_id, "forecastTime", int(n_secs)) - - # Level dependent keys - if "typeOfFirstFixedSurface" in grib_keys.keys() and \ - grib_keys["typeOfFirstFixedSurface"] in ["", "None", None, nan]: - if float(lev) == 0: - codes_set(clone_id, "typeOfFirstFixedSurface", 1) - # grib_keys["typeOfFirstFixedSurface"] = 1 - else: - codes_set(clone_id, "typeOfFirstFixedSurface", 103) - # grib_keys["typeOfFirstFixedSurface"] = 103 - if "level" in grib_keys.keys() and grib_keys["level"] in ["", "None", None, 
-    def __to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False):
-        """
-        Private method to write the output file in grib2 format.
-
-        Parameters
-        ----------
-        path : str
-            Path to the output file.
-        grib_keys : dict
-            Dictionary with the grib2 keys.
-        grib_template_path : str
-            Path to the grib2 file to use as template.
-        lat_flip : bool
-            Indicates if the latitude values (and data) have to be flipped.
-        info : bool
-            Indicates if you want to print extra information during the process.
-        """
-
-        from eccodes import codes_grib_new_from_file
-        from eccodes import codes_keys_iterator_new
-        from eccodes import codes_keys_iterator_next
-        from eccodes import codes_keys_iterator_get_name
-        from eccodes import codes_get_string
-        from eccodes import codes_keys_iterator_delete
-        from eccodes import codes_clone
-        from eccodes import codes_set
-        from eccodes import codes_set_values
-        from eccodes import codes_write
-        from eccodes import codes_release
-
-        fout = open(path, "wb")
-
-        # Read template
-        fin = open(grib_template_path, "rb")
-
-        gid = codes_grib_new_from_file(fin)
-        if gid is None:
-            sys.exit(1)
-
-        iterid = codes_keys_iterator_new(gid, "ls")
-        while codes_keys_iterator_next(iterid):
-            keyname = codes_keys_iterator_get_name(iterid)
-            keyval = codes_get_string(gid, keyname)
-            if info:
-                print("%s = %s" % (keyname, keyval))
-
-        codes_keys_iterator_delete(iterid)
-        for var_name, var_info in self.variables.items():
-            for i_time, time in enumerate(self.time):
-                for i_lev, lev in enumerate(self.lev["data"]):
-                    clone_id = codes_clone(gid)
-
-                    # Adding grib2 keys to file
-                    for key, value in grib_keys.items():
-                        if value not in ["", "None", None, nan]:
-                            try:
-                                codes_set(clone_id, key, value)
-                            except Exception as e:
-                                print(f"Something went wrong while writing the Grib key '{key}': {value}")
-                                raise e
-
-                    # Time dependent keys
-                    if "dataTime" in grib_keys.keys() and grib_keys["dataTime"] in ["", "None", None, nan]:
-                        codes_set(clone_id, "dataTime", int(i_time * 100))
-                    if "stepRange" in grib_keys.keys() and grib_keys["stepRange"] in ["", "None", None, nan]:
-                        n_secs = (time - self.get_full_times()[0]).total_seconds()
-                        codes_set(clone_id, "stepRange", int(n_secs // 3600))
-                    if "forecastTime" in grib_keys.keys() and grib_keys["forecastTime"] in ["", "None", None, nan]:
-                        n_secs = (time - self.get_full_times()[0]).total_seconds()
-                        codes_set(clone_id, "forecastTime", int(n_secs))
-
-                    # Level dependent keys
-                    if "typeOfFirstFixedSurface" in grib_keys.keys() and \
-                            grib_keys["typeOfFirstFixedSurface"] in ["", "None", None, nan]:
-                        if float(lev) == 0:
-                            codes_set(clone_id, "typeOfFirstFixedSurface", 1)
-                            # grib_keys["typeOfFirstFixedSurface"] = 1
-                        else:
-                            codes_set(clone_id, "typeOfFirstFixedSurface", 103)
-                            # grib_keys["typeOfFirstFixedSurface"] = 103
-                    if "level" in grib_keys.keys() and grib_keys["level"] in ["", "None", None, nan]:
-                        codes_set(clone_id, "level", float(lev))
-
-                    newval = var_info["data"][i_time, i_lev, :, :]
-                    if lat_flip:
-                        newval = flipud(newval)
-
-                    # TODO: Check default NaN value
-                    newval[isnan(newval)] = 0.
-
-                    codes_set_values(clone_id, array(newval.ravel(), dtype="float64"))
-                    codes_write(clone_id, fout)
-                    del newval
-        codes_release(gid)
-        fout.close()
-        fin.close()
-
-        return None
-
-    def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False):
-        """
-        Write the output file in grib2 format.
-
-        Parameters
-        ----------
-        path : str
-            Path to the output file.
-        grib_keys : dict
-            Dictionary with the grib2 keys.
-        grib_template_path : str
-            Path to the grib2 file to use as template.
-        lat_flip : bool
-            Indicates if the latitude values (and data) have to be flipped.
-        info : bool
-            Indicates if you want to print extra information during the process.
-        """
-
-        # If serial
-        if self.parallel_method in ["X", "Y"] and self.size > 1:
-            try:
-                data = self._gather_data(self.variables)
-            except KeyError:
-                data = self.__gather_data_py_object(self.variables)
-            try:
-                c_measures = self._gather_data(self.cell_measures)
-            except KeyError:
-                c_measures = self.__gather_data_py_object(self.cell_measures)
-            if self.master:
-                new_nc = self.copy(copy_vars=False)
-                new_nc.set_communicator(MPI.COMM_SELF)
-                new_nc.variables = data
-                new_nc.cell_measures = c_measures
-                new_nc.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info)
-        else:
-            self.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info)
-
-        return None
-
-    def create_shapefile(self):
-        """
-        Create a spatial GeoDataFrame (shapefile).
-
-        Returns
-        -------
-        shapefile : GeoPandasDataFrame
-            Shapefile dataframe.
-        """
-
-        if self.shapefile is None:
-
-            if self.lat_bnds is None or self.lon_bnds is None:
-                self.create_spatial_bounds()
-
-            # Reshape arrays to create geometry
-            aux_shape = (self.lat_bnds["data"].shape[0], self.lon_bnds["data"].shape[0], 4)
-            lon_bnds_aux = empty(aux_shape)
-            lon_bnds_aux[:, :, 0] = self.lon_bnds["data"][newaxis, :, 0]
-            lon_bnds_aux[:, :, 1] = self.lon_bnds["data"][newaxis, :, 1]
-            lon_bnds_aux[:, :, 2] = self.lon_bnds["data"][newaxis, :, 1]
-            lon_bnds_aux[:, :, 3] = self.lon_bnds["data"][newaxis, :, 0]
-
-            lon_bnds = lon_bnds_aux
-            del lon_bnds_aux
-
-            lat_bnds_aux = empty(aux_shape)
-            lat_bnds_aux[:, :, 0] = self.lat_bnds["data"][:, newaxis, 0]
-            lat_bnds_aux[:, :, 1] = self.lat_bnds["data"][:, newaxis, 0]
-            lat_bnds_aux[:, :, 2] = self.lat_bnds["data"][:, newaxis, 1]
-            lat_bnds_aux[:, :, 3] = self.lat_bnds["data"][:, newaxis, 1]
-
-            lat_bnds = lat_bnds_aux
-            del lat_bnds_aux
-
-            aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2]))
-            aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2]))
-
-            # Create dataframe containing all polygons
-            geometry = []
-            for i in range(aux_b_lons.shape[0]):
-                geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]),
-                                         (aux_b_lons[i, 1], aux_b_lats[i, 1]),
-                                         (aux_b_lons[i, 2], aux_b_lats[i, 2]),
-                                         (aux_b_lons[i, 3], aux_b_lats[i, 3]),
-                                         (aux_b_lons[i, 0], aux_b_lats[i, 0])]))
-
-            fids = self.get_fids()
-            gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326")
-            self.shapefile = gdf
-
-        else:
-            gdf = self.shapefile
-
-        return gdf
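The shapefile workflow above is usually driven through "to_shapefile", which chains grid creation, variable attachment and writing; a sketch tying in the info flag from this patch series (path and variable hypothetical):

    nessy.load("o3")
    nessy.to_shapefile("o3_grid.shp", var_list="o3", info=False)  # warnings suppressed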
- """ - - if self.shapefile is None: - raise ValueError("Shapefile was not created.") - - if self.size == 1: - # In serial, avoid gather - self.shapefile.to_file(path) - else: - # In parallel - data = self.comm.gather(self.shapefile, root=0) - if self.master: - data = concat(data) - data.to_file(path) - - return None - - def to_shapefile(self, path, time=None, lev=None, var_list=None, info=True): - """ - Create shapefile from NES data. - - 1. Create grid shapefile. - 2. Add variables to shapefile (as independent function). - 3. Write shapefile. - - Parameters - ---------- - path : str - Path to the output file. - time : datetime - Time stamp to select. - lev : int - Vertical level to select. - var_list : List, str, None - List (or single string) of the variables to be loaded and saved in the shapefile. - info: bool - Flag to allow/suppress warnings when the 'time' or 'lev' parameters are None. Default is True. - """ - - # If list is not defined, get all variables - if var_list is None: - var_list = list(self.variables.keys()) - else: - if isinstance(var_list, str): - var_list = [var_list] - - # Add warning for unloaded variables - unloaded_vars = [] - for var_name in var_list: - if self.variables[var_name]["data"] is None: - unloaded_vars.append(var_name) - if len(unloaded_vars) > 0: - raise ValueError("The variables {0} need to be loaded/created before using to_shapefile.".format( - unloaded_vars)) - - # Select first vertical level (if needed) - if lev is None: - if info: - msg = "No vertical level has been specified. The first one will be selected." - warn(msg) - sys.stderr.flush() - idx_lev = 0 - else: - if lev not in self.lev["data"]: - raise ValueError("Level {} is not available. Choose from {}".format(lev, self.lev["data"])) - idx_lev = lev - - # Select first time (if needed) - if time is None: - if info: - msg = "No time has been specified. The first one will be selected." - warn(msg) - sys.stderr.flush() - idx_time = 0 - else: - if time not in self.time: - raise ValueError("Time {} is not available. Choose from {}".format(time, self.time)) - idx_time = self.time.index(time) - - # Create shapefile - self.create_shapefile() - - # Load variables from original file and get data for selected time / level - self.add_variables_to_shapefile(var_list, idx_lev, idx_time) - - # Write shapefile - self.write_shapefile(path) - - return None - - def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): - """ - Add variables data to shapefile. - - var_list : List or str - Variables to be loaded and saved in the shapefile. - idx_lev : int - Index of vertical level for which the data will be saved in the shapefile. - idx_time : int - Index of time for which the data will be saved in the shapefile. - """ - - for var_name in var_list: - self.shapefile[var_name] = self.variables[var_name]["data"][idx_time, idx_lev, :].ravel() - - return None - - def get_centroids_from_coordinates(self): - """ - Get centroids from geographical coordinates. - - Returns - ------- - centroids_gdf: GeoPandasDataFrame - Centroids dataframe. 
- """ - - # Get centroids from coordinates - centroids = [] - for lat_ind in range(0, len(self.lat["data"])): - for lon_ind in range(0, len(self.lon["data"])): - centroids.append(Point(self.lon["data"][lon_ind], - self.lat["data"][lat_ind])) - - # Create dataframe containing all points - fids = self.get_fids() - centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") - - return centroids_gdf - - def __gather_data_py_object(self, data_to_gather): - """ - Gather all the variable data into the MPI rank 0 to perform a serial write. - - Returns - ------- - data_list: dict - Variables dictionary with all the data from all the ranks. - """ - - data_list = deepcopy(data_to_gather) - for var_name in data_list.keys(): - try: - # noinspection PyArgumentList - data_aux = self.comm.gather(data_list[var_name]["data"], root=0) - if self.rank == 0: - shp_len = len(data_list[var_name]["data"].shape) - add_dimension = False # to Add a dimension - if self.parallel_method == "Y": - if shp_len == 2: - # if is a 2D concatenate over first axis - axis = 0 - elif shp_len == 3: - # if is a 3D concatenate over second axis - axis = 1 - else: - # if is a 4D concatenate over third axis - axis = 2 - elif self.parallel_method == "X": - if shp_len == 2: - # if is a 2D concatenate over second axis - axis = 1 - elif shp_len == 3: - # if is a 3D concatenate over third axis - axis = 2 - else: - # if is a 4D concatenate over forth axis - axis = 3 - elif self.parallel_method == "T": - if shp_len == 2: - # if is a 2D add dimension - add_dimension = True - axis = None # Not used - elif shp_len == 3: - # if is a 3D concatenate over first axis - axis = 0 - else: - # if is a 4D concatenate over second axis - axis = 0 - else: - raise NotImplementedError( - "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "Y", "T"])) - if add_dimension: - data_list[var_name]["data"] = stack(data_aux) - else: - data_list[var_name]["data"] = concatenate(data_aux, axis=axis) - except Exception as e: - msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" - print(msg) - sys.stderr.write(msg) - print(e) - sys.stderr.write(str(e)) - sys.stderr.flush() - self.comm.Abort(1) - - return data_list - - def _gather_data(self, data_to_gather): - """ - Gather all the variable data into the MPI rank 0 to perform a serial write. - - Returns - ------- - data_to_gather: dict - Variables to gather. 
- """ - - data_list = deepcopy(data_to_gather) - for var_name in data_list.keys(): - if self.info and self.master: - print("Gathering {0}".format(var_name)) - if data_list[var_name]["data"] is None: - data_list[var_name]["data"] = None - elif isinstance(data_list[var_name]["data"], int) and data_list[var_name]["data"] == 0: - data_list[var_name]["data"] = 0 - else: - shp_len = len(data_list[var_name]["data"].shape) - # Collect local array sizes using the gather communication pattern - rank_shapes = array(self.comm.gather(data_list[var_name]["data"].shape, root=0)) - sendbuf = data_list[var_name]["data"].flatten() - sendcounts = array(self.comm.gather(len(sendbuf), root=0)) - if self.master: - recvbuf = empty(sum(sendcounts), dtype=type(sendbuf.max())) - else: - recvbuf = None - self.comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendcounts), root=0) - if self.master: - recvbuf = split(recvbuf, cumsum(sendcounts)) - # TODO ask - # I don"t understand why it is giving one more split - if len(recvbuf) > len(sendcounts): - recvbuf = recvbuf[:-1] - for i, shape in enumerate(rank_shapes): - recvbuf[i] = recvbuf[i].reshape(shape) - add_dimension = False # to Add a dimension - if self.parallel_method == "Y": - if shp_len == 2: - # if is a 2D concatenate over first axis - axis = 0 - elif shp_len == 3: - # if is a 3D concatenate over second axis - axis = 1 - else: - # if is a 4D concatenate over third axis - axis = 2 - elif self.parallel_method == "X": - if shp_len == 2: - # if is a 2D concatenate over second axis - axis = 1 - elif shp_len == 3: - # if is a 3D concatenate over third axis - axis = 2 - else: - # if is a 4D concatenate over forth axis - axis = 3 - elif self.parallel_method == "T": - if shp_len == 2: - # if is a 2D add dimension - add_dimension = True - axis = None # Not used - elif shp_len == 3: - # if is a 3D concatenate over first axis - axis = 0 - else: - # if is a 4D concatenate over second axis - axis = 0 - else: - raise NotImplementedError( - "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "Y", "T"])) - if add_dimension: - data_list[var_name]["data"] = stack(recvbuf) - else: - data_list[var_name]["data"] = concatenate(recvbuf, axis=axis) - - return data_list - - # ================================================================================================================== - # Extra Methods - # ================================================================================================================== - @staticmethod - def lon_lat_to_cartesian_ecef(lon, lat): - """ - # Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, - # Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. - # ECEF coordinates represent positions (in meters) as X, Y, Z coordinates, approximating the earth surface - # as an ellipsoid of revolution. - # This conversion is for the subsequent calculation of Euclidean distances of the model grid cell centres - # from each observational station. - # Defining the distance between two points on the earth's surface as simply the Euclidean distance - # between the two lat/lon pairs could lead to inaccurate results depending on the distance - # between two points (i.e. 1 deg. of longitude varies with latitude). - - Parameters - ---------- - lon : array - Longitude values. - lat : array - Latitude values. 
- """ - - lla = Proj(proj="latlong", ellps="WGS84", datum="WGS84") - ecef = Proj(proj="geocent", ellps="WGS84", datum="WGS84") - # x, y, z = pyproj.transform(lla, ecef, lon, lat, zeros(lon.shape), radians=False) - # Deprecated: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1 - transformer = Transformer.from_proj(lla, ecef) - x, y, z = transformer.transform(lon, lat, zeros(lon.shape), radians=False) - return column_stack([x, y, z]) - - def add_4d_vertical_info(self, info_to_add): - """ - To add the vertical information from other source. - - Parameters - ---------- - info_to_add : nes.Nes, str - Nes object with the vertical information as variable or str with the path to the NetCDF file that contains - the vertical data. - """ - - return vertical_interpolation.add_4d_vertical_info(self, info_to_add) - - def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", extrapolate=None, info=None, - overwrite=False): - """ - Vertical interpolation function. - - Parameters - ---------- - self : Nes - Source Nes object. - new_levels : List - A List of new vertical levels. - new_src_vertical : nes.Nes, str - Nes object with the vertical information as variable or str with the path to the NetCDF file that contains - the vertical data. - kind : str - Vertical methods type. - extrapolate : bool or tuple or None or number or NaN - If bool: - - If True, both extrapolation options are set to "extrapolate". - - If False, extrapolation options are set to ("bottom", "top"). - If tuple: - - The first element represents the extrapolation option for the lower bound. - - The second element represents the extrapolation option for the upper bound. - - If any element is bool: - - If True, it represents "extrapolate". - - If False: - - If it"s the first element, it represents "bottom". - - If it"s the second element, it represents "top". - - If any element is None, it is replaced with numpy.nan. - - Other numeric values are kept as they are. - - If any element is NaN, it is kept as NaN. - If None: - - Both extrapolation options are set to (NaN, NaN). - If number: - - Both extrapolation options are set to the provided number. - If NaN: - - Both extrapolation options are set to NaN. - info: None, bool - Indicates if you want to print extra information. - overwrite: bool - Indicates if you want to compute the vertical interpolation in the same object or not. - """ - - return vertical_interpolation.interpolate_vertical( - self, new_levels, new_src_vertical=new_src_vertical, kind=kind, extrapolate_options=extrapolate, info=info, - overwrite=overwrite) - - def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, - info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): - """ - Horizontal methods from the current grid to another one. - - Parameters - ---------- - dst_grid : nes.Nes - Final projection Nes object. - weight_matrix_path : str, None - Path to the weight matrix to read/create. - kind : str - Kind of horizontal methods. choices = ["NearestNeighbour", "Conservative"]. - n_neighbours: int - Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. - info: bool - Indicates if you want to print extra info during the methods process. - to_providentia : bool - Indicates if we want the interpolated grid in Providentia format. - only_create_wm : bool - Indicates if you want to only create the Weight Matrix. - wm : Nes - Weight matrix Nes File. 
- flux : bool - Indicates if you want to calculate the weight matrix for flux variables. - """ - - return horizontal_interpolation.interpolate_horizontal( - self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, - to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux) - - def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): - """ - Compute overlay intersection of two GeoPandasDataFrames. - - Parameters - ---------- - ext_shp : GeoPandasDataFrame or str - File or path from where the data will be obtained on the intersection. - method : str - Overlay method. Accepted values: ["nearest", "intersection", "centroid"]. - var_list : List or None - Variables that will be included in the resulting shapefile. - info : bool - Indicates if you want to print the process info. - apply_bbox : bool - Indicates if you want to reduce the shapefile to a bbox. - """ - - return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info, - apply_bbox=apply_bbox) - - def calculate_grid_area(self, overwrite=True): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - overwrite : bool - Indicates if we want to overwrite the grid area. - """ - - if ("cell_area" not in self.cell_measures.keys()) or overwrite: - grid_area = cell_measures.calculate_grid_area(self) - grid_area = grid_area.reshape([self.lat["data"].shape[0], self.lon["data"].shape[-1]]) - self.cell_measures["cell_area"] = {"data": grid_area} - else: - grid_area = self.cell_measures["cell_area"]["data"] - - return grid_area - - @staticmethod - def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142, - earth_radius_major_axis=6378137.0): - """ - Get coordinate bounds and call function to calculate the area of each cell of a set of geometries. - - Parameters - ---------- - geometry_list : List - A List with polygon geometries. - earth_radius_minor_axis : float - Radius of the minor axis of the Earth. - earth_radius_major_axis : float - Radius of the major axis of the Earth. - """ - - return cell_measures.calculate_geometry_area(geometry_list, earth_radius_minor_axis=earth_radius_minor_axis, - earth_radius_major_axis=earth_radius_major_axis) - - @staticmethod - def get_earth_radius(ellps): - """ - Get minor and major axis of Earth. - - Parameters - ---------- - ellps : str - Spatial reference system. - """ - - # WGS84 with radius defined in Cartopy source code - earth_radius_dict = {"WGS84": [6356752.3142, 6378137.0]} - - return earth_radius_dict[ellps] - - def create_providentia_exp_centre_coordinates(self): - """ - Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. - - Returns - ---------- - model_centre_lat : dict - Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). - model_centre_lon : dict - Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). - """ - - raise NotImplementedError("create_providentia_exp_centre_coordinates function is not implemented by default") - - # noinspection DuplicatedCode - def create_providentia_exp_grid_edge_coordinates(self): - """ - Calculate grid edge latitudes and longitudes and get model grid outline. - - Returns - ---------- - grid_edge_lat : dict - Dictionary with data of grid edge latitudes. 
- grid_edge_lon : dict - Dictionary with data of grid edge longitudes. - """ - raise NotImplementedError("create_providentia_exp_grid_edge_coordinates function is not implemented by default") diff --git a/build/lib/nes/nc_projections/latlon_nes.py b/build/lib/nes/nc_projections/latlon_nes.py deleted file mode 100644 index 35d68c8..0000000 --- a/build/lib/nes/nc_projections/latlon_nes.py +++ /dev/null @@ -1,395 +0,0 @@ -#!/usr/bin/env python - -from numpy import float64, linspace, meshgrid, mean, diff, append, flip, repeat, concatenate, vstack -from pyproj import Proj -from .default_nes import Nes - - -class LatLonNes(Nes): - """ - - Attributes - ---------- - _var_dim : tuple - A Tuple with the name of the Y and X dimensions for the variables. - ("lat", "lon") for a regular latitude-longitude projection. - _lat_dim : tuple - A Tuple with the name of the dimensions of the Latitude values. - ("lat", ) for a regular latitude-longitude projection. - _lon_dim : tuple - A Tuple with the name of the dimensions of the Longitude values. - ("lon", ) for a regular latitude-longitude projection. - """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the LatLonNes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, balanced=balanced, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, **kwargs) - - if create_nes: - # Dimensions screening - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - - # Set axis limits for parallel writing - self.write_axis_limits = self._get_write_axis_limits() - - self._var_dim = ("lat", "lon") - self._lat_dim = ("lat",) - self._lon_dim = ("lon",) - - self.free_vars("crs") - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the Nes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. 
- info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - @staticmethod - def _get_pyproj_projection(): - """ - Get projection data as in Pyproj library. - - Returns - ---------- - projection : pyproj.Proj - Grid projection. - """ - - projection = Proj(proj="latlong", ellps="WGS84",) - - return projection - - # noinspection DuplicatedCode - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. - - Returns - ------- - Dict[str, Any] - A dictionary containing projection data with the following keys: - - "grid_mapping_name" : str - Type of grid mapping (e.g., "latitude_longitude"). - - "semi_major_axis" : float - Semi-major axis of the Earth's ellipsoid. - - "inverse_flattening" : int - Inverse flattening parameter. - - "inc_lat" : float - Increment in latitude. - - "inc_lon" : float - Increment in longitude. - - "lat_orig" : float - Origin latitude of the grid. - - "lon_orig" : float - Origin longitude of the grid. - - "n_lat" : int - Number of grid points along latitude. - - "n_lon" : int - Number of grid points along longitude. - - Notes - ----- - Depending on the `create_nes` flag and input `kwargs`, the method constructs - or retrieves projection data. If `create_nes` is True, the method initializes - projection details based on provided arguments such as increments (`inc_lat`, `inc_lon`), - and if additional keyword arguments (`lat_orig`, `lon_orig`, `n_lat`, `n_lon`) are not provided, - defaults for the global domain are used. If `create_nes` is False, the method checks for - an existing "crs" variable in `self.variables` and retrieves its data, freeing the "crs" variable - afterward to optimize memory usage. 
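-
-        Examples
-        --------
-        Hypothetical regional grid (``grid`` is an assumed LatLonNes instance and
-        every number is illustrative only):
-
-        >>> proj = grid._get_projection_data(True, inc_lat=0.1, inc_lon=0.1,  # doctest: +SKIP
-        ...                                  lat_orig=35.0, lon_orig=-10.0, n_lat=150, n_lon=250)
-        >>> proj["grid_mapping_name"]  # doctest: +SKIP
-        'latitude_longitude'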
- - """ - if create_nes: - projection_data = {"grid_mapping_name": "latitude_longitude", - "semi_major_axis": self.earth_radius[1], - "inverse_flattening": 0, - "inc_lat": kwargs["inc_lat"], - "inc_lon": kwargs["inc_lon"], - } - # Global domain - if len(kwargs) == 2: - projection_data["lat_orig"] = -90 - projection_data["lon_orig"] = -180 - projection_data["n_lat"] = int(180 // float64(projection_data["inc_lat"])) - projection_data["n_lon"] = int(360 // float64(projection_data["inc_lon"])) - # Other domains - else: - projection_data["lat_orig"] = kwargs["lat_orig"] - projection_data["lon_orig"] = kwargs["lon_orig"] - projection_data["n_lat"] = kwargs["n_lat"] - projection_data["n_lon"] = kwargs["n_lon"] - else: - if "crs" in self.variables.keys(): - projection_data = self.variables["crs"] - self.free_vars("crs") - else: - projection_data = {"grid_mapping_name": "latitude_longitude", - "semi_major_axis": self.earth_radius[1], - "inverse_flattening": 0, - } - - if "dtype" in projection_data.keys(): - del projection_data["dtype"] - - if "data" in projection_data.keys(): - del projection_data["data"] - - if "dimensions" in projection_data.keys(): - del projection_data["dimensions"] - - return projection_data - - def _create_dimensions(self, netcdf): - """ - Create "spatial_nv" dimensions and the super dimensions "lev", "time", "time_nv", "lon" and "lat". - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(LatLonNes, self)._create_dimensions(netcdf) - - netcdf.createDimension("lon", len(self.get_full_longitudes()["data"])) - netcdf.createDimension("lat", len(self.get_full_latitudes()["data"])) - - # Create spatial_nv (number of vertices) dimension - if (self.lat_bnds is not None) and (self.lon_bnds is not None): - netcdf.createDimension("spatial_nv", 2) - - return None - - def _create_centre_coordinates(self, **kwargs): - """ - Calculate centre latitudes and longitudes from grid details. - - Returns - ---------- - centre_lat : dict - Dictionary with data of centre latitudes in 1D - centre_lon : dict - Dictionary with data of centre longitudes in 1D - """ - - # Get grid resolution - inc_lat = float64(self.projection_data["inc_lat"]) - inc_lon = float64(self.projection_data["inc_lon"]) - - # Get coordinates origen - lat_orig = float64(self.projection_data["lat_orig"]) - lon_orig = float64(self.projection_data["lon_orig"]) - - # Get number of coordinates - n_lat = int(self.projection_data["n_lat"]) - n_lon = int(self.projection_data["n_lon"]) - - # Calculate centre latitudes - lat_c_orig = lat_orig + (inc_lat / 2) - centre_lat = linspace(lat_c_orig, lat_c_orig + (inc_lat * (n_lat - 1)), n_lat, dtype=float64) - - # Calculate centre longitudes - lon_c_orig = lon_orig + (inc_lon / 2) - centre_lon = linspace(lon_c_orig, lon_c_orig + (inc_lon * (n_lon - 1)), n_lon, dtype=float64) - - return {"data": centre_lat}, {"data": centre_lon} - - def create_providentia_exp_centre_coordinates(self): - """ - Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. - - Returns - ---------- - model_centre_lat : dict - Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). - model_centre_lon : dict - Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). 
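-
-        Examples
-        --------
-        The 2D arrays are plain ``meshgrid`` expansions of the 1D axes (the
-        coordinate values here are made up for illustration):
-
-        >>> from numpy import meshgrid, array
-        >>> lon2d, lat2d = meshgrid(array([0.0, 1.0]), array([40.0, 41.0, 42.0]))
-        >>> lat2d.shape
-        (3, 2)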
- """ - - model_centre_lon_data, model_centre_lat_data = meshgrid(self.lon["data"], self.lat["data"]) - - # Calculate centre latitudes - model_centre_lat = {"data": model_centre_lat_data} - - # Calculate centre longitudes - model_centre_lon = {"data": model_centre_lon_data} - - return model_centre_lat, model_centre_lon - - # noinspection DuplicatedCode - def create_providentia_exp_grid_edge_coordinates(self): - """ - Calculate grid edge latitudes and longitudes and get model grid outline. - - Returns - ---------- - grid_edge_lat : dict - Dictionary with data of grid edge latitudes. - grid_edge_lon : dict - Dictionary with data of grid edge longitudes. - """ - - # Get grid resolution - inc_lon = abs(mean(diff(self.lon["data"]))) - inc_lat = abs(mean(diff(self.lat["data"]))) - - # Get bounds - lat_bounds = self._create_single_spatial_bounds(self.lat["data"], inc_lat) - lon_bounds = self._create_single_spatial_bounds(self.lon["data"], inc_lon) - - # Get latitudes for grid edge - left_edge_lat = append(lat_bounds.flatten()[::2], lat_bounds.flatten()[-1]) - right_edge_lat = flip(left_edge_lat, 0) - top_edge_lat = repeat(lat_bounds[-1][-1], len(self.lon["data"]) - 1) - bottom_edge_lat = repeat(lat_bounds[0][0], len(self.lon["data"])) - lat_grid_edge = concatenate((left_edge_lat, top_edge_lat, right_edge_lat, bottom_edge_lat)) - - # Get longitudes for grid edge - left_edge_lon = repeat(lon_bounds[0][0], len(self.lat["data"]) + 1) - top_edge_lon = lon_bounds.flatten()[1:-1:2] - right_edge_lon = repeat(lon_bounds[-1][-1], len(self.lat["data"]) + 1) - bottom_edge_lon = flip(lon_bounds.flatten()[:-1:2], 0) - lon_grid_edge = concatenate((left_edge_lon, top_edge_lon, right_edge_lon, bottom_edge_lon)) - - # Create grid outline by stacking the edges in both coordinates - model_grid_outline = vstack((lon_grid_edge, lat_grid_edge)).T - grid_edge_lat = {"data": model_grid_outline[:, 1]} - grid_edge_lon = {"data": model_grid_outline[:, 0]} - - return grid_edge_lat, grid_edge_lon - - @staticmethod - def _set_var_crs(var): - """ - Set the grid_mapping to "crs". - - Parameters - ---------- - var : Variable - netCDF4-python variable object. - """ - - var.grid_mapping = "crs" - var.coordinates = "lat lon" - - return None - - def _create_metadata(self, netcdf): - """ - Create the "crs" variable for the rotated latitude longitude grid_mapping. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python Dataset. - """ - - if self.projection_data is not None: - mapping = netcdf.createVariable("crs", "i") - mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] - mapping.semi_major_axis = self.projection_data["semi_major_axis"] - mapping.inverse_flattening = self.projection_data["inverse_flattening"] - - return None - - def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): - """ - Write output file with grib2 format. - - Parameters - ---------- - lat_flip : bool - Indicates if the latitudes have to be flipped - path : str - Path to the output file. - grib_keys : dict - Dictionary with the grib2 keys. - grib_template_path : str - Path to the grib2 file to use as template. - info : bool - Indicates if you want to print extra information during the process. 
- """ - - return super(LatLonNes, self).to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) diff --git a/build/lib/nes/nc_projections/lcc_nes.py b/build/lib/nes/nc_projections/lcc_nes.py deleted file mode 100644 index f9eda6e..0000000 --- a/build/lib/nes/nc_projections/lcc_nes.py +++ /dev/null @@ -1,630 +0,0 @@ -#!/usr/bin/env python - -from numpy import float64, linspace, array, mean, diff, append, flip, repeat, concatenate, vstack -from geopandas import GeoDataFrame -from pandas import Index -from pyproj import Proj -from copy import deepcopy -from typing import Dict, Any -from shapely.geometry import Polygon, Point -from .default_nes import Nes - - -class LCCNes(Nes): - """ - - Attributes - ---------- - _full_y : dict - Y coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. - _full_x : dict - X coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. - y : dict - Y coordinates dictionary with the portion of "data" corresponding to the rank values. - x : dict - X coordinates dictionary with the portion of "data" corresponding to the rank values. - _var_dim : tuple - A Tuple with the name of the Y and X dimensions for the variables. - ("y", "x", ) for an LCC projection. - _lat_dim : tuple - A Tuple with the name of the dimensions of the Latitude values. - ("y", "x", ) for an LCC projection. - _lon_dim : tuple - ATuple with the name of the dimensions of the Longitude values. - ("y", "x") for an LCC projection. - """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the LCCNes class - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. 
- """ - self._full_y = None - self._full_x = None - - super(LCCNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, balanced=balanced, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, **kwargs) - - if create_nes: - # Dimensions screening - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - else: - # Complete dimensions - self._full_y = self._get_coordinate_dimension("y") - self._full_x = self._get_coordinate_dimension("x") - - # Dimensions screening - self.y = self._get_coordinate_values(self.get_full_y(), "Y") - self.x = self._get_coordinate_values(self.get_full_x(), "X") - - # Set axis limits for parallel writing - self.write_axis_limits = self._get_write_axis_limits() - - self._var_dim = ("y", "x") - self._lat_dim = ("y", "x") - self._lon_dim = ("y", "x") - - self.free_vars("crs") - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the Nes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - def get_full_y(self) -> Dict[str, Any]: - """ - Retrieve the complete Y information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_y) - - return data - - def get_full_x(self) -> Dict[str, Any]: - """ - Retrieve the complete X information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_x) - return data - - def set_full_y(self, data: Dict[str, Any]) -> None: - """ - Set the complete Y information. 
- - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - if self.master: - self._full_y = data - return None - - def set_full_x(self, data: Dict[str, Any]) -> None: - """ - Set the complete rotated longitude information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - if self.master: - self._full_x = data - return None - - # noinspection DuplicatedCode - def _filter_coordinates_selection(self): - """ - Use the selection limits to filter y, x, time, lev, lat, lon, lon_bnds and lat_bnds. - """ - - idx = self._get_idx_intervals() - - self.y = self._get_coordinate_values(self.get_full_y(), "Y") - self.x = self._get_coordinate_values(self.get_full_x(), "X") - - self.set_full_y({'data': self.y["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) - self.set_full_x({'data': self.x["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) - - super(LCCNes, self)._filter_coordinates_selection() - - return None - - def _get_pyproj_projection(self): - """ - Get projection data as in Pyproj library. - - Returns - ---------- - projection : pyproj.Proj - Grid projection. - """ - - projection = Proj(proj="lcc", - ellps="WGS84", - R=self.earth_radius[0], - lat_1=float64(self.projection_data["standard_parallel"][0]), - lat_2=float64(self.projection_data["standard_parallel"][1]), - lon_0=float64(self.projection_data["longitude_of_central_meridian"]), - lat_0=float64(self.projection_data["latitude_of_projection_origin"]), - to_meter=1, - x_0=0, - y_0=0, - a=self.earth_radius[1], - k_0=1.0, - ) - - return projection - - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. """ - if create_nes: - projection_data = {"grid_mapping_name": "lambert_conformal_conic", - "standard_parallel": [kwargs["lat_1"], kwargs["lat_2"]], - "longitude_of_central_meridian": kwargs["lon_0"], - "latitude_of_projection_origin": kwargs["lat_0"], - "x_0": kwargs["x_0"], "y_0": kwargs["y_0"], - "inc_x": kwargs["inc_x"], "inc_y": kwargs["inc_y"], - "nx": kwargs["nx"], "ny": kwargs["ny"], - } - else: - if "Lambert_Conformal" in self.variables.keys(): - projection_data = self.variables["Lambert_Conformal"] - self.free_vars("Lambert_Conformal") - elif "Lambert_conformal" in self.variables.keys(): - projection_data = self.variables["Lambert_conformal"] - self.free_vars("Lambert_conformal") - else: - # We will never have this condition since the LCC grid will never be correctly detected - # since the function __is_lcc in load_nes only detects LCC grids when there is Lambert_conformal - msg = "There is no variable called Lambert_Conformal, projection has not been defined." 
- raise RuntimeError(msg) - - if "dtype" in projection_data.keys(): - del projection_data["dtype"] - - if "data" in projection_data.keys(): - del projection_data["data"] - - if "dimensions" in projection_data.keys(): - del projection_data["dimensions"] - - if isinstance(projection_data["standard_parallel"], str): - projection_data["standard_parallel"] = [projection_data["standard_parallel"].split(", ")[0], - projection_data["standard_parallel"].split(", ")[1]] - - return projection_data - - # noinspection DuplicatedCode - def _create_dimensions(self, netcdf): - """ - Create "y", "x" and "spatial_nv" dimensions and the super dimensions "lev", "time", "time_nv", "lon" and "lat" - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(LCCNes, self)._create_dimensions(netcdf) - - # Create y and x dimensions - netcdf.createDimension("y", len(self.get_full_y()["data"])) - netcdf.createDimension("x", len(self.get_full_x()["data"])) - - # Create spatial_nv (number of vertices) dimension - if (self.lat_bnds is not None) and (self.lon_bnds is not None): - netcdf.createDimension("spatial_nv", 4) - - return None - - # noinspection DuplicatedCode - def _create_dimension_variables(self, netcdf): - """ - Create the "y" and "x" variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(LCCNes, self)._create_dimension_variables(netcdf) - - # LCC Y COORDINATES - full_y = self.get_full_y() - y = netcdf.createVariable("y", full_y["data"].dtype, ("y",)) - y.long_name = "y coordinate of projection" - if "units" in full_y.keys(): - y.units = full_y["units"] - else: - y.units = "m" - y.standard_name = "projection_y_coordinate" - if self.size > 1: - y.set_collective(True) - y[:] = full_y["data"] - - # LCC X COORDINATES - full_x = self.get_full_x() - x = netcdf.createVariable("x", full_x["data"].dtype, ("x",)) - x.long_name = "x coordinate of projection" - if "units" in full_x.keys(): - x.units = full_x["units"] - else: - x.units = "m" - x.standard_name = "projection_x_coordinate" - if self.size > 1: - x.set_collective(True) - x[:] = full_x["data"] - - return None - - # noinspection DuplicatedCode - def _create_centre_coordinates(self, **kwargs): - """ - Calculate centre latitudes and longitudes from grid details. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - if self.master: - # Get projection details on x - x_0 = float64(self.projection_data["x_0"]) - inc_x = float64(self.projection_data["inc_x"]) - nx = int(self.projection_data["nx"]) - - # Get projection details on y - y_0 = float64(self.projection_data["y_0"]) - inc_y = float64(self.projection_data["inc_y"]) - ny = int(self.projection_data["ny"]) - - # Create a regular grid in metres (1D) - self._full_x = {"data": linspace(x_0 + (inc_x / 2), x_0 + (inc_x / 2) + (inc_x * (nx - 1)), nx, - dtype=float64)} - self._full_y = {"data": linspace(y_0 + (inc_y / 2), y_0 + (inc_y / 2) + (inc_y * (ny - 1)), ny, - dtype=float64)} - - # Create a regular grid in metres (1D to 2D) - x = array([self._full_x["data"]] * len(self._full_y["data"])) - y = array([self._full_y["data"]] * len(self._full_x["data"])).T - - # Calculate centre latitudes and longitudes (UTM to LCC) - centre_lon, centre_lat = self.projection(x, y, inverse=True) - - return {"data": centre_lat}, {"data": centre_lon} - else: - return None, None - - def create_providentia_exp_centre_coordinates(self): - """ - Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. 
- - Returns - ---------- - model_centre_lat : dict - Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). - model_centre_lon : dict - Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). - """ - - # Get centre latitudes - model_centre_lat = self.lat - - # Get centre longitudes - model_centre_lon = self.lon - - return model_centre_lat, model_centre_lon - - # noinspection DuplicatedCode - def create_providentia_exp_grid_edge_coordinates(self): - """ - Calculate grid edge latitudes and longitudes and get model grid outline. - - Returns - ---------- - grid_edge_lat : dict - Dictionary with data of grid edge latitudes. - grid_edge_lon : dict - Dictionary with data of grid edge longitudes. - """ - # Get grid resolution - inc_x = abs(mean(diff(self.x["data"]))) - inc_y = abs(mean(diff(self.y["data"]))) - - # Get bounds for rotated coordinates - y_bnds = self._create_single_spatial_bounds(self.y["data"], inc_y) - x_bnds = self._create_single_spatial_bounds(self.x["data"], inc_x) - - # Get rotated latitudes for grid edge - left_edge_y = append(y_bnds.flatten()[::2], y_bnds.flatten()[-1]) - right_edge_y = flip(left_edge_y, 0) - top_edge_y = repeat(y_bnds[-1][-1], len(self.x["data"]) - 1) - bottom_edge_y = repeat(y_bnds[0][0], len(self.x["data"])) - y_grid_edge = concatenate((left_edge_y, top_edge_y, right_edge_y, bottom_edge_y)) - - # Get rotated longitudes for grid edge - left_edge_x = repeat(x_bnds[0][0], len(self.y["data"]) + 1) - top_edge_x = x_bnds.flatten()[1:-1:2] - right_edge_x = repeat(x_bnds[-1][-1], len(self.y["data"]) + 1) - bottom_edge_x = flip(x_bnds.flatten()[:-1:2], 0) - x_grid_edge = concatenate((left_edge_x, top_edge_x, right_edge_x, bottom_edge_x)) - - # Get edges for regular coordinates - grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) - - # Create grid outline by stacking the edges in both coordinates - model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T - grid_edge_lat = {"data": model_grid_outline[:, 1]} - grid_edge_lon = {"data": model_grid_outline[:, 0]} - - return grid_edge_lat, grid_edge_lon - - # noinspection DuplicatedCode - def create_spatial_bounds(self): - """ - Calculate longitude and latitude bounds and set them. - """ - - # Calculate LCC coordinates bounds - full_x = self.get_full_x() - full_y = self.get_full_y() - inc_x = abs(mean(diff(full_x["data"]))) - x_bnds = self._create_single_spatial_bounds(array([full_x["data"]] * len(full_y["data"])), - inc_x, spatial_nv=4) - - inc_y = abs(mean(diff(full_y["data"]))) - y_bnds = self._create_single_spatial_bounds(array([full_y["data"]] * len(full_x["data"])).T, - inc_y, spatial_nv=4, inverse=True) - - # Transform LCC bounds to regular bounds - lon_bnds, lat_bnds = self.projection(x_bnds, y_bnds, inverse=True) - - # Obtain regular coordinates bounds - self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) - self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - :]} - - self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) - self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - :]} - - return None - - @staticmethod - def _set_var_crs(var): - """ - Set the grid_mapping to "Lambert_Conformal". 
- - Parameters - ---------- - var : Variable - netCDF4-python variable object. - """ - - var.grid_mapping = "Lambert_Conformal" - var.coordinates = "lat lon" - - return None - - def _create_metadata(self, netcdf): - """ - Create the "crs" variable for the lambert conformal grid_mapping. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python Dataset - """ - - if self.projection_data is not None: - mapping = netcdf.createVariable("Lambert_Conformal", "i") - mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] - mapping.standard_parallel = self.projection_data["standard_parallel"] - mapping.longitude_of_central_meridian = self.projection_data["longitude_of_central_meridian"] - mapping.latitude_of_projection_origin = self.projection_data["latitude_of_projection_origin"] - - return None - - def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): - """ - Write output file with grib2 format. - - Parameters - ---------- - lat_flip : bool - Indicates if the latitudes need to be flipped Up-Down or Down-Up. Default False. - path : str - Path to the output file. - grib_keys : dict - Dictionary with the grib2 keys. - grib_template_path : str - Path to the grib2 file to use as template. - info : bool - Indicates if you want to print extra information during the process. - """ - - raise NotImplementedError("Grib2 format cannot be written in a Lambert Conformal Conic projection.") - - # noinspection DuplicatedCode - def create_shapefile(self): - """ - Create spatial GeoDataFrame (shapefile). - - Returns - ------- - shapefile : GeoPandasDataFrame - Shapefile dataframe. - """ - - if self.shapefile is None: - - # Get latitude and longitude cell boundaries - if self.lat_bnds is None or self.lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lat = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], - self.lat_bnds["data"].shape[2])) - aux_b_lon = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], - self.lon_bnds["data"].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lon.shape[0]): - geometry.append(Polygon([(aux_b_lon[i, 0], aux_b_lat[i, 0]), - (aux_b_lon[i, 1], aux_b_lat[i, 1]), - (aux_b_lon[i, 2], aux_b_lat[i, 2]), - (aux_b_lon[i, 3], aux_b_lat[i, 3]), - (aux_b_lon[i, 0], aux_b_lat[i, 0])])) - - # Create dataframe containing all polygons - fids = self.get_fids() - gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") - self.shapefile = gdf - - else: - gdf = self.shapefile - - return gdf - - # noinspection DuplicatedCode - def get_centroids_from_coordinates(self): - """ - Get centroids from geographical coordinates. - - Returns - ------- - centroids_gdf: GeoPandasDataFrame - Centroids dataframe. 
- """ - - # Get centroids from coordinates - centroids = [] - for lat_ind in range(0, self.lon["data"].shape[0]): - for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], - self.lat["data"][lat_ind, lon_ind])) - - # Create dataframe containing all points - fids = self.get_fids() - centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") - - return centroids_gdf diff --git a/build/lib/nes/nc_projections/mercator_nes.py b/build/lib/nes/nc_projections/mercator_nes.py deleted file mode 100644 index 520f9bb..0000000 --- a/build/lib/nes/nc_projections/mercator_nes.py +++ /dev/null @@ -1,610 +0,0 @@ -#!/usr/bin/env python - -from numpy import float64, linspace, array, mean, diff, append, flip, repeat, concatenate, vstack -from geopandas import GeoDataFrame -from pandas import Index -from pyproj import Proj -from copy import deepcopy -from typing import Dict, Any -from shapely.geometry import Polygon, Point -from nes.nc_projections.default_nes import Nes - - -class MercatorNes(Nes): - """ - - Attributes - ---------- - _full_y : dict - Y coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. - _full_x : dict - X coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. - y : dict - Y coordinates dictionary with the portion of "data" corresponding to the rank values. - x : dict - X coordinates dictionary with the portion of "data" corresponding to the rank values. - _var_dim : tuple - A Tuple with the name of the Y and X dimensions for the variables. - ("y", "x") for a Mercator projection. - _lat_dim : tuple - A Tuple with the name of the dimensions of the Latitude values. - ("y", "x") for a Mercator projection. - _lon_dim : tuple - A Tuple with the name of the dimensions of the Longitude values. - ("y", "x") for a Mercator projection. - """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the MercatorNes class - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - times : list, None - List of times to substitute the current ones while creation. 
- - """ - self._full_y = None - self._full_x = None - - super(MercatorNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, balanced=balanced, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, **kwargs) - - if create_nes: - # Dimensions screening - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - else: - # Complete dimensions - self._full_y = self._get_coordinate_dimension("y") - self._full_x = self._get_coordinate_dimension("x") - - # Dimensions screening - self.y = self._get_coordinate_values(self.get_full_y(), "Y") - self.x = self._get_coordinate_values(self.get_full_x(), "X") - - # Set axis limits for parallel writing - self.write_axis_limits = self._get_write_axis_limits() - - self._var_dim = ("y", "x") - self._lat_dim = ("y", "x") - self._lon_dim = ("y", "x") - - self.free_vars("crs") - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the Nes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - def get_full_y(self) -> Dict[str, Any]: - """ - Retrieve the complete Y information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_y) - - return data - - def get_full_x(self) -> Dict[str, Any]: - """ - Retrieve the complete X information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_x) - return data - - def set_full_y(self, data: Dict[str, Any]) -> None: - """ - Set the complete Y information. 
- - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - if self.master: - self._full_y = data - return None - - def set_full_x(self, data: Dict[str, Any]) -> None: - """ - Set the complete rotated longitude information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - if self.master: - self._full_x = data - return None - - # noinspection DuplicatedCode - def _filter_coordinates_selection(self): - """ - Use the selection limits to filter y, x, time, lev, lat, lon, lon_bnds and lat_bnds. - """ - - idx = self._get_idx_intervals() - - self.y = self._get_coordinate_values(self.get_full_y(), "Y") - self.x = self._get_coordinate_values(self.get_full_x(), "X") - - self.set_full_y({'data': self.y["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) - self.set_full_x({'data': self.x["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) - - super(MercatorNes, self)._filter_coordinates_selection() - - return None - - def _get_pyproj_projection(self): - """ - Get projection data as in Pyproj library. - - Returns - ---------- - projection : pyproj.Proj - Grid projection. - """ - - projection = Proj(proj="merc", - a=self.earth_radius[1], - b=self.earth_radius[0], - lat_ts=float64(self.projection_data["standard_parallel"]), - lon_0=float64(self.projection_data["longitude_of_projection_origin"]),) - - return projection - - # noinspection DuplicatedCode - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. - """ - if create_nes: - projection_data = {"grid_mapping_name": "mercator", - "standard_parallel": kwargs["lat_ts"], - "longitude_of_projection_origin": kwargs["lon_0"], - "x_0": kwargs["x_0"], "y_0": kwargs["y_0"], - "inc_x": kwargs["inc_x"], "inc_y": kwargs["inc_y"], - "nx": kwargs["nx"], "ny": kwargs["ny"], - } - else: - if "mercator" in self.variables.keys(): - projection_data = self.variables["mercator"] - self.free_vars("mercator") - - else: - msg = "There is no variable called mercator, projection has not been defined." - raise RuntimeError(msg) - - if "dtype" in projection_data.keys(): - del projection_data["dtype"] - - if "data" in projection_data.keys(): - del projection_data["data"] - - if "dimensions" in projection_data.keys(): - del projection_data["dimensions"] - - return projection_data - - # noinspection DuplicatedCode - def _create_dimensions(self, netcdf): - """ - Create "y", "x" and "spatial_nv" dimensions and the super dimensions "lev", "time", "time_nv", "lon" and "lat" - - Parameters - ---------- - netcdf : Dataset - NetCDF object. 
- """ - - super(MercatorNes, self)._create_dimensions(netcdf) - - # Create y and x dimensions - netcdf.createDimension("y", len(self.get_full_y()["data"])) - netcdf.createDimension("x", len(self.get_full_x()["data"])) - - # Create spatial_nv (number of vertices) dimension - if (self.lat_bnds is not None) and (self.lon_bnds is not None): - netcdf.createDimension("spatial_nv", 4) - - return None - - # noinspection DuplicatedCode - def _create_dimension_variables(self, netcdf): - """ - Create the "y" and "x" variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(MercatorNes, self)._create_dimension_variables(netcdf) - - # MERCATOR Y COORDINATES - full_y = self.get_full_y() - y = netcdf.createVariable("y", full_y["data"].dtype, ("y",)) - y.long_name = "y coordinate of projection" - if "units" in full_y.keys(): - y.units = full_y["units"] - else: - y.units = "m" - y.standard_name = "projection_y_coordinate" - if self.size > 1: - y.set_collective(True) - y[:] = full_y["data"] - - # MERCATOR X COORDINATES - full_x = self.get_full_x() - x = netcdf.createVariable("x", full_x["data"].dtype, ("x",)) - x.long_name = "x coordinate of projection" - if "units" in full_x.keys(): - x.units = full_x["units"] - else: - x.units = "m" - x.standard_name = "projection_x_coordinate" - if self.size > 1: - x.set_collective(True) - x[:] = full_x["data"] - - return None - - # noinspection DuplicatedCode - def _create_centre_coordinates(self, **kwargs): - """ - Calculate centre latitudes and longitudes from grid details. - """ - if self.master: - # Get projection details on x - x_0 = float64(self.projection_data["x_0"]) - inc_x = float64(self.projection_data["inc_x"]) - nx = int(self.projection_data["nx"]) - - # Get projection details on y - y_0 = float64(self.projection_data["y_0"]) - inc_y = float64(self.projection_data["inc_y"]) - ny = int(self.projection_data["ny"]) - - # Create a regular grid in metres (1D) - self._full_x = {"data": linspace(x_0 + (inc_x / 2), x_0 + (inc_x / 2) + (inc_x * (nx - 1)), nx, - dtype=float64)} - self._full_y = {"data": linspace(y_0 + (inc_y / 2), y_0 + (inc_y / 2) + (inc_y * (ny - 1)), ny, - dtype=float64)} - - # Create a regular grid in metres (1D to 2D) - x = array([self._full_x["data"]] * len(self._full_y["data"])) - y = array([self._full_y["data"]] * len(self._full_x["data"])).T - - # Calculate centre latitudes and longitudes (UTM to Mercator) - centre_lon, centre_lat = self.projection(x, y, inverse=True) - - return {"data": centre_lat}, {"data": centre_lon} - else: - return None, None - - def create_providentia_exp_centre_coordinates(self): - """ - Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. - - Returns - ---------- - model_centre_lat : dict - Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). - model_centre_lon : dict - Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). - """ - - # Get centre latitudes - model_centre_lat = self.lat - - # Get centre longitudes - model_centre_lon = self.lon - - return model_centre_lat, model_centre_lon - - # noinspection DuplicatedCode - def create_providentia_exp_grid_edge_coordinates(self): - """ - Calculate grid edge latitudes and longitudes and get model grid outline. - - Returns - ---------- - grid_edge_lat : dict - Dictionary with data of grid edge latitudes. - grid_edge_lon : dict - Dictionary with data of grid edge longitudes. 
- """ - - # Get grid resolution - inc_x = abs(mean(diff(self.x["data"]))) - inc_y = abs(mean(diff(self.y["data"]))) - - # Get bounds for rotated coordinates - y_bounds = self._create_single_spatial_bounds(self.y["data"], inc_y) - x_bounds = self._create_single_spatial_bounds(self.x["data"], inc_x) - - # Get rotated latitudes for grid edge - left_edge_y = append(y_bounds.flatten()[::2], y_bounds.flatten()[-1]) - right_edge_y = flip(left_edge_y, 0) - top_edge_y = repeat(y_bounds[-1][-1], len(self.x["data"]) - 1) - bottom_edge_y = repeat(y_bounds[0][0], len(self.x["data"])) - y_grid_edge = concatenate((left_edge_y, top_edge_y, right_edge_y, bottom_edge_y)) - - # Get rotated longitudes for grid edge - left_edge_x = repeat(x_bounds[0][0], len(self.y["data"]) + 1) - top_edge_x = x_bounds.flatten()[1:-1:2] - right_edge_x = repeat(x_bounds[-1][-1], len(self.y["data"]) + 1) - bottom_edge_x = flip(x_bounds.flatten()[:-1:2], 0) - x_grid_edge = concatenate((left_edge_x, top_edge_x, right_edge_x, bottom_edge_x)) - - # Get edges for regular coordinates - grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) - - # Create grid outline by stacking the edges in both coordinates - model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T - grid_edge_lat = {"data": model_grid_outline[:, 1]} - grid_edge_lon = {"data": model_grid_outline[:, 0]} - - return grid_edge_lat, grid_edge_lon - - # noinspection DuplicatedCode - def create_spatial_bounds(self): - """ - Calculate longitude and latitude bounds and set them. - """ - - # Calculate Mercator coordinates bounds - full_x = self.get_full_x() - full_y = self.get_full_y() - inc_x = abs(mean(diff(full_x["data"]))) - x_bnds = self._create_single_spatial_bounds(array([full_x["data"]] * len(full_y["data"])), - inc_x, spatial_nv=4) - - inc_y = abs(mean(diff(full_y["data"]))) - y_bnds = self._create_single_spatial_bounds(array([full_y["data"]] * len(full_x["data"])).T, - inc_y, spatial_nv=4, inverse=True) - - # Transform Mercator bounds to regular bounds - lon_bnds, lat_bnds = self.projection(x_bnds, y_bnds, inverse=True) - - # Obtain regular coordinates bounds - self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) - self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - :]} - self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) - self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - :]} - - return None - - @staticmethod - def _set_var_crs(var): - """ - Set the grid_mapping to "mercator". - - Parameters - ---------- - var : Variable - netCDF4-python variable object. - """ - - var.grid_mapping = "mercator" - var.coordinates = "lat lon" - - return None - - def _create_metadata(self, netcdf): - """ - Create the "crs" variable for the Mercator grid_mapping. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python Dataset. 
- """ - - if self.projection_data is not None: - mapping = netcdf.createVariable("mercator", "i") - mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] - mapping.standard_parallel = self.projection_data["standard_parallel"] - mapping.longitude_of_projection_origin = self.projection_data["longitude_of_projection_origin"] - - return None - - def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): - """ - Write output file with grib2 format. - - Parameters - ---------- - lat_flip : bool - Indicates if you want to flip latitudes Up-Down - path : str - Path to the output file. - grib_keys : dict - Dictionary with the grib2 keys. - grib_template_path : str - Path to the grib2 file to use as template. - info : bool - Indicates if you want to print extra information during the process. - """ - - raise NotImplementedError("Grib2 format cannot be written in a Mercator projection.") - - # noinspection DuplicatedCode - def create_shapefile(self): - """ - Create spatial GeoDataFrame (shapefile). - - Returns - ------- - shapefile : GeoPandasDataFrame - Shapefile dataframe. - """ - - if self.shapefile is None: - - # Get latitude and longitude cell boundaries - if self.lat_bnds is None or self.lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lat = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], - self.lat_bnds["data"].shape[2])) - aux_b_lon = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], - self.lon_bnds["data"].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lon.shape[0]): - geometry.append(Polygon([(aux_b_lon[i, 0], aux_b_lat[i, 0]), - (aux_b_lon[i, 1], aux_b_lat[i, 1]), - (aux_b_lon[i, 2], aux_b_lat[i, 2]), - (aux_b_lon[i, 3], aux_b_lat[i, 3]), - (aux_b_lon[i, 0], aux_b_lat[i, 0])])) - - # Create dataframe containing all polygons - fids = self.get_fids() - gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") - self.shapefile = gdf - - else: - gdf = self.shapefile - - return gdf - - # noinspection DuplicatedCode - def get_centroids_from_coordinates(self): - """ - Get centroids from geographical coordinates. - - Returns - ------- - centroids_gdf: GeoPandasDataFrame - Centroids dataframe. 
- """ - - # Get centroids from coordinates - centroids = [] - for lat_ind in range(0, self.lon["data"].shape[0]): - for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], - self.lat["data"][lat_ind, lon_ind])) - - # Create dataframe containing all points - fids = self.get_fids() - centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") - - return centroids_gdf diff --git a/build/lib/nes/nc_projections/points_nes.py b/build/lib/nes/nc_projections/points_nes.py deleted file mode 100644 index 29022b5..0000000 --- a/build/lib/nes/nc_projections/points_nes.py +++ /dev/null @@ -1,755 +0,0 @@ -#!/usr/bin/env python - -import sys -from warnings import warn -from numpy import float64, arange, array, ndarray, generic, issubdtype, character, concatenate -from pandas import Index -from geopandas import GeoDataFrame, points_from_xy -from pyproj import Proj -from copy import deepcopy -from netCDF4 import date2num -from .default_nes import Nes - - -class PointsNes(Nes): - """ - - Attributes - ---------- - _var_dim : tuple - A Tuple with the name of the Y and X dimensions for the variables. - ("lat", "lon", ) for a points grid. - _lat_dim : tuple - A Tuple with the name of the dimensions of the Latitude values. - ("lat", ) for a points grid. - _lon_dim : tuple - A Tuple with the name of the dimensions of the Longitude values. - ("lon", ) for a points grid. - _station : tuple - A Tuple with the name of the dimensions of the station values. - ("station", ) for a points grid. - """ - - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the PointsNes class. - - Parameters - ---------- - comm: MPI.Comm - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset or None - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "X". - accepted values: ["X", "T"]. - strlen: int - Maximum length of strings in NetCDF. Default: 75. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. 
- """ - - super(PointsNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, balanced=balanced, **kwargs) - - if create_nes: - # Dimensions screening - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "X") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - - # Complete dimensions - self._station = {"data": arange(len(self.get_full_longitudes()["data"]))} - - # Dimensions screening - self.station = self._get_coordinate_values(self._station, "X") - - # Set axis limits for parallel writing - self.write_axis_limits = self._get_write_axis_limits() - - self._var_dim = ("station",) - self._lat_dim = ("station",) - self._lon_dim = ("station",) - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, - create_nes=False, balanced=False, times=None, **kwargs): - """ - Initialize the Nes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "X". - accepted values: ["X", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - @staticmethod - def _get_pyproj_projection(): - """ - Get projection data as in Pyproj library. - - Returns - ---------- - projection : pyproj.Proj - Grid projection. - """ - - projection = Proj(proj="latlong", ellps="WGS84",) - - return projection - - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. - """ - - return None - - def _create_dimensions(self, netcdf): - """ - Create "time", "time_nv", "station" and "strlen" dimensions. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. 
- """ - - # Create time dimension - netcdf.createDimension("time", None) - - # Create time_nv (number of vertices) dimension - if self.time_bnds is not None: - netcdf.createDimension("time_nv", 2) - - # Create station dimension - # The number of longitudes is equal to the number of stations - netcdf.createDimension("station", len(self.get_full_longitudes()["data"])) - - # Create string length dimension - if self.strlen is not None: - netcdf.createDimension("strlen", self.strlen) - - return None - - # noinspection DuplicatedCode - def _create_dimension_variables(self, netcdf): - """ - Create the "time", "time_bnds", "station", "lat", "lat_bnds", "lon" and "lon_bnds" variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - # TIMES - time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - time_var.units = "hours since {0}".format( - self.get_full_times()[self._get_time_id(self.hours_start, first=True)].strftime("%Y-%m-%d %H:%M:%S")) - time_var.standard_name = "time" - time_var.calendar = "standard" - time_var.long_name = "time" - if self.time_bnds is not None: - time_var.bounds = "time_bnds" - if self.size > 1: - time_var.set_collective(True) - time_var[:] = date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): - self._get_time_id(self.hours_end, first=False)], - time_var.units, time_var.calendar) - - # TIME BOUNDS - if self.time_bnds is not None: - time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), zlib=self.zip_lvl, - complevel=self.zip_lvl) - if self.size > 1: - time_bnds_var.set_collective(True) - time_bnds_var[:] = date2num(self.get_full_time_bnds(), time_var.units, calendar="standard") - - # STATIONS - stations = netcdf.createVariable("station", float64, ("station",), zlib=self.zip_lvl > 0, - complevel=self.zip_lvl) - stations.units = "" - stations.axis = "X" - stations.long_name = "" - stations.standard_name = "station" - if self.size > 1: - stations.set_collective(True) - stations[:] = self._station["data"] - - # LATITUDES - lat = netcdf.createVariable("lat", float64, self._lat_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lat.units = "degrees_north" - lat.axis = "Y" - lat.long_name = "latitude coordinate" - lat.standard_name = "latitude" - if self.lat_bnds is not None: - lat.bounds = "lat_bnds" - if self.size > 1: - lat.set_collective(True) - lat[:] = self.get_full_latitudes()["data"] - - # LONGITUDES - lon = netcdf.createVariable("lon", float64, self._lon_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lon.units = "degrees_east" - lon.axis = "X" - lon.long_name = "longitude coordinate" - lon.standard_name = "longitude" - if self.lon_bnds is not None: - lon.bounds = "lon_bnds" - if self.size > 1: - lon.set_collective(True) - lon[:] = self.get_full_longitudes()["data"] - - return None - - # noinspection DuplicatedCode - def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): - """ - Get the coordinate data of the current portion. - - Parameters - ---------- - coordinate_info : dict, list - Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. - coordinate_axis : str - Name of the coordinate to extract. Accepted values: ["X"]. - bounds : bool - Boolean variable to know if there are coordinate bounds. - Returns - ------- - values : dict - Dictionary with the portion of data corresponding to the rank. 
- """ - - if coordinate_info is None: - return None - - if not isinstance(coordinate_info, dict): - values = {"data": deepcopy(coordinate_info)} - else: - values = deepcopy(coordinate_info) - - coordinate_len = len(values["data"].shape) - if bounds: - coordinate_len -= 1 - - if coordinate_axis == "X": - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif coordinate_len == 2: - values["data"] = values["data"][self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - - return values - - def _read_variable(self, var_name): - """ - Read the corresponding variable data according to the current rank. - - Parameters - ---------- - var_name : str - Name of the variable to read. - - Returns - ------- - data: array - Portion of the variable data corresponding to the rank. - """ - - nc_var = self.dataset.variables[var_name] - var_dims = nc_var.dimensions - - # Read data in 1 or 2 dimensions - if len(var_dims) < 2: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif len(var_dims) == 2: - if "strlen" in var_dims: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], :] - data = array(["".join(i.tobytes().decode("ascii").replace("\x00", "")) for i in data], dtype=object) - else: - data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - else: - raise NotImplementedError("Error with {0}. Only can be read netCDF with 2 dimensions or less".format( - var_name)) - - # Unmask array - data = self._unmask_array(data) - - return data - - # noinspection DuplicatedCode - def _create_variables(self, netcdf, chunking=False): - """ - Create the netCDF file variables. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open Dataset. - chunking : bool - Indicates if you want to chunk the output netCDF. - """ - - if self.variables is not None: - for i, (var_name, var_dict) in enumerate(self.variables.items()): - # Get data type - if "dtype" in var_dict.keys(): - var_dtype = var_dict["dtype"] - if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): - msg = "WARNING!!! " - msg += "Different data types for variable {0}. ".format(var_name) - msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) - warn(msg) - sys.stderr.flush() - try: - var_dict["data"] = var_dict["data"].astype(var_dtype) - except Exception: # TODO: Detect exception - raise TypeError("It was not possible to cast the data to the input dtype.") - else: - var_dtype = var_dict["data"].dtype - if var_dtype is object: - raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") - - # Get dimensions when reading datasets - if "dimensions" in var_dict.keys(): - var_dims = var_dict["dimensions"] - # Get dimensions when creating new datasets - else: - if len(var_dict["data"].shape) == 1: - # For data that depends only on station (e.g. station_code) - var_dims = self._var_dim - else: - # For data that is dependent on time and station (e.g. 
PM10) - var_dims = ("time",) + self._var_dim - - if var_dict["data"] is not None: - - # Ensure data is of type numpy array (to create NES) - if not isinstance(var_dict["data"], (ndarray, generic)): - try: - var_dict["data"] = array(var_dict["data"]) - except AttributeError: - raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) - - # Convert list of strings to chars for parallelization - if issubdtype(var_dtype, character): - var_dict["data_aux"] = self._str2char(var_dict["data"]) - var_dims += ("strlen",) - var_dtype = "S1" - - if self.info: - print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - if not chunking: - var = netcdf.createVariable(var_name, var_dtype, var_dims, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - else: - if self.balanced: - raise NotImplementedError("A balanced data cannot be chunked.") - if self.master: - chunk_size = var_dict["data"].shape - else: - chunk_size = None - chunk_size = self.comm.bcast(chunk_size, root=0) - var = netcdf.createVariable(var_name, var_dtype, var_dims, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl, - chunksizes=chunk_size) - - if self.info: - print("Rank {0:03d}: Var {1} created ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - if self.size > 1: - var.set_collective(True) - if self.info: - print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - - for att_name, att_value in var_dict.items(): - if att_name == "data": - if self.info: - print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - if "data_aux" in var_dict.keys(): - att_value = var_dict["data_aux"] - if len(att_value.shape) == 1: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, - att_value.shape)) - elif len(att_value.shape) == 2: - if "strlen" in var_dims: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, - att_value.shape)) - except ValueError: - raise ValueError( - "Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]: - self.write_axis_limits["x_max"]].shape, - att_value.shape)) - else: - try: - var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value - except IndexError: - out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - out_shape, att_value.shape)) - except ValueError: - out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape - raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format(
-                                    out_shape, att_value.shape))
-
-                        if self.info:
-                            print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1,
-                                                                                len(self.variables)))
-                    elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]:
-                        var.setncattr(att_name, att_value)
-
-                if "data_aux" in var_dict.keys():
-                    del var_dict["data_aux"]
-
-                self._set_var_crs(var)
-                if self.info:
-                    print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1,
-                                                                             len(self.variables)))
-
-        return None
-
-    # noinspection DuplicatedCode
-    def _gather_data(self, data_to_gather):
-        """
-        Gather all the variable data into the MPI rank 0 to perform a serial write.
-
-        Parameters
-        ----------
-        data_to_gather : dict
-            Variables to gather.
-
-        Returns
-        -------
-        data_list : dict
-            Gathered variable data.
-        """
-        data_list = deepcopy(data_to_gather)
-        for var_name, var_info in data_list.items():
-            try:
-                # noinspection PyArgumentList
-                data_aux = self.comm.gather(data_list[var_name]["data"], root=0)
-                if self.rank == 0:
-                    shp_len = len(data_list[var_name]["data"].shape)
-                    if self.parallel_method == "X":
-                        # Concatenate over station
-                        if shp_len == 1:
-                            # dimensions = (station)
-                            axis = 0
-                        elif shp_len == 2:
-                            if "strlen" in var_info["dimensions"]:
-                                # dimensions = (station, strlen)
-                                axis = 0
-                            else:
-                                # dimensions = (time, station)
-                                axis = 1
-                        else:
-                            msg = "The points NetCDF must have "
-                            msg += "surface values (without levels)."
-                            raise NotImplementedError(msg)
-                    elif self.parallel_method == "T":
-                        # Concatenate over time
-                        if shp_len == 1:
-                            # dimensions = (station)
-                            axis = None
-                        elif shp_len == 2:
-                            if "strlen" in var_info["dimensions"]:
-                                # dimensions = (station, strlen)
-                                axis = None
-                            else:
-                                # dimensions = (time, station)
-                                axis = 0
-                        else:
-                            msg = "The points NetCDF must only have surface values (without levels)."
-                            raise NotImplementedError(msg)
-                    else:
-                        raise NotImplementedError(
-                            "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format(
-                                meth=self.parallel_method, accept=["X", "T"]))
-                    data_list[var_name]["data"] = concatenate(data_aux, axis=axis)
-            except Exception as e:
-                msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n"
-                print(msg)
-                sys.stderr.write(msg)
-                print(e)
-                sys.stderr.write(str(e))
-                sys.stderr.flush()
-                self.comm.Abort(1)
-
-        return data_list
-
-    def _create_centre_coordinates(self, **kwargs):
-        """
-        Get centre latitudes and longitudes from the "lat" and "lon" keyword arguments.
-
-        Parameters
-        ----------
-        **kwargs : dict
-            Keyword arguments with the "lat" and "lon" centre coordinates.
-
-        Returns
-        -------
-        dict, dict
-            Dictionaries with the centre latitude and centre longitude data.
-        """
-
-        # Get centre latitudes
-        centre_lat = kwargs["lat"]
-
-        # Get centre longitudes
-        centre_lon = kwargs["lon"]
-
-        return {"data": centre_lat}, {"data": centre_lon}
-
-    def _create_metadata(self, netcdf):
-        """
-        Create metadata variables.
-
-        Parameters
-        ----------
-        netcdf : Dataset
-            NetCDF object.
-        """
-
-        return None
-
-    def create_spatial_bounds(self):
-        """
-        Calculate longitude and latitude bounds and set them.
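-
-        Raises
-        ------
-        NotImplementedError
-            Always, since point datasets have no cell geometry to bound.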
- """ - - raise NotImplementedError("Spatial bounds cannot be created for points datasets.") - - def to_providentia(self, model_centre_lon, model_centre_lat, grid_edge_lon, grid_edge_lat): - """ - Transform a PointsNes into a PointsNesProvidentia object - - Returns - ---------- - points_nes_providentia : nes.Nes - Points Nes Providentia Object - """ - - from .points_nes_providentia import PointsNesProvidentia - - points_nes_providentia = PointsNesProvidentia(comm=self.comm, - info=self.info, - balanced=self.balanced, - parallel_method=self.parallel_method, - avoid_first_hours=self.hours_start, - avoid_last_hours=self.hours_end, - first_level=self.first_level, - last_level=self.last_level, - create_nes=True, - times=self.time, - model_centre_lon=model_centre_lon, - model_centre_lat=model_centre_lat, - grid_edge_lon=grid_edge_lon, - grid_edge_lat=grid_edge_lat, - lat=self.lat["data"], - lon=self.lon["data"] - ) - - # Convert dimensions (time, lev, lat, lon) to (station, time) for interpolated variables and reshape data - variables = {} - interpolated_variables = deepcopy(self.variables) - for var_name, var_info in interpolated_variables.items(): - variables[var_name] = {} - # ("time", "lev", "lat", "lon") or ("time", "lat", "lon") to ("station", "time") - if len(var_info["dimensions"]) != len(var_info["data"].shape): - variables[var_name]["data"] = var_info["data"].T - variables[var_name]["dimensions"] = ("station", "time") - else: - variables[var_name]["data"] = var_info["data"] - variables[var_name]["dimensions"] = var_info["dimensions"] - - # Set variables - points_nes_providentia.variables = variables - - return points_nes_providentia - - def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): - """ - Write output file with grib2 format. - - Parameters - ---------- - lat_flip : bool - Indicates if you want to flip the latitude direction. - path : str - Path to the output file. - grib_keys : dict - Dictionary with the grib2 keys. - grib_template_path : str - Path to the grib2 file to use as template. - info : bool - Indicates if you want to print extra information during the process. - """ - - raise NotImplementedError("Grib2 format cannot be written with point data.") - - def create_shapefile(self): - """ - Create spatial GeoDataFrame (shapefile). - - Returns - ------- - shapefile : GeoPandasDataFrame - Shapefile dataframe. - """ - - if self.shapefile is None: - - # Create dataframe containing all points - gdf = self.get_centroids_from_coordinates() - self.shapefile = gdf - - else: - gdf = self.shapefile - - return gdf - - def get_centroids_from_coordinates(self): - """ - Get centroids from geographical coordinates. - - Returns - ------- - centroids_gdf: GeoPandasDataFrame - Centroids dataframe. - """ - - # Get centroids from coordinates - centroids = points_from_xy(self.lon["data"], self.lat["data"]) - - # Create dataframe containing all points - fids = arange(len(self.get_full_longitudes()["data"])) - fids = fids[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids), - geometry=centroids, - crs="EPSG:4326") - - return centroids_gdf - - def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): - """ - Add variables data to shapefile. - - var_list : list, str - List (or single string) of the variables to be loaded and saved in the shapefile. - idx_lev : int - Index of vertical level for which the data will be saved in the shapefile. 
-        idx_time : int
-            Index of time for which the data will be saved in the shapefile.
-        """
-
-        if idx_lev != 0:
-            msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev)
-            raise ValueError(msg)
-
-        for var_name in var_list:
-            # station as dimension
-            if len(self.variables[var_name]["dimensions"]) == 1:
-                self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel()
-            # station and time as dimensions
-            else:
-                self.shapefile[var_name] = self.variables[var_name]["data"][idx_time, :].ravel()
-
-        return None
-
-    @staticmethod
-    def _get_axis_index_(axis):
-        if axis == "T":
-            value = 0
-        elif axis == "X":
-            value = 1
-        else:
-            raise ValueError("Unknown axis: {0}".format(axis))
-        return value
-
-    @staticmethod
-    def _set_var_crs(var):
-        """
-        Set the grid_mapping.
-
-        Parameters
-        ----------
-        var : Variable
-            netCDF4-python variable object.
-        """
-        var.coordinates = "lat lon"
-
-        return None
diff --git a/build/lib/nes/nc_projections/points_nes_ghost.py b/build/lib/nes/nc_projections/points_nes_ghost.py
deleted file mode 100644
index 0df1c75..0000000
--- a/build/lib/nes/nc_projections/points_nes_ghost.py
+++ /dev/null
@@ -1,818 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-from warnings import warn
-from numpy import float64, empty, ndarray, generic, array, issubdtype, character, concatenate, int64
-from netCDF4 import date2num
-from copy import deepcopy
-from .points_nes import PointsNes
-
-
-class PointsNesGHOST(PointsNes):
-    """
-
-    Attributes
-    ----------
-    _qa : dict
-        Quality flags (GHOST checks) dictionary with the complete "data" key for all the values and the rest of the
-        attributes.
-    _flag : dict
-        Data flags (given by data provider) dictionary with the complete "data" key for all the values and the rest of
-        the attributes.
-    qa : dict
-        Quality flags (GHOST checks) dictionary with the portion of "data" corresponding to the rank values.
-    flag : dict
-        Data flags (given by data provider) dictionary with the portion of "data" corresponding to the rank values.
-    """
-
-    def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X",
-                 avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False,
-                 balanced=False, times=None, **kwargs):
-        """
-        Initialize the PointsNesGHOST class.
-
-        Parameters
-        ----------
-        comm: MPI.COMM
-            MPI Communicator.
-        path: str
-            Path to the NetCDF to initialize the object.
-        info: bool
-            Indicates if you want to get reading/writing info.
-        dataset: Dataset
-            NetCDF4-python Dataset to initialize the class.
-        parallel_method : str
-            Indicates the parallelization method that you want. Default: "X".
-            Accepted values: ["X"].
-        avoid_first_hours : int
-            Number of hours to remove from first time steps.
-        avoid_last_hours : int
-            Number of hours to remove from last time steps.
-        first_level : int
-            Index of the first level to use.
-        last_level : int, None
-            Index of the last level to use. None if it is the last.
-        create_nes : bool
-            Indicates if you want to create the object from scratch (True) or through an existing file.
-        balanced : bool
-            Indicates if you want a balanced parallelization or not.
-            Balanced datasets cannot be written in chunking mode.
-        times : list, None
-            List of times to substitute the current ones while creation.
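-
-        Notes
-        -----
-        After initialization the GHOST quality-assurance arrays are exposed
-        as ``self.flag`` and ``self.qa``, screened to the portion of stations
-        owned by each rank.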
- """ - - super(PointsNesGHOST, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, balanced=balanced, **kwargs) - - # Complete dimensions - self._flag = self._get_coordinate_dimension(["flag"]) - self._qa = self._get_coordinate_dimension(["qa"]) - - # Dimensions screening - self.flag = self._get_coordinate_values(self._flag, "X") - self.qa = self._get_coordinate_values(self._qa, "X") - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the PointsNesGHOST class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "X". - Accepted values: ["X"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - def _create_dimensions(self, netcdf): - """ - Create "N_flag_codes" and "N_qa_codes" dimensions and the super dimensions - "time", "time_nv", "station", and "strlen". - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(PointsNesGHOST, self)._create_dimensions(netcdf) - - # Create N_flag_codes and N_qa_codes dimensions - netcdf.createDimension("N_flag_codes", self._flag["data"].shape[2]) - netcdf.createDimension("N_qa_codes", self._qa["data"].shape[2]) - - return None - - # noinspection DuplicatedCode - def _create_dimension_variables(self, netcdf): - """ - Create the "time", "time_bnds", "station", "lat", "lat_bnds", "lon" and "lon_bnds" variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. 
- """ - - # TIMES - time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - time_var.units = "hours since {0}".format( - self.get_full_times()[self._get_time_id(self.hours_start, first=True)].strftime("%Y-%m-%d %H:%M:%S")) - time_var.standard_name = "time" - time_var.calendar = "standard" - time_var.long_name = "time" - if self.time_bnds is not None: - time_var.bounds = "time_bnds" - if self.size > 1: - time_var.set_collective(True) - time_var[:] = date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): - self._get_time_id(self.hours_end, first=False)], - time_var.units, time_var.calendar) - - # TIME BOUNDS - if self.time_bnds is not None: - time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), zlib=self.zip_lvl, - complevel=self.zip_lvl) - if self.size > 1: - time_bnds_var.set_collective(True) - time_bnds_var[:] = date2num(self.get_full_time_bnds(), time_var.units, calendar="standard") - - # STATIONS - stations = netcdf.createVariable("station", float64, ("station",), zlib=self.zip_lvl > 0, - complevel=self.zip_lvl) - stations.units = "" - stations.axis = "X" - stations.long_name = "" - stations.standard_name = "station" - if self.size > 1: - stations.set_collective(True) - stations[:] = self._station["data"] - - # LATITUDES - lat = netcdf.createVariable("latitude", float64, self._lat_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lat.units = "degrees_north" - lat.axis = "Y" - lat.long_name = "latitude coordinate" - lat.standard_name = "latitude" - if self.lat_bnds is not None: - lat.bounds = "lat_bnds" - if self.size > 1: - lat.set_collective(True) - lat[:] = self.get_full_latitudes()["data"] - - # LONGITUDES - lon = netcdf.createVariable("longitude", float64, self._lon_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - lon.units = "degrees_east" - lon.axis = "X" - lon.long_name = "longitude coordinate" - lon.standard_name = "longitude" - if self.lon_bnds is not None: - lon.bounds = "lon_bnds" - if self.size > 1: - lon.set_collective(True) - lon[:] = self.get_full_longitudes()["data"] - - def erase_flags(self): - - first_time_idx = self._get_time_id(self.hours_start, first=True) - last_time_idx = self._get_time_id(self.hours_end, first=False) - t_len = last_time_idx - first_time_idx - - self._qa["data"] = empty((len(self.get_full_longitudes()["data"]), t_len, 0)) - self._flag["data"] = empty((len(self.get_full_longitudes()["data"]), t_len, 0)) - - return None - - # noinspection DuplicatedCode - def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): - """ - Get the coordinate data of the current portion. - - Parameters - ---------- - coordinate_info : dict, list - Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. - coordinate_axis : str - Name of the coordinate to extract. Accepted values: ["X"]. - bounds : bool - Boolean variable to know if there are coordinate bounds. - Returns - ------- - values : dict - Dictionary with the portion of data corresponding to the rank. 
- """ - - if coordinate_info is None: - return None - - if not isinstance(coordinate_info, dict): - values = {"data": deepcopy(coordinate_info)} - else: - values = deepcopy(coordinate_info) - - coordinate_len = len(values["data"].shape) - if bounds: - coordinate_len -= 1 - - if coordinate_axis == "X": - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif coordinate_len == 2: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - elif coordinate_len == 3: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], :] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - - return values - - # noinspection DuplicatedCode - def _read_variable(self, var_name): - """ - Read the corresponding variable data according to the current rank. - - Parameters - ---------- - var_name : str - Name of the variable to read. - - Returns - ------- - data: array - Portion of the variable data corresponding to the rank. - """ - - nc_var = self.dataset.variables[var_name] - var_dims = nc_var.dimensions - - # Read data in 1 or 2 dimensions - if len(var_dims) < 2: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif len(var_dims) == 2: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - elif len(var_dims) == 3: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - :] - else: - raise NotImplementedError("Error with {0}. Only can be read netCDF with 3 dimensions or less".format( - var_name)) - - # Unmask array - data = self._unmask_array(data) - - return data - - # noinspection DuplicatedCode - def _create_variables(self, netcdf, chunking=False): - """ - Create the netCDF file variables. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open Dataset. - chunking : bool - Indicates if you want to chunk the output netCDF. - """ - - if self.variables is not None: - for i, (var_name, var_dict) in enumerate(self.variables.items()): - # Get data type - if "dtype" in var_dict.keys(): - var_dtype = var_dict["dtype"] - if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): - msg = "WARNING!!! " - msg += "Different data types for variable {0}. ".format(var_name) - msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) - warn(msg) - sys.stderr.flush() - try: - var_dict["data"] = var_dict["data"].astype(var_dtype) - except Exception: - raise TypeError("It was not possible to cast the data to the input dtype.") - else: - var_dtype = var_dict["data"].dtype - if var_dtype is object: - raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") - - # Get dimensions when reading datasets - if "dimensions" in var_dict.keys(): - var_dims = var_dict["dimensions"] - # Get dimensions when creating new datasets - else: - if len(var_dict["data"].shape) == 1: - # For data that depends only on station (e.g. station_code) - var_dims = self._var_dim - else: - # For data that is dependent on time and station (e.g. 
PM10) - var_dims = self._var_dim + ("time",) - - if var_dict["data"] is not None: - - # Ensure data is of type numpy array (to create NES) - if not isinstance(var_dict["data"], (ndarray, generic)): - try: - var_dict["data"] = array(var_dict["data"]) - except AttributeError: - raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) - - # Convert list of strings to chars for parallelization - if issubdtype(var_dtype, character): - var_dict["data_aux"] = self._str2char(var_dict["data"]) - var_dims += ("strlen",) - var_dtype = "S1" - - if self.info: - print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - - if not chunking: - var = netcdf.createVariable(var_name, var_dtype, var_dims, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - else: - if self.master: - chunk_size = var_dict["data"].shape - else: - chunk_size = None - chunk_size = self.comm.bcast(chunk_size, root=0) - var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, - complevel=self.zip_lvl, chunksizes=chunk_size) - - if self.info: - print("Rank {0:03d}: Var {1} created ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - if self.size > 1: - var.set_collective(True) - if self.info: - print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - - for att_name, att_value in var_dict.items(): - if att_name == "data": - if self.info: - print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - if "data_aux" in var_dict.keys(): - att_value = var_dict["data_aux"] - if len(att_value.shape) == 1: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, - att_value.shape)) - elif len(att_value.shape) == 2: - if "strlen" in var_dims: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, - att_value.shape)) - else: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, - att_value.shape)) - except ValueError: - out_shape = var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape - raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( - out_shape, att_value.shape)) - elif len(att_value.shape) == 3: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - :] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - :].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - :].shape, - att_value.shape)) - - if self.info: - print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - - elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: - var.setncattr(att_name, att_value) - - if "data_aux" in var_dict.keys(): - del var_dict["data_aux"] - - self._set_var_crs(var) - if self.info: - print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - - return None - - # noinspection DuplicatedCode - def _gather_data(self, data_to_gather): - """ - Gather all the variable data into the MPI rank 0 to perform a serial write. - - Returns - ------- - data_to_gather: dict - Variables to gather. - """ - - data_list = deepcopy(data_to_gather) - for var_name, var_info in data_list.items(): - try: - # noinspection PyArgumentList - data_aux = self.comm.gather(data_list[var_name]["data"], root=0) - if self.rank == 0: - shp_len = len(data_list[var_name]["data"].shape) - # concatenate over station - if self.parallel_method == "X": - if shp_len == 1: - # dimensions = (station) - axis = 0 - elif shp_len == 2: - # dimensions = (station, strlen) or - # dimensions = (station, time) - axis = 0 - else: - msg = "The points NetCDF must have " - msg += "surface values (without levels)." - raise NotImplementedError(msg) - elif self.parallel_method == "T": - # concatenate over time - if shp_len == 1: - # dimensions = (station) - axis = None - elif shp_len == 2: - if "strlen" in var_info["dimensions"]: - # dimensions = (station, strlen) - axis = None - else: - # dimensions = (station, time) - axis = 1 - else: - msg = "The points NetCDF must have " - msg += "surface values (without levels)." - raise NotImplementedError(msg) - else: - raise NotImplementedError( - "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "T"])) - data_list[var_name]["data"] = concatenate(data_aux, axis=axis) - except Exception as e: - msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" - print(msg) - sys.stderr.write(msg) - print(e) - sys.stderr.write(str(e)) - sys.stderr.flush() - self.comm.Abort(1) - - return data_list - - def _create_metadata(self, netcdf): - """ - Create metadata variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. 
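-
-        Notes
-        -----
-        Writes the GHOST "flag" and "qa" variables with dimensions
-        ("station", "time", "N_flag_codes") and ("station", "time",
-        "N_qa_codes"), filled from ``self._flag`` and ``self._qa``.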
- """ - - # N FLAG CODES - flag = netcdf.createVariable("flag", int64, ("station", "time", "N_flag_codes",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - flag.units = "" - flag.axis = "" - flag.long_name = "" - flag.standard_name = "flag" - if self.size > 1: - flag.set_collective(True) - flag[:] = self._flag["data"] - - # N QA CODES - qa = netcdf.createVariable("qa", int64, ("station", "time", "N_qa_codes",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - qa.units = "" - qa.axis = "" - qa.long_name = "" - qa.standard_name = "N_qa_codes" - if self.size > 1: - qa.set_collective(True) - qa[:] = self._qa["data"] - - return None - - def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES", - keep_open=False): - """ - Write the netCDF output file. - - Parameters - ---------- - keep_open : bool - nc_type : str - path : str - Path to the output netCDF file. - compression_level : int - Level of compression (0 to 9) Default: 0 (no compression). - serial : bool - Indicates if you want to write in serial or not. Default: False. - info : bool - Indicates if you want to print the information of each writing step by stdout Default: False. - chunking : bool - Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False. - """ - - if (not serial) and (self.size > 1): - msg = "WARNING!!! " - msg += "GHOST datasets cannot be written in parallel yet. " - msg += "Changing to serial mode." - warn(msg) - sys.stderr.flush() - - super(PointsNesGHOST, self).to_netcdf(path, compression_level=compression_level, - serial=True, info=info, chunking=chunking) - - return None - - def to_points(self): - """ - Transform a PointsNesGHOST into a PointsNes object - - Returns - ---------- - points_nes : nes.Nes - Points Nes Object (without GHOST metadata variables) - """ - - points_nes = PointsNes(comm=self.comm, - info=self.info, - balanced=self.balanced, - parallel_method=self.parallel_method, - avoid_first_hours=self.hours_start, - avoid_last_hours=self.hours_end, - first_level=self.first_level, - last_level=self.last_level, - create_nes=True, - lat=self.lat["data"], - lon=self.lon["data"], - times=self.time - ) - - # The version attribute in GHOST files prior to 1.3.3 is called data_version, after it is version - if "version" in self.global_attrs: - ghost_version = self.global_attrs["version"] - elif "data_version" in self.global_attrs: - ghost_version = self.global_attrs["data_version"] - else: - ghost_version = "0.0.0" - metadata_variables = self.get_standard_metadata(ghost_version) - self.free_vars(metadata_variables) - self.free_vars("station") - points_nes.variables = deepcopy(self.variables) - - return points_nes - - @staticmethod - def get_standard_metadata(ghost_version): - """ - Get all possible GHOST variables for each version. - - Parameters - ---------- - ghost_version : str - Version of GHOST file. 
- - Returns - ---------- - metadata_variables[GHOST_version] : list - A List of metadata variables for a certain GHOST version - """ - - # This metadata variables are - metadata_variables = {"1.4": ["GHOST_version", "station_reference", "station_timezone", "latitude", "longitude", - "altitude", "sampling_height", "measurement_altitude", "ellipsoid", - "horizontal_datum", "vertical_datum", "projection", "distance_to_building", - "distance_to_kerb", "distance_to_junction", "distance_to_source", "street_width", - "street_type", "daytime_traffic_speed", "daily_passing_vehicles", "data_level", - "climatology", "station_name", "city", "country", - "administrative_country_division_1", "administrative_country_division_2", - "population", "representative_radius", "network", "associated_networks", - "area_classification", "station_classification", "main_emission_source", - "land_use", "terrain", "measurement_scale", - "ESDAC_Iwahashi_landform_classification", - "ESDAC_modal_Iwahashi_landform_classification_5km", - "ESDAC_modal_Iwahashi_landform_classification_25km", - "ESDAC_Meybeck_landform_classification", - "ESDAC_modal_Meybeck_landform_classification_5km", - "ESDAC_modal_Meybeck_landform_classification_25km", - "GHSL_settlement_model_classification", - "GHSL_modal_settlement_model_classification_5km", - "GHSL_modal_settlement_model_classification_25km", - "Joly-Peuch_classification_code", "Koppen-Geiger_classification", - "Koppen-Geiger_modal_classification_5km", - "Koppen-Geiger_modal_classification_25km", - "MODIS_MCD12C1_v6_IGBP_land_use", "MODIS_MCD12C1_v6_modal_IGBP_land_use_5km", - "MODIS_MCD12C1_v6_modal_IGBP_land_use_25km", "MODIS_MCD12C1_v6_UMD_land_use", - "MODIS_MCD12C1_v6_modal_UMD_land_use_5km", - "MODIS_MCD12C1_v6_modal_UMD_land_use_25km", "MODIS_MCD12C1_v6_LAI", - "MODIS_MCD12C1_v6_modal_LAI_5km", "MODIS_MCD12C1_v6_modal_LAI_25km", - "WMO_region", "WWF_TEOW_terrestrial_ecoregion", "WWF_TEOW_biogeographical_realm", - "WWF_TEOW_biome", "UMBC_anthrome_classification", - "UMBC_modal_anthrome_classification_5km", - "UMBC_modal_anthrome_classification_25km", - "EDGAR_v4.3.2_annual_average_BC_emissions", - "EDGAR_v4.3.2_annual_average_CO_emissions", - "EDGAR_v4.3.2_annual_average_NH3_emissions", - "EDGAR_v4.3.2_annual_average_NMVOC_emissions", - "EDGAR_v4.3.2_annual_average_NOx_emissions", - "EDGAR_v4.3.2_annual_average_OC_emissions", - "EDGAR_v4.3.2_annual_average_PM10_emissions", - "EDGAR_v4.3.2_annual_average_biogenic_PM2.5_emissions", - "EDGAR_v4.3.2_annual_average_fossilfuel_PM2.5_emissions", - "EDGAR_v4.3.2_annual_average_SO2_emissions", "ASTER_v3_altitude", - "ETOPO1_altitude", "ETOPO1_max_altitude_difference_5km", - "GHSL_built_up_area_density", "GHSL_average_built_up_area_density_5km", - "GHSL_average_built_up_area_density_25km", "GHSL_max_built_up_area_density_5km", - "GHSL_max_built_up_area_density_25km", "GHSL_population_density", - "GHSL_average_population_density_5km", "GHSL_average_population_density_25km", - "GHSL_max_population_density_5km", "GHSL_max_population_density_25km", - "GPW_population_density", "GPW_average_population_density_5km", - "GPW_average_population_density_25km", "GPW_max_population_density_5km", - "GPW_max_population_density_25km", - "NOAA-DMSP-OLS_v4_nighttime_stable_lights", - "NOAA-DMSP-OLS_v4_average_nighttime_stable_lights_5km", - "NOAA-DMSP-OLS_v4_average_nighttime_stable_lights_25km", - "NOAA-DMSP-OLS_v4_max_nighttime_stable_lights_5km", - "NOAA-DMSP-OLS_v4_max_nighttime_stable_lights_25km", - "OMI_level3_column_annual_average_NO2", - 
"OMI_level3_column_cloud_screened_annual_average_NO2", - "OMI_level3_tropospheric_column_annual_average_NO2", - "OMI_level3_tropospheric_column_cloud_screened_annual_average_NO2", - "GSFC_coastline_proximity", "primary_sampling_type", - "primary_sampling_instrument_name", - "primary_sampling_instrument_documented_flow_rate", - "primary_sampling_instrument_reported_flow_rate", - "primary_sampling_process_details", "primary_sampling_instrument_manual_name", - "primary_sampling_further_details", "sample_preparation_types", - "sample_preparation_techniques", "sample_preparation_process_details", - "sample_preparation_further_details", "measurement_methodology", - "measuring_instrument_name", "measuring_instrument_sampling_type", - "measuring_instrument_documented_flow_rate", - "measuring_instrument_reported_flow_rate", "measuring_instrument_process_details", - "measuring_instrument_process_details", "measuring_instrument_manual_name", - "measuring_instrument_further_details", "measuring_instrument_reported_units", - "measuring_instrument_reported_lower_limit_of_detection", - "measuring_instrument_documented_lower_limit_of_detection", - "measuring_instrument_reported_upper_limit_of_detection", - "measuring_instrument_documented_upper_limit_of_detection", - "measuring_instrument_reported_uncertainty", - "measuring_instrument_documented_uncertainty", - "measuring_instrument_reported_accuracy", - "measuring_instrument_documented_accuracy", - "measuring_instrument_reported_precision", - "measuring_instrument_documented_precision", - "measuring_instrument_reported_zero_drift", - "measuring_instrument_documented_zero_drift", - "measuring_instrument_reported_span_drift", - "measuring_instrument_documented_span_drift", - "measuring_instrument_reported_zonal_drift", - "measuring_instrument_documented_zonal_drift", - "measuring_instrument_reported_measurement_resolution", - "measuring_instrument_documented_measurement_resolution", - "measuring_instrument_reported_absorption_cross_section", - "measuring_instrument_documented_absorption_cross_section", - "measuring_instrument_inlet_information", - "measuring_instrument_calibration_scale", - "network_provided_volume_standard_temperature", - "network_provided_volume_standard_pressure", "retrieval_algorithm", - "principal_investigator_name", "principal_investigator_institution", - "principal_investigator_email_address", "contact_name", - "contact_institution", "contact_email_address", "meta_update_stamp", - "data_download_stamp", "data_revision_stamp", "network_sampling_details", - "network_uncertainty_details", "network_maintenance_details", - "network_qa_details", "network_miscellaneous_details", "data_licence", - "process_warnings", "temporal_resolution", - "reported_lower_limit_of_detection_per_measurement", - "reported_upper_limit_of_detection_per_measurement", - "reported_uncertainty_per_measurement", "derived_uncertainty_per_measurement", - "day_night_code", "weekday_weekend_code", "season_code", - "hourly_native_representativity_percent", "hourly_native_max_gap_percent", - "daily_native_representativity_percent", "daily_representativity_percent", - "daily_native_max_gap_percent", "daily_max_gap_percent", - "monthly_native_representativity_percent", "monthly_representativity_percent", - "monthly_native_max_gap_percent", "monthly_max_gap_percent", - "annual_native_representativity_percent", "annual_native_max_gap_percent", - "all_representativity_percent", "all_max_gap_percent"], - } - - return metadata_variables[ghost_version] - - # noinspection 
DuplicatedCode - def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): - """ - Add variables data to shapefile. - - var_list : list, str - List (or single string) of the variables to be loaded and saved in the shapefile. - idx_lev : int - Index of vertical level for which the data will be saved in the shapefile. - idx_time : int - Index of time for which the data will be saved in the shapefile. - """ - - if idx_lev != 0: - msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev) - raise ValueError(msg) - - for var_name in var_list: - # station as dimension - if len(self.variables[var_name]["dimensions"]) == 1: - self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel() - # station and time as dimensions - else: - self.shapefile[var_name] = self.variables[var_name]["data"][:, idx_time].ravel() - - return None - - @staticmethod - def _get_axis_index_(axis): - if axis == "T": - value = 1 - elif axis == "X": - value = 0 - else: - raise ValueError("Unknown axis: {0}".format(axis)) - return value - - @staticmethod - def _set_var_crs(var): - """ - Set the grid_mapping - - Parameters - ---------- - var : Variable - netCDF4-python variable object. - """ - return None diff --git a/build/lib/nes/nc_projections/points_nes_providentia.py b/build/lib/nes/nc_projections/points_nes_providentia.py deleted file mode 100644 index ad3fc56..0000000 --- a/build/lib/nes/nc_projections/points_nes_providentia.py +++ /dev/null @@ -1,650 +0,0 @@ -#!/usr/bin/env python - -import sys -from warnings import warn -from copy import deepcopy -from numpy import ndarray, generic, array, issubdtype, character, concatenate -from .points_nes import PointsNes - - -class PointsNesProvidentia(PointsNes): - """ - - Attributes - ---------- - _model_centre_lon : dict - Model centre longitudes dictionary with the complete "data" key for all the values and the rest of the - attributes. - _model_centre_lat : dict - Model centre latitudes dictionary with the complete "data" key for all the values and the rest of the - attributes. - _grid_edge_lon : dict - Grid edge longitudes dictionary with the complete "data" key for all the values and the rest of the - attributes. - _grid_edge_lat : dict - Grid edge latitudes dictionary with the complete "data" key for all the values and the rest of the - attributes. - model_centre_lon : dict - Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. - model_centre_lat : dict - Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. - grid_edge_lon : dict - Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. - grid_edge_lat : dict - Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. - """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, - grid_edge_lat=None, - **kwargs): - """ - Initialize the PointsNesProvidentia class - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "X". 
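
Because points datasets carry no vertical axis, add_variables_to_shapefile (shown above) rejects any idx_lev other than 0 and only varies over time. A hedged sketch of attaching one time step to the working GeoDataFrame, assuming the points class exposes create_shapefile() and that the variable has been loaded:

    # Sketch under stated assumptions; nes_obj stands for any points-like Nes object.
    nes_obj.create_shapefile()
    nes_obj.add_variables_to_shapefile(["sconco3"], idx_lev=0, idx_time=0)
    print(nes_obj.shapefile[["geometry", "sconco3"]].head())
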
- Accepted values: ["X"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - model_centre_lon : dict - Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. - model_centre_lat : dict - Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. - grid_edge_lon : dict - Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. - grid_edge_lat : dict - Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. - """ - - super(PointsNesProvidentia, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, - create_nes=create_nes, times=times, balanced=balanced, **kwargs) - - if create_nes: - # Complete dimensions - self._model_centre_lon = model_centre_lon - self._model_centre_lat = model_centre_lat - self._grid_edge_lon = grid_edge_lon - self._grid_edge_lat = grid_edge_lat - else: - # Complete dimensions - self._model_centre_lon = self._get_coordinate_dimension(["model_centre_longitude"]) - self._model_centre_lat = self._get_coordinate_dimension(["model_centre_latitude"]) - self._grid_edge_lon = self._get_coordinate_dimension(["grid_edge_longitude"]) - self._grid_edge_lat = self._get_coordinate_dimension(["grid_edge_latitude"]) - - # Dimensions screening - self.model_centre_lon = self._get_coordinate_values(self._model_centre_lon, "") - self.model_centre_lat = self._get_coordinate_values(self._model_centre_lat, "") - self.grid_edge_lon = self._get_coordinate_values(self._grid_edge_lon, "") - self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, "") - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, - create_nes=False, balanced=False, times=None, - model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, - **kwargs): - """ - Initialize the PointsNesProvidentia class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "X". - Accepted values: ["X"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use - last_level : int, None - Index of the last level to use. None if it is the last. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. 
- times : list, None - List of times to substitute the current ones while creation. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - model_centre_lon : dict - Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. - model_centre_lat : dict - Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. - grid_edge_lon : dict - Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. - grid_edge_lat : dict - Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. - """ - - new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, - model_centre_lon=model_centre_lon, model_centre_lat=model_centre_lat, - grid_edge_lon=grid_edge_lon, grid_edge_lat=grid_edge_lat, **kwargs) - - return new - - def _create_dimensions(self, netcdf): - """ - Create "grid_edge", "model_latitude" and "model_longitude" dimensions and the super dimensions - "time", "time_nv", "station", and "strlen". - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(PointsNesProvidentia, self)._create_dimensions(netcdf) - - # Create grid_edge, model_latitude and model_longitude dimensions - netcdf.createDimension("grid_edge", len(self._grid_edge_lon["data"])) - netcdf.createDimension("model_latitude", self._model_centre_lon["data"].shape[0]) - netcdf.createDimension("model_longitude", self._model_centre_lon["data"].shape[1]) - - return None - - def _create_dimension_variables(self, netcdf): - """ - Create the "model_centre_lon", model_centre_lat", "grid_edge_lon" and "grid_edge_lat" variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. 
- """ - - super(PointsNesProvidentia, self)._create_dimension_variables(netcdf) - - # MODEL CENTRE LONGITUDES - model_centre_lon = netcdf.createVariable("model_centre_longitude", "f8", - ("model_latitude", "model_longitude",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - model_centre_lon.units = "degrees_east" - model_centre_lon.axis = "X" - model_centre_lon.long_name = "model centre longitude" - model_centre_lon.standard_name = "model centre longitude" - if self.size > 1: - model_centre_lon.set_collective(True) - msg = "2D meshed grid centre longitudes with " - msg += "{} longitudes in {} bands of latitude".format(self._model_centre_lon["data"].shape[1], - self._model_centre_lat["data"].shape[0]) - model_centre_lon.description = msg - model_centre_lon[:] = self._model_centre_lon["data"] - - # MODEL CENTRE LATITUDES - model_centre_lat = netcdf.createVariable("model_centre_latitude", "f8", - ("model_latitude", "model_longitude",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - model_centre_lat.units = "degrees_north" - model_centre_lat.axis = "Y" - model_centre_lat.long_name = "model centre latitude" - model_centre_lat.standard_name = "model centre latitude" - if self.size > 1: - model_centre_lat.set_collective(True) - msg = "2D meshed grid centre longitudes with " - msg += "{} longitudes in {} bands of latitude".format(self._model_centre_lon["data"].shape[1], - self._model_centre_lat["data"].shape[0]) - model_centre_lat[:] = self._model_centre_lat["data"] - - # GRID EDGE DOMAIN LONGITUDES - grid_edge_lon = netcdf.createVariable("grid_edge_longitude", "f8", "grid_edge") - grid_edge_lon.units = "degrees_east" - grid_edge_lon.axis = "X" - grid_edge_lon.long_name = "grid edge longitude" - grid_edge_lon.standard_name = "grid edge longitude" - if self.size > 1: - grid_edge_lon.set_collective(True) - msg = "Longitude coordinate along edge of grid domain " - msg += "(going clockwise around grid boundary from bottom-left corner)." - grid_edge_lon.description = msg - grid_edge_lon[:] = self._grid_edge_lon["data"] - - # GRID EDGE DOMAIN LATITUDES - grid_edge_lat = netcdf.createVariable("grid_edge_latitude", "f8", "grid_edge") - grid_edge_lat.units = "degrees_north" - grid_edge_lat.axis = "Y" - grid_edge_lat.long_name = "grid edge latitude" - grid_edge_lat.standard_name = "grid edge latitude" - if self.size > 1: - grid_edge_lat.set_collective(True) - msg = "Latitude coordinate along edge of grid domain " - msg += "(going clockwise around grid boundary from bottom-left corner)." - grid_edge_lat.description = msg - grid_edge_lat[:] = self._grid_edge_lat["data"] - - self.free_vars(["model_centre_longitude", "model_centre_latitude", "grid_edge_longitude", "grid_edge_latitude"]) - - # noinspection DuplicatedCode - def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): - """ - Get the coordinate data of the current portion. - - Parameters - ---------- - coordinate_info : dict, list - Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. - coordinate_axis : str - Name of the coordinate to extract. Accepted values: ["X"]. - bounds : bool - Boolean variable to know if there are coordinate bounds. - Returns - ------- - values : dict - Dictionary with the portion of data corresponding to the rank. 
- """ - - if coordinate_info is None: - return None - - if not isinstance(coordinate_info, dict): - values = {"data": deepcopy(coordinate_info)} - else: - values = deepcopy(coordinate_info) - - coordinate_len = len(values["data"].shape) - if bounds: - coordinate_len -= 1 - - if coordinate_axis == "X": - if coordinate_len == 1: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif coordinate_len == 2: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - elif coordinate_len == 3: - values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], :] - else: - raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( - dim=values["data"].shape)) - elif coordinate_axis == "": - # pass for "model_centre_lon", "model_centre_lat", "grid_edge_lon" and "grid_edge_lat" - pass - - return values - - # noinspection DuplicatedCode - def _read_variable(self, var_name): - """ - Read the corresponding variable data according to the current rank. - - Parameters - ---------- - var_name : str - Name of the variable to read. - - Returns - ------- - data: array - Portion of the variable data corresponding to the rank. - """ - nc_var = self.dataset.variables[var_name] - var_dims = nc_var.dimensions - - # Read data in 1, 2 or 3 dimensions - if len(var_dims) < 2: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] - elif len(var_dims) == 2: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] - elif len(var_dims) == 3: - data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], - self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], - :] - else: - raise NotImplementedError("Error with {0}. Only can be read netCDF with 3 dimensions or less".format( - var_name)) - - # Unmask array - data = self._unmask_array(data) - - return data - - # noinspection DuplicatedCode - def _create_variables(self, netcdf, chunking=False): - """ - Create the netCDF file variables. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python open Dataset. - chunking : bool - Indicates if you want to chunk the output netCDF. - """ - - if self.variables is not None: - for i, (var_name, var_dict) in enumerate(self.variables.items()): - # Get data type - if "dtype" in var_dict.keys(): - var_dtype = var_dict["dtype"] - if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): - msg = "WARNING!!! " - msg += "Different data types for variable {0}. ".format(var_name) - msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, - var_dict["data"].dtype) - warn(msg) - sys.stderr.flush() - try: - var_dict["data"] = var_dict["data"].astype(var_dtype) - except Exception: # TODO: Detect exception - raise TypeError("It was not possible to cast the data to the input dtype.") - else: - var_dtype = var_dict["data"].dtype - if var_dtype is object: - raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") - - # Get dimensions when reading datasets - if "dimensions" in var_dict.keys(): - var_dims = var_dict["dimensions"] - # Get dimensions when creating new datasets - else: - if len(var_dict["data"].shape) == 1: - # For data that depends only on station (e.g. 
station_code) - var_dims = self._var_dim - else: - # For data that is dependent on time and station (e.g. PM10) - var_dims = self._var_dim + ("time",) - - if var_dict["data"] is not None: - - # Ensure data is of type numpy array (to create NES) - if not isinstance(var_dict["data"], (ndarray, generic)): - try: - var_dict["data"] = array(var_dict["data"]) - except AttributeError: - raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) - - # Convert list of strings to chars for parallelization - if issubdtype(var_dtype, character): - var_dict["data_aux"] = self._str2char(var_dict["data"]) - var_dims += ("strlen",) - var_dtype = "S1" - - if self.info: - print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - - if not chunking: - var = netcdf.createVariable(var_name, var_dtype, var_dims, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - else: - if self.master: - chunk_size = var_dict["data"].shape - else: - chunk_size = None - chunk_size = self.comm.bcast(chunk_size, root=0) - var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, - complevel=self.zip_lvl, chunksizes=chunk_size) - - if self.info: - print("Rank {0:03d}: Var {1} created ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - if self.size > 1: - var.set_collective(True) - if self.info: - print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - - for att_name, att_value in var_dict.items(): - if att_name == "data": - if self.info: - print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - if "data_aux" in var_dict.keys(): - att_value = var_dict["data_aux"] - if len(att_value.shape) == 1: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, - att_value.shape)) - elif len(att_value.shape) == 2: - if "strlen" in var_dims: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, - att_value.shape)) - else: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, - att_value.shape)) - elif len(att_value.shape) == 3: - try: - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - :] = att_value - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - :].shape, - att_value.shape)) - except ValueError: - raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( - var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - :].shape, - att_value.shape)) - - if self.info: - print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: - var.setncattr(att_name, att_value) - - if "data_aux" in var_dict.keys(): - del var_dict["data_aux"] - - self._set_var_crs(var) - if self.info: - print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - - return None - - # noinspection DuplicatedCode - def _gather_data(self, data_to_gather): - """ - Gather all the variable data into the MPI rank 0 to perform a serial write. - - Returns - ------- - data_to_gather: dict - Variables to gather. - """ - - data_list = deepcopy(data_to_gather) - for var_name, var_info in data_list.items(): - try: - # noinspection PyArgumentList - data_aux = self.comm.gather(data_list[var_name]["data"], root=0) - if self.rank == 0: - shp_len = len(data_list[var_name]["data"].shape) - # concatenate over station - if self.parallel_method == "X": - if shp_len == 1: - # dimensions = (station) - axis = 0 - elif shp_len == 2: - # dimensions = (station, strlen) or - # dimensions = (station, time) - axis = 0 - else: - msg = "The points NetCDF must have " - msg += "surface values (without levels)." - raise NotImplementedError(msg) - elif self.parallel_method == "T": - # concatenate over time - if shp_len == 1: - # dimensions = (station) - axis = None - elif shp_len == 2: - if "strlen" in var_info["dimensions"]: - # dimensions = (station, strlen) - axis = None - else: - # dimensions = (station, time) - axis = 1 - else: - msg = "The points NetCDF must have " - msg += "surface values (without levels)." - raise NotImplementedError(msg) - else: - raise NotImplementedError( - "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( - meth=self.parallel_method, accept=["X", "T"])) - data_list[var_name]["data"] = concatenate(data_aux, axis=axis) - except Exception as e: - msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" - print(msg) - sys.stderr.write(msg) - print(e) - sys.stderr.write(str(e)) - # print(e, file=sys.stderr) - sys.stderr.flush() - self.comm.Abort(1) - - return data_list - - def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES", - keep_open=False): - """ - Write the netCDF output file. - - Parameters - ---------- - path : str - Path to the output netCDF file. - compression_level : int - Level of compression (0 to 9) Default: 0 (no compression). 
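
The axis bookkeeping in _gather_data above follows from the (station, time) layout of points variables: rank-local blocks are joined along axis 0 when the run is distributed over stations ("X") and along axis 1 when it is distributed over time ("T"). A toy restatement outside MPI, with plain arrays standing in for the comm.gather result:

    # Hedged sketch of the reassembly rule; shapes are invented for the example.
    from numpy import arange, concatenate

    block_a = arange(6).reshape(2, 3)  # (station, time) block from one rank
    block_b = arange(6).reshape(2, 3)  # block from another rank

    by_station = concatenate([block_a, block_b], axis=0)  # parallel_method == "X"
    by_time = concatenate([block_a, block_b], axis=1)     # parallel_method == "T"
    assert by_station.shape == (4, 3) and by_time.shape == (2, 6)
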
-        serial : bool
-            Indicates if you want to write in serial or not. Default: False.
-        info : bool
-            Indicates if you want to print the information of each writing step by stdout. Default: False.
-        chunking : bool
-            Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False.
-        nc_type : str
-            Type of NetCDF to write. "CAMS_RA" or "NES".
-        keep_open : bool
-            Indicates if you want to keep the NetCDF open to fill the data by time-step.
-        """
-
-        if (not serial) and (self.size > 1):
-            msg = "WARNING!!! "
-            msg += "Providentia datasets cannot be written in parallel yet. "
-            msg += "Changing to serial mode."
-            warn(msg)
-            sys.stderr.flush()
-
-        super(PointsNesProvidentia, self).to_netcdf(path, compression_level=compression_level,
-                                                    serial=True, info=info, chunking=chunking)
-
-        return None
-
-    # noinspection DuplicatedCode
-    def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0):
-        """
-        Add variables data to shapefile.
-
-        var_list : list, str
-            List (or single string) of the variables to be loaded and saved in the shapefile.
-        idx_lev : int
-            Index of vertical level for which the data will be saved in the shapefile.
-        idx_time : int
-            Index of time for which the data will be saved in the shapefile.
-        """
-
-        if idx_lev != 0:
-            msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev)
-            raise ValueError(msg)
-
-        for var_name in var_list:
-            # station as dimension
-            if len(self.variables[var_name]["dimensions"]) == 1:
-                self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel()
-            # station and time as dimensions
-            else:
-                self.shapefile[var_name] = self.variables[var_name]["data"][:, idx_time].ravel()
-
-        return None
-
-    @staticmethod
-    def _get_axis_index_(axis):
-        if axis == "T":
-            value = 1
-        elif axis == "X":
-            value = 0
-        else:
-            raise ValueError("Unknown axis: {0}".format(axis))
-        return value
-
-    @staticmethod
-    def _set_var_crs(var):
-        """
-        Set the grid_mapping.
-
-        Parameters
-        ----------
-        var : Variable
-            netCDF4-python variable object.
-        """
-        return None
diff --git a/build/lib/nes/nc_projections/rotated_nes.py b/build/lib/nes/nc_projections/rotated_nes.py
deleted file mode 100644
index c5c3794..0000000
--- a/build/lib/nes/nc_projections/rotated_nes.py
+++ /dev/null
@@ -1,694 +0,0 @@
-#!/usr/bin/env python
-
-from numpy import (float64, linspace, cos, sin, arcsin, arctan2, array, mean, diff, append, flip, repeat, concatenate,
-                   vstack)
-from math import pi
-from geopandas import GeoDataFrame
-from pandas import Index
-from pyproj import Proj
-from copy import deepcopy
-from typing import Dict, Any
-from shapely.geometry import Polygon, Point
-from .default_nes import Nes
-
-
-class RotatedNes(Nes):
-    """
-
-    Attributes
-    ----------
-    _full_rlat : dict
-        Rotated latitudes dictionary with the complete "data" key for all the values and the rest of the attributes.
-    _full_rlon : dict
-        Rotated longitudes dictionary with the complete "data" key for all the values and the rest of the attributes.
-    rlat : dict
-        Rotated latitudes dictionary with the portion of "data" corresponding to the rank values.
-    rlon : dict
-        Rotated longitudes dictionary with the portion of "data" corresponding to the rank values.
-    _var_dim : tuple
-        A Tuple with the name of the Y and X dimensions for the variables.
-        ("rlat", "rlon") for a rotated projection.
-    _lat_dim : tuple
-        A Tuple with the name of the dimensions of the Latitude values.
-        ("rlat", "rlon") for a rotated projection.
- _lon_dim : tuple - A Tuple with the name of the dimensions of the Longitude values. - ("rlat", "rlon") for a rotated projection. - """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the RotatedNes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - self._full_rlat = None - self._full_rlon = None - - super(RotatedNes, self).__init__(comm=comm, path=path, - info=info, dataset=dataset, balanced=balanced, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, **kwargs) - - if create_nes: - # Complete dimensions - # self._full_rlat, self._full_rlon = self._create_rotated_coordinates() - # Dimensions screening - self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") - self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") - else: - # Complete dimensions - self._full_rlat = self._get_coordinate_dimension("rlat") - self._full_rlon = self._get_coordinate_dimension("rlon") - - # Dimensions screening - self.rlat = self._get_coordinate_values(self.get_full_rlat(), "Y") - self.rlon = self._get_coordinate_values(self.get_full_rlon(), "X") - - # Set axis limits for parallel writing - self.write_axis_limits = self._get_write_axis_limits() - - self._var_dim = ("rlat", "rlon") - self._lat_dim = ("rlat", "rlon") - self._lon_dim = ("rlat", "rlon") - - @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the Nes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default over Y axis - accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int or None - Index of the last level to use. None if it is the last. 
- create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : List[datetime] or None - List of times to substitute the current ones while creation. - """ - - new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, - create_nes=create_nes, balanced=balanced, times=times, **kwargs) - - return new - - def get_full_rlat(self) -> Dict[str, Any]: - """ - Retrieve the complete rotated latitude information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_rlat) - - return data - - def get_full_rlon(self) -> Dict[str, Any]: - """ - Retrieve the complete rotated longitude information. - - Returns - ------- - Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - data = self.comm.bcast(self._full_rlon) - return data - - def set_full_rlat(self, data: Dict[str, Any]) -> None: - """ - Set the complete rotated latitude information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete latitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of latitude values. - attr_name: attr_value, # Latitude attributes. - ... - } - """ - if self.master: - self._full_rlat = data - return None - - def set_full_rlon(self, data: Dict[str, Any]) -> None: - """ - Set the complete rotated longitude information. - - Parameters - ---------- - data : Dict[str, Any] - A dictionary containing the complete longitude data and its attributes. - The dictionary structure is: - { - "data": ndarray, # Array of longitude values. - attr_name: attr_value, # Longitude attributes. - ... - } - """ - if self.master: - self._full_rlon = data - return None - - # noinspection DuplicatedCode - def _filter_coordinates_selection(self): - """ - Use the selection limits to filter rlat, rlon, time, lev, lat, lon, lon_bnds and lat_bnds. - """ - - idx = self._get_idx_intervals() - - full_rlat = self.get_full_rlat() - full_rlon = self.get_full_rlon() - - self.rlat = self._get_coordinate_values(full_rlat, "Y") - self.rlon = self._get_coordinate_values(full_rlon, "X") - - if self.master: - self.set_full_rlat({'data': full_rlat["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) - self.set_full_rlon({'data': full_rlon["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) - - super(RotatedNes, self)._filter_coordinates_selection() - - return None - - def _get_pyproj_projection(self): - """ - Get projection data as in Pyproj library. - - Returns - ---------- - projection : pyproj.Proj - Grid projection. 
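
For a concrete feel of the ob_tran projection built just below: a rotated grid centred at (40N, 20E) stores grid_north_pole_latitude = 50 and grid_north_pole_longitude = -160, so the constructor call is equivalent to the sketch here (hedged: the earth radius value is an assumed example):

    # Hedged sketch of the equivalent pyproj call; the pole values follow the
    # 90 - centre_lat / -180 + centre_lon convention used elsewhere in this file.
    from pyproj import Proj

    projection = Proj(proj="ob_tran", o_proj="longlat", ellps="WGS84",
                      R=6370000.0,     # example earth radius
                      o_lat_p=50.0,    # 90 - centre_lat, for centre_lat = 40
                      o_lon_p=-160.0)  # -180 + centre_lon, for centre_lon = 20
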
- """ - - projection = Proj(proj="ob_tran", - o_proj="longlat", - ellps="WGS84", - R=self.earth_radius[0], - o_lat_p=float64(self.projection_data["grid_north_pole_latitude"]), - o_lon_p=float64(self.projection_data["grid_north_pole_longitude"]), - ) - - return projection - - # noinspection DuplicatedCode - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. - """ - if create_nes: - projection_data = {"grid_mapping_name": "rotated_latitude_longitude", - "grid_north_pole_latitude": 90 - kwargs["centre_lat"], - "grid_north_pole_longitude": -180 + kwargs["centre_lon"], - "inc_rlat": kwargs["inc_rlat"], - "inc_rlon": kwargs["inc_rlon"], - "south_boundary": kwargs["south_boundary"], - "west_boundary": kwargs["west_boundary"], - } - else: - if "rotated_pole" in self.variables.keys(): - projection_data = self.variables["rotated_pole"] - self.free_vars("rotated_pole") - else: - msg = "There is no variable called rotated_pole, projection has not been defined." - raise RuntimeError(msg) - - if "dtype" in projection_data.keys(): - del projection_data["dtype"] - - if "data" in projection_data.keys(): - del projection_data["data"] - - if "dimensions" in projection_data.keys(): - del projection_data["dimensions"] - - return projection_data - - def _create_dimensions(self, netcdf): - """ - Create "rlat", "rlon" and "spatial_nv" dimensions and the dimensions "lev", "time", "time_nv", "lon" and "lat". - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - - super(RotatedNes, self)._create_dimensions(netcdf) - - shape = self.get_full_shape() - # Create rlat and rlon dimensions - netcdf.createDimension("rlon", shape[1]) - netcdf.createDimension("rlat", shape[0]) - - # Create spatial_nv (number of vertices) dimension - if (self.lat_bnds is not None) and (self.lon_bnds is not None): - netcdf.createDimension("spatial_nv", 4) - pass - - return None - - def _create_dimension_variables(self, netcdf): - """ - Create the "rlat" and "rlon" variables. - - Parameters - ---------- - netcdf : Dataset - NetCDF object. - """ - super(RotatedNes, self)._create_dimension_variables(netcdf) - - # ROTATED LATITUDES - full_rlat = self.get_full_rlat() - rlat = netcdf.createVariable("rlat", full_rlat["data"].dtype, ("rlat",)) - rlat.long_name = "latitude in rotated pole grid" - if "units" in full_rlat.keys(): - rlat.units = full_rlat["units"] - else: - rlat.units = "degrees" - rlat.standard_name = "grid_latitude" - if self.size > 1: - rlat.set_collective(True) - rlat[:] = full_rlat["data"] - - # ROTATED LONGITUDES - full_rlon = self.get_full_rlon() - rlon = netcdf.createVariable("rlon", full_rlon["data"].dtype, ("rlon",)) - rlon.long_name = "longitude in rotated pole grid" - if "units" in full_rlon.keys(): - rlon.units = full_rlon["units"] - else: - rlon.units = "degrees" - rlon.standard_name = "grid_longitude" - if self.size > 1: - rlon.set_collective(True) - rlon[:] = full_rlon["data"] - - return None - - def _create_rotated_coordinates(self): - """ - Calculate rotated latitudes and longitudes from grid details. - - Returns - ---------- - _rlat : dict - Rotated latitudes dictionary with the "data" key for all the values and the rest of the attributes. 
-        _rlon : dict
-            Rotated longitudes dictionary with the "data" key for all the values and the rest of the attributes.
-        """
-        # Get grid resolution
-        inc_rlon = float64(self.projection_data["inc_rlon"])
-        inc_rlat = float64(self.projection_data["inc_rlat"])
-
-        # Get south and west boundaries
-        south_boundary = float64(self.projection_data["south_boundary"])
-        west_boundary = float64(self.projection_data["west_boundary"])
-
-        # Calculate rotated latitudes
-        n_lat = int((abs(south_boundary) / inc_rlat) * 2 + 1)
-        rlat = linspace(south_boundary, south_boundary + (inc_rlat * (n_lat - 1)), n_lat, dtype=float64)
-
-        # Calculate rotated longitudes
-        n_lon = int((abs(west_boundary) / inc_rlon) * 2 + 1)
-        rlon = linspace(west_boundary, west_boundary + (inc_rlon * (n_lon - 1)), n_lon, dtype=float64)
-
-        return {"data": rlat}, {"data": rlon}
-
-    def rotated2latlon(self, lon_deg, lat_deg, lon_min=-180):
-        """
-        Calculate the unrotated coordinates using the rotated ones.
-
-        Parameters
-        ----------
-        lon_deg : array
-            Rotated longitude coordinate.
-        lat_deg : array
-            Rotated latitude coordinate.
-        lon_min : float
-            Minimum value for the longitudes: -180 (-180 to 180) or 0 (0 to 360).
-
-        Returns
-        ----------
-        almd : array
-            Unrotated longitudes.
-        aphd : array
-            Unrotated latitudes.
-        """
-
-        # Get centre coordinates
-        centre_lat = 90 - float64(self.projection_data["grid_north_pole_latitude"])
-        centre_lon = float64(self.projection_data["grid_north_pole_longitude"]) + 180
-
-        # Convert to radians
-        degrees_to_radians = pi / 180.
-        tph0 = centre_lat * degrees_to_radians
-        tlm = lon_deg * degrees_to_radians
-        tph = lat_deg * degrees_to_radians
-
-        tlm0d = -180 + centre_lon
-        ctph0 = cos(tph0)
-        stph0 = sin(tph0)
-        stlm = sin(tlm)
-        ctlm = cos(tlm)
-        stph = sin(tph)
-        ctph = cos(tph)
-
-        # Calculate unrotated latitudes
-        sph = (ctph0 * stph) + (stph0 * ctph * ctlm)
-        sph[sph > 1.] = 1.
-        sph[sph < -1.] = -1.
-        aph = arcsin(sph)
-        aphd = aph / degrees_to_radians
-
-        # Calculate unrotated longitudes
-        anum = ctph * stlm
-        denom = (ctlm * ctph - stph0 * sph) / ctph0
-        relm = arctan2(anum, denom) - pi
-        almd = relm / degrees_to_radians + tlm0d
-        almd[almd > (lon_min + 360)] -= 360
-        almd[almd < lon_min] += 360
-
-        return almd, aphd
-
-    def _create_centre_coordinates(self, **kwargs):
-        """
-        Calculate centre latitudes and longitudes from grid details.
-
-        Returns
-        ----------
-        centre_lat : dict
-            Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
-        centre_lon : dict
-            Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
-        """
-        if self.master:
-            # Complete dimensions
-            self._full_rlat, self._full_rlon = self._create_rotated_coordinates()
-
-            # Calculate centre latitudes and longitudes (1D to 2D)
-            centre_lon, centre_lat = self.rotated2latlon(
-                array([self._full_rlon["data"]] * len(self._full_rlat["data"])),
-                array([self._full_rlat["data"]] * len(self._full_rlon["data"])).T)
-
-            return {"data": centre_lat}, {"data": centre_lon}
-        else:
-            return None, None
-
-    def create_providentia_exp_centre_coordinates(self):
-        """
-        Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays.
-
-        Returns
-        ----------
-        model_centre_lat : dict
-            Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude).
-        model_centre_lon : dict
-            Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude).
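
As a quick numerical check of rotated2latlon above: the rotated origin (rlon = 0, rlat = 0) must map back onto the grid centre. A self-contained restatement of the formulas for a single point, with an assumed centre of (40N, 20E):

    # Hedged sketch restating the rotation formulas for one point.
    from numpy import array, radians, degrees, sin, cos, arcsin, arctan2, pi, allclose

    centre_lat, centre_lon = 40.0, 20.0
    tph0 = radians(centre_lat)
    tlm, tph = radians(array([0.0])), radians(array([0.0]))  # rotated lon, lat

    sph = cos(tph0) * sin(tph) + sin(tph0) * cos(tph) * cos(tlm)
    aphd = degrees(arcsin(sph))
    almd = degrees(arctan2(cos(tph) * sin(tlm),
                           (cos(tlm) * cos(tph) - sin(tph0) * sph) / cos(tph0)) - pi)
    almd += -180.0 + centre_lon
    almd[almd < -180.0] += 360.0
    assert allclose(aphd, 40.0) and allclose(almd, 20.0)  # back at the centre
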
- """ - - # Get centre latitudes - model_centre_lat = self.lat - - # Get centre longitudes - model_centre_lon = self.lon - - return model_centre_lat, model_centre_lon - - # noinspection DuplicatedCode - def create_providentia_exp_grid_edge_coordinates(self): - """ - Calculate grid edge latitudes and longitudes and get model grid outline. - - Returns - ---------- - grid_edge_lat : dict - Dictionary with data of grid edge latitudes. - grid_edge_lon : dict - Dictionary with data of grid edge longitudes. - """ - - # Get grid resolution - inc_rlon = abs(mean(diff(self.rlon["data"]))) - inc_rlat = abs(mean(diff(self.rlat["data"]))) - - # Get bounds for rotated coordinates - rlat_bounds = self._create_single_spatial_bounds(self.rlat["data"], inc_rlat) - rlon_bounds = self._create_single_spatial_bounds(self.rlon["data"], inc_rlon) - - # Get rotated latitudes for grid edge - left_edge_rlat = append(rlat_bounds.flatten()[::2], rlat_bounds.flatten()[-1]) - right_edge_rlat = flip(left_edge_rlat, 0) - top_edge_rlat = repeat(rlat_bounds[-1][-1], len(self.rlon["data"]) - 1) - bottom_edge_rlat = repeat(rlat_bounds[0][0], len(self.rlon["data"])) - rlat_grid_edge = concatenate((left_edge_rlat, top_edge_rlat, right_edge_rlat, bottom_edge_rlat)) - - # Get rotated longitudes for grid edge - left_edge_rlon = repeat(rlon_bounds[0][0], len(self.rlat["data"]) + 1) - top_edge_rlon = rlon_bounds.flatten()[1:-1:2] - right_edge_rlon = repeat(rlon_bounds[-1][-1], len(self.rlat["data"]) + 1) - bottom_edge_rlon = flip(rlon_bounds.flatten()[:-1:2], 0) - rlon_grid_edge = concatenate((left_edge_rlon, top_edge_rlon, right_edge_rlon, bottom_edge_rlon)) - - # Get edges for regular coordinates - grid_edge_lon_data, grid_edge_lat_data = self.rotated2latlon(rlon_grid_edge, rlat_grid_edge) - - # Create grid outline by stacking the edges in both coordinates - model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T - - grid_edge_lat = {"data": model_grid_outline[:, 1]} - grid_edge_lon = {"data": model_grid_outline[:, 0]} - - return grid_edge_lat, grid_edge_lon - - # noinspection DuplicatedCode - def create_spatial_bounds(self): - """ - Calculate longitude and latitude bounds and set them. - """ - - # Calculate rotated coordinates bounds - full_rlat = self.get_full_rlat() - full_rlon = self.get_full_rlon() - inc_rlat = abs(mean(diff(full_rlat["data"]))) - rlat_bnds = self._create_single_spatial_bounds(array([full_rlat["data"]] * len(full_rlon["data"])).T, - inc_rlat, spatial_nv=4, inverse=True) - - inc_rlon = abs(mean(diff(full_rlon["data"]))) - rlon_bnds = self._create_single_spatial_bounds(array([full_rlon["data"]] * len(full_rlat["data"])), - inc_rlon, spatial_nv=4) - - # Transform rotated bounds to regular bounds - lon_bnds, lat_bnds = self.rotated2latlon(rlon_bnds, rlat_bnds) - - # Obtain regular coordinates bounds - self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) - self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - :]} - - self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) - self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], - :]} - - return None - - @staticmethod - def _set_var_crs(var): - """ - Set the grid_mapping to "rotated_pole". - - Parameters - ---------- - var : Variable - netCDF4-python variable object. 
- """ - - var.grid_mapping = "rotated_pole" - var.coordinates = "lat lon" - - return None - - def _create_metadata(self, netcdf): - """ - Create the "crs" variable for the rotated latitude longitude grid_mapping. - - Parameters - ---------- - netcdf : Dataset - netcdf4-python Dataset. - """ - - if self.projection_data is not None: - mapping = netcdf.createVariable("rotated_pole", "i") - mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] - mapping.grid_north_pole_latitude = self.projection_data["grid_north_pole_latitude"] - mapping.grid_north_pole_longitude = self.projection_data["grid_north_pole_longitude"] - - return None - - def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): - """ - Write output file with grib2 format. - - Parameters - ---------- - lat_flip : bool - Indicates if you want to flip the latitude coordinates. - path : str - Path to the output file. - grib_keys : dict - Dictionary with the grib2 keys. - grib_template_path : str - Path to the grib2 file to use as template. - info : bool - Indicates if you want to print extra information during the process. - """ - - raise NotImplementedError("Grib2 format cannot be written in a Rotated pole projection.") - - # noinspection DuplicatedCode - def create_shapefile(self): - """ - Create spatial geodataframe (shapefile). - - Returns - ------- - shapefile : GeoPandasDataFrame - Shapefile dataframe. - """ - - if self.shapefile is None: - - if self.lat_bnds is None or self.lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], - self.lat_bnds["data"].shape[2])) - aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], - self.lon_bnds["data"].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = self.get_fids() - gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") - self.shapefile = gdf - - else: - gdf = self.shapefile - - return gdf - - # noinspection DuplicatedCode - def get_centroids_from_coordinates(self): - """ - Get centroids from geographical coordinates. - - Returns - ------- - centroids_gdf: GeoPandasDataFrame - Centroids dataframe. 
- """ - - # Get centroids from coordinates - centroids = [] - for lat_ind in range(0, self.lon["data"].shape[0]): - for lon_ind in range(0, self.lon["data"].shape[1]): - centroids.append(Point(self.lon["data"][lat_ind, lon_ind], - self.lat["data"][lat_ind, lon_ind])) - - # Create dataframe cointaining all points - fids = self.get_fids() - centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") - - return centroids_gdf diff --git a/build/lib/nes/nc_projections/rotated_nested_nes.py b/build/lib/nes/nc_projections/rotated_nested_nes.py deleted file mode 100644 index 4517701..0000000 --- a/build/lib/nes/nc_projections/rotated_nested_nes.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python - -from numpy import linspace, float64 -from netCDF4 import Dataset -from .rotated_nes import RotatedNes - - -class RotatedNestedNes(RotatedNes): - - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): - """ - Initialize the RotatedNestedNes class. - - Parameters - ---------- - comm: MPI.COMM - MPI Communicator. - path: str - Path to the NetCDF to initialize the object. - info: bool - Indicates if you want to get reading/writing info. - dataset: Dataset or None - NetCDF4-python Dataset to initialize the class. - parallel_method : str - Indicates the parallelization method that you want. Default: "Y". - Accepted values: ["X", "Y", "T"]. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - first_level : int - Index of the first level to use. - last_level : int, None - Index of the last level to use. None if it is the last. - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - balanced : bool - Indicates if you want a balanced parallelization or not. - Balanced dataset cannot be written in chunking mode. - times : list, None - List of times to substitute the current ones while creation. - """ - - super(RotatedNestedNes, self).__init__(comm=comm, path=path, - info=info, dataset=dataset, balanced=balanced, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=create_nes, - times=times, **kwargs) - - @staticmethod - def _get_parent_attributes(projection_data): - """ - Get projection attributes from parent grid. - - Parameters - ---------- - projection_data : dict - Dictionary with the projection information. - - Returns - ------- - projection_data : dict - Dictionary with the projection information, including parameters from the parent grid. 
- """ - - # Read variables from parent grid - netcdf = Dataset(projection_data["parent_grid_path"], mode="r") - rlat = netcdf.variables["rlat"][:] - rlon = netcdf.variables["rlon"][:] - rotated_pole = netcdf.variables["rotated_pole"] - - # j_parent_start starts at index 1, so we must subtract 1 - projection_data["inc_rlat"] = (rlat[1] - rlat[0]) / projection_data["parent_ratio"] - projection_data["1st_rlat"] = rlat[int(projection_data["j_parent_start"]) - 1] - - # i_parent_start starts at index 1, so we must subtract 1 - projection_data["inc_rlon"] = (rlon[1] - rlon[0]) / projection_data["parent_ratio"] - projection_data["1st_rlon"] = rlon[int(projection_data["i_parent_start"]) - 1] - - projection_data["grid_north_pole_longitude"] = rotated_pole.grid_north_pole_longitude - projection_data["grid_north_pole_latitude"] = rotated_pole.grid_north_pole_latitude - - netcdf.close() - - return projection_data - - def _get_projection_data(self, create_nes, **kwargs): - """ - Retrieves projection data based on grid details. - - Parameters - ---------- - create_nes : bool - Flag indicating whether to create new object (True) or use existing (False). - **kwargs : dict - Additional keyword arguments for specifying projection details. - """ - if create_nes: - projection_data = {"grid_mapping_name": "rotated_latitude_longitude", - "parent_grid_path": kwargs["parent_grid_path"], - "parent_ratio": kwargs["parent_ratio"], - "i_parent_start": kwargs["i_parent_start"], - "j_parent_start": kwargs["j_parent_start"], - "n_rlat": kwargs["n_rlat"], - "n_rlon": kwargs["n_rlon"] - } - projection_data = self._get_parent_attributes(projection_data) - else: - projection_data = super()._get_projection_data(create_nes, **kwargs) - - return projection_data - - def _create_rotated_coordinates(self): - """ - Calculate rotated latitudes and longitudes from grid details. - - Returns - ---------- - _rlat : dict - Rotated latitudes dictionary with the "data" key for all the values and the rest of the attributes. - _rlon : dict - Rotated longitudes dictionary with the "data" key for all the values and the rest of the attributes. 
- """ - - # Get grid resolution - inc_rlon = self.projection_data["inc_rlon"] - inc_rlat = self.projection_data["inc_rlat"] - - # Get number of rotated coordinates - n_rlat = self.projection_data["n_rlat"] - n_rlon = self.projection_data["n_rlon"] - - # Get first coordinates - first_rlat = self.projection_data["1st_rlat"] - first_rlon = self.projection_data["1st_rlon"] - - # Calculate rotated latitudes - rlat = linspace(first_rlat, first_rlat + (inc_rlat * (n_rlat - 1)), n_rlat, dtype=float64) - - # Calculate rotated longitudes - rlon = linspace(first_rlon, first_rlon + (inc_rlon * (n_rlon - 1)), n_rlon, dtype=float64) - - return {"data": rlat}, {"data": rlon} - \ No newline at end of file diff --git a/build/lib/nes/nes_formats/__init__.py b/build/lib/nes/nes_formats/__init__.py deleted file mode 100644 index 39aaf30..0000000 --- a/build/lib/nes/nes_formats/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .cams_ra_format import to_netcdf_cams_ra -from .monarch_format import to_netcdf_monarch, to_monarch_units -from .cmaq_format import to_netcdf_cmaq, to_cmaq_units -from .wrf_chem_format import to_netcdf_wrf_chem, to_wrf_chem_units - -__all__ = [ - 'to_netcdf_cams_ra', 'to_netcdf_monarch', 'to_monarch_units', 'to_netcdf_cmaq', 'to_cmaq_units', - 'to_netcdf_wrf_chem', 'to_wrf_chem_units' -] diff --git a/build/lib/nes/nes_formats/cams_ra_format.py b/build/lib/nes/nes_formats/cams_ra_format.py deleted file mode 100644 index 480becc..0000000 --- a/build/lib/nes/nes_formats/cams_ra_format.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python - -import sys -import nes -from numpy import float64, float32, int32, array -from warnings import warn -from netCDF4 import Dataset -from mpi4py import MPI -from copy import copy - - -# noinspection DuplicatedCode -def to_netcdf_cams_ra(self, path): - """ - Horizontal methods from one grid to another one. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - path : str - Path to the output netCDF file. - """ - - if not isinstance(self, nes.LatLonNes): - raise TypeError("CAMS Re-Analysis format must have Regular Lat-Lon projection") - if "" not in path: - raise ValueError(f"AMS Re-Analysis path must contain '' as pattern; current: '{path}'") - - orig_path = copy(path) - - for i_lev, level in enumerate(self.lev["data"]): - path = orig_path.replace("", "l{0}".format(i_lev)) - # Open NetCDF - if self.info: - print("Rank {0:03d}: Creating {1}".format(self.rank, path)) - if self.size > 1: - netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) - else: - netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) - if self.info: - print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) - self.to_dtype(data_type=float32) - - # Create dimensions - create_dimensions(self, netcdf) - - # Create variables - create_variables(self, netcdf, i_lev) - - # Create dimension variables - create_dimension_variables(self, netcdf) - if self.info: - print("Rank {0:03d}: Dimensions done".format(self.rank)) - - # Close NetCDF - if self.global_attrs is not None: - for att_name, att_value in self.global_attrs.items(): - netcdf.setncattr(att_name, att_value) - - netcdf.close() - - return None - - -def create_dimensions(self, netcdf): - """ - Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - netcdf : Dataset - netcdf4-python open dataset. 
- """ - - # Create time dimension - netcdf.createDimension("time", None) - - # Create lev, lon and lat dimensions - netcdf.createDimension("lat", len(self.get_full_latitudes()["data"])) - netcdf.createDimension("lon", len(self.get_full_longitudes()["data"])) - - return None - - -def create_dimension_variables(self, netcdf): - """ - Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - netcdf : Dataset - netcdf4-python open dataset. - """ - - # LATITUDES - lat = netcdf.createVariable("lat", float64, ("lat",)) - lat.standard_name = "latitude" - lat.long_name = "latitude" - lat.units = "degrees_north" - lat.axis = "Y" - - if self.size > 1: - lat.set_collective(True) - lat[:] = self.get_full_latitudes()["data"] - - # LONGITUDES - lon = netcdf.createVariable("lon", float64, ("lon",)) - lon.long_name = "longitude" - lon.standard_name = "longitude" - lon.units = "degrees_east" - lon.axis = "X" - if self.size > 1: - lon.set_collective(True) - lon[:] = self.get_full_longitudes()["data"] - - # TIMES - time_var = netcdf.createVariable("time", float64, ("time",)) - time_var.standard_name = "time" - time_var.units = "day as %Y%m%d.%f" - time_var.calendar = "proleptic_gregorian" - time_var.axis = "T" - if self.size > 1: - time_var.set_collective(True) - time_var[:] = __date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): - self._get_time_id(self.hours_end, first=False)]) - - return None - - -# noinspection DuplicatedCode -def create_variables(self, netcdf, i_lev): - """ - Create and write variables to a netCDF file. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - netcdf : Dataset - netcdf4-python open dataset. - i_lev : int - The specific level index to write data for. - """ - - for i, (var_name, var_dict) in enumerate(self.variables.items()): - if var_dict["data"] is not None: - if self.info: - print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(self.variables))) - try: - var = netcdf.createVariable(var_name, float32, ("time", "lat", "lon",), - zlib=True, complevel=7, least_significant_digit=3) - - if self.info: - print("Rank {0:03d}: Var {1} created ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - if self.size > 1: - var.set_collective(True) - if self.info: - print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - - if self.info: - print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_dict["data"][:, i_lev, :, :] - - if self.info: - print("Rank {0:03d}: Var {1} data ({2}/{3})".format( - self.rank, var_name, i + 1, len(self.variables))) - var.long_name = var_dict["long_name"] - var.units = var_dict["units"] - var.number_of_significant_digits = int32(3) - - if self.info: - print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, - len(self.variables))) - except Exception as e: - print(f"**ERROR** an error has occurred while writing the '{var_name}' variable") - raise e - else: - msg = "WARNING!!! " - msg += "Variable {0} was not loaded. 
It will not be written.".format(var_name)
-            warn(msg)
-            sys.stderr.flush()
-
-    return None
-
-
-def __date2num(time_array):
-    """
-    Convert an array of datetime objects to numerical values.
-
-    Parameters
-    ----------
-    time_array : List[datetime.datetime]
-        List of datetime objects to be converted.
-
-    Returns
-    -------
-    numpy.ndarray
-        Array of numerical time values, with each date represented as a float.
-
-    Notes
-    -----
-    Each datetime is encoded as the float YYYYMMDD + (hour / 24), matching the "day as %Y%m%d.%f"
-    time units; e.g. 2022-11-16 12:00 becomes 20221116.5.
-    """
-
-    time_res = []
-    for aux_time in time_array:
-        time_res.append(float(aux_time.strftime("%Y%m%d")) + (float(aux_time.strftime("%H")) / 24))
-    time_res = array(time_res, dtype=float64)
-
-    return time_res
diff --git a/build/lib/nes/nes_formats/cmaq_format.py b/build/lib/nes/nes_formats/cmaq_format.py
deleted file mode 100644
index 30a5cea..0000000
--- a/build/lib/nes/nes_formats/cmaq_format.py
+++ /dev/null
@@ -1,355 +0,0 @@
-#!/usr/bin/env python
-
-import nes
-from numpy import float32, array, ndarray, empty, int32, float64
-from netCDF4 import Dataset
-from mpi4py import MPI
-from copy import deepcopy
-from datetime import datetime
-
-GLOBAL_ATTRIBUTES_ORDER = [
-    "IOAPI_VERSION", "EXEC_ID", "FTYPE", "CDATE", "CTIME", "WDATE", "WTIME", "SDATE", "STIME", "TSTEP", "NTHIK",
-    "NCOLS", "NROWS", "NLAYS", "NVARS", "GDTYP", "P_ALP", "P_BET", "P_GAM", "XCENT", "YCENT", "XORIG", "YORIG",
-    "XCELL", "YCELL", "VGTYP", "VGTOP", "VGLVLS", "GDNAM", "UPNAM", "FILEDESC", "HISTORY", "VAR-LIST"]
-
-
-# noinspection DuplicatedCode
-def to_netcdf_cmaq(self, path, keep_open=False):
-    """
-    Create the NetCDF using netcdf4-python methods.
-
-    Parameters
-    ----------
-    self : nes.Nes
-        Source projection Nes Object.
-    path : str
-        Path to the output netCDF file.
-    keep_open : bool
-        Indicates if you want to keep the NetCDF open to fill the data by time-step.
-    """
-
-    self.to_dtype(float32)
-
-    set_global_attributes(self)
-    change_variable_attributes(self)
-
-    # Open NetCDF
-    if self.info:
-        print("Rank {0:03d}: Creating {1}".format(self.rank, path))
-    if self.size > 1:
-        netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info())
-    else:
-        netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False)
-    if self.info:
-        print("Rank {0:03d}: NetCDF ready to write".format(self.rank))
-
-    # Create dimensions
-    create_dimensions(self, netcdf)
-
-    create_dimension_variables(self, netcdf)
-    if self.info:
-        print("Rank {0:03d}: Dimensions done".format(self.rank))
-
-    # Create variables
-    create_variables(self, netcdf)
-
-    for att_name in GLOBAL_ATTRIBUTES_ORDER:
-        netcdf.setncattr(att_name, self.global_attrs[att_name])
-
-    # Close NetCDF
-    if keep_open:
-        self.dataset = netcdf
-    else:
-        netcdf.close()
-
-    return None
-
-
-def change_variable_attributes(self):
-    """
-    Modify the emission variable attributes so that the output can be used as input for the CMAQ model.
-
-    Parameters
-    ----------
-    self : nes.Nes
-        A Nes Object.
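-
-    Notes
-    -----
-    Sketch of the renaming applied to each variable; "NO2" and "nitrogen dioxide" are only
-    illustrative values, the padding widths are the ones used in the body below::
-
-        units = "{:<16}".format("mole/s")               # 16-character padded units
-        var_desc = "{:<80}".format("nitrogen dioxide")  # 80-character padded description
-        long_name = "{:<16}".format("NO2")              # padded variable name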
- """ - - for var_name in self.variables.keys(): - - if self.variables[var_name]["units"] == "mol.s-1": - self.variables[var_name]["units"] = "{:<16}".format("mole/s") - self.variables[var_name]["var_desc"] = "{:<80}".format(self.variables[var_name]["long_name"]) - self.variables[var_name]["long_name"] = "{:<16}".format(var_name) - elif self.variables[var_name]["units"] == "g.s-1": - self.variables[var_name]["units"] = "{:<16}".format("g/s") - self.variables[var_name]["var_desc"] = "{:<80}".format(self.variables[var_name]["long_name"]) - self.variables[var_name]["long_name"] = "{:<16}".format(var_name) - - else: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( - self.variables[var_name]["units"], var_name) + "Should be 'mol.s-1' or 'g.s-1'") - - return None - - -def to_cmaq_units(self): - """ - Change the data values according to the CMAQ conventions - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - dict - Variable in the MONARCH units. - """ - - self.calculate_grid_area(overwrite=False) - for var_name in self.variables.keys(): - if isinstance(self.variables[var_name]["data"], ndarray): - if self.variables[var_name]["units"] == "mol.s-1": - # Kmol.m-2.s-1 to mol.s-1 - self.variables[var_name]["data"] = array( - self.variables[var_name]["data"] * 1000 * self.cell_measures["cell_area"]["data"], dtype=float32) - elif self.variables[var_name]["units"] == "g.s-1": - # Kg.m-2.s-1 to g.s-1 - self.variables[var_name]["data"] = array( - self.variables[var_name]["data"] * 1000 * self.cell_measures["cell_area"]["data"], dtype=float32) - - else: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( - self.variables[var_name]["units"], var_name) + "Should be 'mol.s-1' or 'g.s-1'") - self.variables[var_name]["dtype"] = float32 - - return self.variables - - -def create_tflag(self): - """ - Create the content of the CMAQ variable TFLAG. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - numpy.ndarray - Array with the content of TFLAG. - """ - - t_flag = empty((len(self.time), len(self.variables), 2)) - - for i_d, aux_date in enumerate(self.time): - y_d = int(aux_date.strftime("%Y%j")) - hms = int(aux_date.strftime("%H%M%S")) - for i_p in range(len(self.variables)): - t_flag[i_d, i_p, 0] = y_d - t_flag[i_d, i_p, 1] = hms - - return t_flag - - -def str_var_list(self): - """ - Transform the list of variable names to a string with the elements with 16 white spaces. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - str - List of variable names transformed on string. - """ - - str_var_list_aux = "" - for var in self.variables.keys(): - str_var_list_aux += "{:<16}".format(var) - - return str_var_list_aux - - -# noinspection DuplicatedCode -def set_global_attributes(self): - """ - Set the NetCDF global attributes. - - Parameters - ---------- - self : nes.Nes - A Nes Object. 
- """ - - now = datetime.now() - if len(self.time) > 1: - tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 - else: - tstep = 1 * 10000 - - current_attributes = deepcopy(self.global_attrs) - del self.global_attrs - - self.global_attrs = {"IOAPI_VERSION": "None: made only with NetCDF libraries", - "EXEC_ID": "{:<80}".format("0.1alpha"), # Editable - "FTYPE": int32(1), # Editable - "CDATE": int32(now.strftime("%Y%j")), - "CTIME": int32(now.strftime("%H%M%S")), - "WDATE": int32(now.strftime("%Y%j")), - "WTIME": int32(now.strftime("%H%M%S")), - "SDATE": int32(self.time[0].strftime("%Y%j")), - "STIME": int32(self.time[0].strftime("%H%M%S")), - "TSTEP": int32(tstep), - "NTHIK": int32(1), # Editable - "NCOLS": None, # Projection dependent - "NROWS": None, # Projection dependent - "NLAYS": int32(len(self.lev["data"])), - "NVARS": None, # Projection dependent - "GDTYP": None, # Projection dependent - "P_ALP": None, # Projection dependent - "P_BET": None, # Projection dependent - "P_GAM": None, # Projection dependent - "XCENT": None, # Projection dependent - "YCENT": None, # Projection dependent - "XORIG": None, # Projection dependent - "YORIG": None, # Projection dependent - "XCELL": None, # Projection dependent - "YCELL": None, # Projection dependent - "VGTYP": int32(7), # Editable - "VGTOP": float32(5000.), # Editable - "VGLVLS": array([1., 0.], dtype=float32), # Editable - "GDNAM": "{:<16}".format(""), # Editable - "UPNAM": "{:<16}".format("HERMESv3"), - "FILEDESC": "", # Editable - "HISTORY": "", # Editable - "VAR-LIST": str_var_list(self)} - - # Editable attributes - for att_name, att_value in current_attributes.items(): - if att_name == "EXEC_ID": - self.global_attrs[att_name] = "{:<80}".format(att_value) # Editable - elif att_name == "FTYPE": - self.global_attrs[att_name] = int32(att_value) # Editable - elif att_name == "NTHIK": - self.global_attrs[att_name] = int32(att_value) # Editable - elif att_name == "VGTYP": - self.global_attrs[att_name] = int32(att_value) # Editable - elif att_name == "VGTOP": - self.global_attrs[att_name] = float32(att_value) # Editable - elif att_name == "VGLVLS": - self.global_attrs[att_name] = array(att_value.split(), dtype=float32) # Editable - elif att_name == "GDNAM": - self.global_attrs[att_name] = "{:<16}".format(att_value) # Editable - elif att_name == "FILEDESC": - self.global_attrs[att_name] = att_value # Editable - elif att_name == "HISTORY": - self.global_attrs[att_name] = att_value # Editable - - # Projection dependent attributes - if isinstance(self, nes.LCCNes): - self.global_attrs["NCOLS"] = int32(len(self._full_x["data"])) - self.global_attrs["NROWS"] = int32(len(self._full_y["data"])) - self.global_attrs["NVARS"] = int32(len(self.variables)) - self.global_attrs["GDTYP"] = int32(2) - - self.global_attrs["P_ALP"] = float64(self.projection_data["standard_parallel"][0]) - self.global_attrs["P_BET"] = float64(self.projection_data["standard_parallel"][1]) - self.global_attrs["P_GAM"] = float64(self.projection_data["longitude_of_central_meridian"]) - self.global_attrs["XCENT"] = float64(self.projection_data["longitude_of_central_meridian"]) - self.global_attrs["YCENT"] = float64(self.projection_data["latitude_of_projection_origin"]) - self.global_attrs["XORIG"] = float64( - self._full_x["data"][0]) - (float64(self._full_x["data"][1] - self._full_x["data"][0]) / 2) - self.global_attrs["YORIG"] = float64( - self._full_y["data"][0]) - (float64(self._full_y["data"][1] - self._full_y["data"][0]) / 2) - self.global_attrs["XCELL"] = 
float64(self._full_x["data"][1] - self._full_x["data"][0])
-        self.global_attrs["YCELL"] = float64(self._full_y["data"][1] - self._full_y["data"][0])
-
-    return None
-
-
-def create_dimensions(self, netcdf):
-    """
-    Create the "TSTEP", "DATE-TIME", "LAY", "VAR", "COL" and "ROW" dimensions.
-
-    Parameters
-    ----------
-    self : nes.Nes
-        Nes Object.
-    netcdf : Dataset
-        netcdf4-python open dataset.
-    """
-
-    netcdf.createDimension("TSTEP", len(self.get_full_times()))
-    netcdf.createDimension("DATE-TIME", 2)
-    netcdf.createDimension("LAY", len(self.get_full_levels()["data"]))
-    netcdf.createDimension("VAR", len(self.variables))
-    if isinstance(self, nes.LCCNes):
-        netcdf.createDimension("COL", len(self._full_x["data"]))
-        netcdf.createDimension("ROW", len(self._full_y["data"]))
-
-    return None
-
-
-def create_dimension_variables(self, netcdf):
-    """
-    Create the "TFLAG" variable.
-
-    Parameters
-    ----------
-    self : nes.Nes
-        A Nes Object.
-    netcdf : Dataset
-        NetCDF object.
-    """
-
-    tflag = netcdf.createVariable("TFLAG", "i", ("TSTEP", "VAR", "DATE-TIME",))
-    tflag.setncatts({"units": "{:<16}".format("<YYYYDDD,HHMMSS>"), "long_name": "{:<16}".format("TFLAG"),
-                     "var_desc": "{:<80}".format("Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS")})
-    tflag[:] = create_tflag(self)
-
-    return None
-
-
-# noinspection DuplicatedCode
-def create_variables(self, netcdf):
-    """
-    Create the netCDF file variables.
-
-    Parameters
-    ----------
-    self : nes.Nes
-        Nes Object.
-    netcdf : Dataset
-        netcdf4-python open dataset.
-    """
-
-    for var_name, var_info in self.variables.items():
-        var = netcdf.createVariable(var_name, "f", ("TSTEP", "LAY", "ROW", "COL",),
-                                    zlib=self.zip_lvl > 0, complevel=self.zip_lvl)
-        var.units = var_info["units"]
-        var.long_name = str(var_info["long_name"])
-        var.var_desc = str(var_info["var_desc"])
-        if var_info["data"] is not None:
-            if self.info:
-                print("Rank {0:03d}: Filling {1}".format(self.rank, var_name))
-
-            if isinstance(var_info["data"], int) and var_info["data"] == 0:
-                var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"],
-                    self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                    self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                    self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0
-
-            elif len(var_info["data"].shape) == 4:
-                var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"],
-                    self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"],
-                    self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
-                    self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"]
-
-    return None
diff --git a/build/lib/nes/nes_formats/monarch_format.py b/build/lib/nes/nes_formats/monarch_format.py
deleted file mode 100644
index 0a50e75..0000000
--- a/build/lib/nes/nes_formats/monarch_format.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python
-
-import nes
-from numpy import float32, array, ndarray
-from netCDF4 import Dataset
-from mpi4py import MPI
-
-
-# noinspection DuplicatedCode
-def to_netcdf_monarch(self, path, chunking=False, keep_open=False):
-    """
-    Create the NetCDF using netcdf4-python methods.
-
-    Parameters
-    ----------
-    self : nes.Nes
-        Source projection Nes Object.
-    path : str
-        Path to the output netCDF file.
-    chunking : bool
-        Indicates if you want to chunk the output netCDF.
-    keep_open : bool
-        Indicates if you want to keep the NetCDF open to fill the data by time-step.
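-
-    Notes
-    -----
-    Minimal usage sketch; ``nessy`` stands for any loaded Nes object and the output path is
-    illustrative::
-
-        to_monarch_units(nessy)                      # convert the data units in place
-        to_netcdf_monarch(nessy, "emis_monarch.nc")  # write the CF-1.7 NetCDF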
- """ - - self.to_dtype(float32) - - # Open NetCDF - if self.info: - print("Rank {0:03d}: Creating {1}".format(self.rank, path)) - if self.size > 1: - netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) - else: - netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) - if self.info: - print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) - - # Create dimensions - self._create_dimensions(netcdf) - - # Create dimension variables - if self.master: - self._full_lev["data"] = array(self._full_lev["data"], dtype=float32) - self._full_lat["data"] = array(self._full_lat["data"], dtype=float32) - self._full_lat_bnds["data"] = array(self._full_lat_bnds["data"], dtype=float32) - self._full_lon["data"] = array(self._full_lon["data"], dtype=float32) - self._full_lon_bnds["data"] = array(self._full_lon_bnds["data"], dtype=float32) - - if isinstance(self, nes.RotatedNes): - self._full_rlat["data"] = array(self._full_rlat["data"], dtype=float32) - self._full_rlon["data"] = array(self._full_rlon["data"], dtype=float32) - if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): - self._full_y["data"] = array(self._full_y["data"], dtype=float32) - self._full_x["data"] = array(self._full_x["data"], dtype=float32) - - self._create_dimension_variables(netcdf) - if self.info: - print("Rank {0:03d}: Dimensions done".format(self.rank)) - - # Create cell measures - if "cell_area" in self.cell_measures.keys(): - self.cell_measures["cell_area"]["data"] = array(self.cell_measures["cell_area"]["data"], dtype=float32) - self._create_cell_measures(netcdf) - - # Create variables - self._create_variables(netcdf, chunking=chunking) - - # Create metadata - self._create_metadata(netcdf) - - # Close NetCDF - if self.global_attrs is not None: - for att_name, att_value in self.global_attrs.items(): - netcdf.setncattr(att_name, att_value) - netcdf.setncattr("Conventions", "CF-1.7") - - if keep_open: - self.dataset = netcdf - else: - netcdf.close() - - return None - - -def to_monarch_units(self): - """ - Change the data values according to the MONARCH conventions. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - dict - Variable in the MONARCH units. - """ - - for var_name in self.variables.keys(): - if isinstance(self.variables[var_name]["data"], ndarray): - if self.variables[var_name]["units"] == "mol.s-1.m-2": - # Kmol to mol - self.variables[var_name]["data"] = array(self.variables[var_name]["data"] * 1000, dtype=float32) - elif self.variables[var_name]["units"] == "kg.s-1.m-2": - # No unit change needed - self.variables[var_name]["data"] = array(self.variables[var_name]["data"], dtype=float32) - - else: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. 
".format( - self.variables[var_name]["units"], var_name) + - "Should be 'mol.s-1.m-2' or 'kg.s-1.m-2'") - self.variables[var_name]["dtype"] = float32 - return self.variables diff --git a/build/lib/nes/nes_formats/wrf_chem_format.py b/build/lib/nes/nes_formats/wrf_chem_format.py deleted file mode 100644 index 6a06af4..0000000 --- a/build/lib/nes/nes_formats/wrf_chem_format.py +++ /dev/null @@ -1,398 +0,0 @@ -#!/usr/bin/env python - -import nes -from numpy import float32, int32, ndarray, array, chararray -from netCDF4 import Dataset -from mpi4py import MPI -from copy import deepcopy - -GLOBAL_ATTRIBUTES_ORDER = [ - "TITLE", "START_DATE", "WEST-EAST_GRID_DIMENSION", "SOUTH-NORTH_GRID_DIMENSION", "BOTTOM-TOP_GRID_DIMENSION", "DX", - "DY", "GRIDTYPE", "DIFF_OPT", "KM_OPT", "DAMP_OPT", "DAMPCOEF", "KHDIF", "KVDIF", "MP_PHYSICS", "RA_LW_PHYSICS", - "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", - "SURFACE_INPUT_SOURCE", "SST_UPDATE", "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", - "SGFDDA_INTERVAL_M", "SGFDDA_END_H", "WEST-EAST_PATCH_START_UNSTAG", "WEST-EAST_PATCH_END_UNSTAG", - "WEST-EAST_PATCH_START_STAG", "WEST-EAST_PATCH_END_STAG", "SOUTH-NORTH_PATCH_START_UNSTAG", - "SOUTH-NORTH_PATCH_END_UNSTAG", "SOUTH-NORTH_PATCH_START_STAG", "SOUTH-NORTH_PATCH_END_STAG", - "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", "BOTTOM-TOP_PATCH_START_STAG", - "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", "PARENT_GRID_RATIO", "DT", - "CEN_LAT", "CEN_LON", "TRUELAT1", "TRUELAT2", "MOAD_CEN_LAT", "STAND_LON", "POLE_LAT", "POLE_LON", "GMT", "JULYR", - "JULDAY", "MAP_PROJ", "MMINLU", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] - - -# noinspection DuplicatedCode -def to_netcdf_wrf_chem(self, path, keep_open=False): - """ - Create the NetCDF using netcdf4-python methods. - - Parameters - ---------- - self : nes.Nes - Source projection Nes Object. - path : str - Path to the output netCDF file. - keep_open : bool - Indicates if you want to keep open the NetCDH to fill the data by time-step. - """ - - self.to_dtype(float32) - - set_global_attributes(self) - change_variable_attributes(self) - - # Open NetCDF - if self.info: - print("Rank {0:03d}: Creating {1}".format(self.rank, path)) - if self.size > 1: - netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) - else: - netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) - if self.info: - print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) - - # Create dimensions - create_dimensions(self, netcdf) - - create_dimension_variables(self, netcdf) - if self.info: - print("Rank {0:03d}: Dimensions done".format(self.rank)) - - # Create variables - create_variables(self, netcdf) - - for att_name in GLOBAL_ATTRIBUTES_ORDER: - netcdf.setncattr(att_name, self.global_attrs[att_name]) - - # Close NetCDF - if keep_open: - self.dataset = netcdf - else: - netcdf.close() - - return None - - -def change_variable_attributes(self): - """ - Modify the emission list to be consistent to use the output as input for WRF-CHEM model. - - Parameters - ---------- - self : nes.Nes - A Nes Object. 
- """ - - for var_name in self.variables.keys(): - if self.variables[var_name]["units"] == "mol.h-1.km-2": - self.variables[var_name]["FieldType"] = int32(104) - self.variables[var_name]["MemoryOrder"] = "XYZ" - self.variables[var_name]["description"] = "EMISSIONS" - self.variables[var_name]["units"] = "mol km^-2 hr^-1" - self.variables[var_name]["stagger"] = "" - self.variables[var_name]["coordinates"] = "XLONG XLAT" - - elif self.variables[var_name]["units"] == "ug.s-1.m-2": - self.variables[var_name]["FieldType"] = int32(104) - self.variables[var_name]["MemoryOrder"] = "XYZ" - self.variables[var_name]["description"] = "EMISSIONS" - self.variables[var_name]["units"] = "ug/m3 m/s" - self.variables[var_name]["stagger"] = "" - self.variables[var_name]["coordinates"] = "XLONG XLAT" - - else: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( - self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") - - if "long_name" in self.variables[var_name].keys(): - del self.variables[var_name]["long_name"] - - return None - - -def to_wrf_chem_units(self): - """ - Change the data values according to the WRF-CHEM conventions. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - dict - Variable in the MONARCH units. - """ - - self.calculate_grid_area(overwrite=False) - for var_name in self.variables.keys(): - if isinstance(self.variables[var_name]["data"], ndarray): - if self.variables[var_name]["units"] == "mol.h-1.km-2": - # 10**6 -> from m2 to km2 - # 10**3 -> from kmol to mol - # 3600 -> from s to h - self.variables[var_name]["data"] = array( - self.variables[var_name]["data"] * 10 ** 6 * 10 ** 3 * 3600, dtype=float32) - elif self.variables[var_name]["units"] == "ug.s-1.m-2": - # 10**9 -> from kg to ug - self.variables[var_name]["data"] = array( - self.variables[var_name]["data"] * 10 ** 9, dtype=float32) - - else: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( - self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") - self.variables[var_name]["dtype"] = float32 - - return self.variables - - -def create_times_var(self): - """ - Create the content of the WRF-CHEM variable times. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - - Returns - ------- - numpy.ndarray - Array with the content of TFLAG. - """ - - aux_times = chararray((len(self.time), 19), itemsize=1) - - for i_d, aux_date in enumerate(self.time): - aux_times[i_d] = list(aux_date.strftime("%Y-%m-%d_%H:%M:%S")) - - return aux_times - - -# noinspection DuplicatedCode -def set_global_attributes(self): - """ - Set the NetCDF global attributes - - Parameters - ---------- - self : nes.Nes - A Nes Object. 
- """ - - # now = datetime.now() - # if len(self.time) > 1: - # tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 - # else: - # tstep = 1 * 10000 - - current_attributes = deepcopy(self.global_attrs) - del self.global_attrs - - self.global_attrs = {"TITLE": None, - "START_DATE": self.time[0].strftime("%Y-%m-%d_%H:%M:%S"), - "WEST-EAST_GRID_DIMENSION": None, # Projection dependent attributes - "SOUTH-NORTH_GRID_DIMENSION": None, # Projection dependent attributes - "BOTTOM-TOP_GRID_DIMENSION": int32(45), - "DX": None, # Projection dependent attributes - "DY": None, # Projection dependent attributes - "GRIDTYPE": "C", - "DIFF_OPT": int32(1), - "KM_OPT": int32(4), - "DAMP_OPT": int32(3), - "DAMPCOEF": float32(0.2), - "KHDIF": float32(0.), - "KVDIF": float32(0.), - "MP_PHYSICS": int32(6), - "RA_LW_PHYSICS": int32(4), - "RA_SW_PHYSICS": int32(4), - "SF_SFCLAY_PHYSICS": int32(2), - "SF_SURFACE_PHYSICS": int32(2), - "BL_PBL_PHYSICS": int32(8), - "CU_PHYSICS": int32(0), - "SF_LAKE_PHYSICS": int32(0), - "SURFACE_INPUT_SOURCE": None, # Projection dependent attributes - "SST_UPDATE": int32(0), - "GRID_FDDA": int32(0), - "GFDDA_INTERVAL_M": int32(0), - "GFDDA_END_H": int32(0), - "GRID_SFDDA": int32(0), - "SGFDDA_INTERVAL_M": int32(0), - "SGFDDA_END_H": int32(0), - "WEST-EAST_PATCH_START_UNSTAG": None, # Projection dependent attributes - "WEST-EAST_PATCH_END_UNSTAG": None, # Projection dependent attributes - "WEST-EAST_PATCH_START_STAG": None, # Projection dependent attributes - "WEST-EAST_PATCH_END_STAG": None, # Projection dependent attributes - "SOUTH-NORTH_PATCH_START_UNSTAG": None, # Projection dependent attributes - "SOUTH-NORTH_PATCH_END_UNSTAG": None, # Projection dependent attributes - "SOUTH-NORTH_PATCH_START_STAG": None, # Projection dependent attributes - "SOUTH-NORTH_PATCH_END_STAG": None, # Projection dependent attributes - "BOTTOM-TOP_PATCH_START_UNSTAG": None, - "BOTTOM-TOP_PATCH_END_UNSTAG": None, - "BOTTOM-TOP_PATCH_START_STAG": None, - "BOTTOM-TOP_PATCH_END_STAG": None, - "GRID_ID": int32(1), - "PARENT_ID": int32(0), - "I_PARENT_START": int32(1), - "J_PARENT_START": int32(1), - "PARENT_GRID_RATIO": int32(1), - "DT": float32(18.), - "CEN_LAT": None, # Projection dependent attributes - "CEN_LON": None, # Projection dependent attributes - "TRUELAT1": None, # Projection dependent attributes - "TRUELAT2": None, # Projection dependent attributes - "MOAD_CEN_LAT": None, # Projection dependent attributes - "STAND_LON": None, # Projection dependent attributes - "POLE_LAT": None, # Projection dependent attributes - "POLE_LON": None, # Projection dependent attributes - "GMT": float32(self.time[0].hour), - "JULYR": int32(self.time[0].year), - "JULDAY": int32(self.time[0].strftime("%j")), - "MAP_PROJ": None, # Projection dependent attributes - "MMINLU": "MODIFIED_IGBP_MODIS_NOAH", - "NUM_LAND_CAT": int32(41), - "ISWATER": int32(17), - "ISLAKE": int32(-1), - "ISICE": int32(15), - "ISURBAN": int32(13), - "ISOILWATER": int32(14), - "HISTORY": "", # Editable - } - - # Editable attributes - float_atts = ["DAMPCOEF", "KHDIF", "KVDIF", "CEN_LAT", "CEN_LON", "DT"] - int_atts = ["BOTTOM-TOP_GRID_DIMENSION", "DIFF_OPT", "KM_OPT", "DAMP_OPT", - "MP_PHYSICS", "RA_LW_PHYSICS", "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", - "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", "SURFACE_INPUT_SOURCE", "SST_UPDATE", - "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", "SGFDDA_INTERVAL_M", "SGFDDA_END_H", - "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", 
"BOTTOM-TOP_PATCH_START_STAG", - "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", - "PARENT_GRID_RATIO", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] - str_atts = ["GRIDTYPE", "MMINLU", "HISTORY"] - for att_name, att_value in current_attributes.items(): - if att_name in int_atts: - self.global_attrs[att_name] = int32(att_value) - elif att_name in float_atts: - self.global_attrs[att_name] = float32(att_value) - elif att_name in str_atts: - self.global_attrs[att_name] = str(att_value) - - # Projection dependent attributes - if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): - self.global_attrs["WEST-EAST_GRID_DIMENSION"] = int32(len(self._full_x["data"]) + 1) - self.global_attrs["SOUTH-NORTH_GRID_DIMENSION"] = int32(len(self._full_y["data"]) + 1) - self.global_attrs["DX"] = float32(self._full_x["data"][1] - self._full_x["data"][0]) - self.global_attrs["DY"] = float32(self._full_y["data"][1] - self._full_y["data"][0]) - self.global_attrs["SURFACE_INPUT_SOURCE"] = int32(1) - self.global_attrs["WEST-EAST_PATCH_START_UNSTAG"] = int32(1) - self.global_attrs["WEST-EAST_PATCH_END_UNSTAG"] = int32(len(self._full_x["data"])) - self.global_attrs["WEST-EAST_PATCH_START_STAG"] = int32(1) - self.global_attrs["WEST-EAST_PATCH_END_STAG"] = int32(len(self._full_x["data"]) + 1) - self.global_attrs["SOUTH-NORTH_PATCH_START_UNSTAG"] = int32(1) - self.global_attrs["SOUTH-NORTH_PATCH_END_UNSTAG"] = int32(len(self._full_y["data"])) - self.global_attrs["SOUTH-NORTH_PATCH_START_STAG"] = int32(1) - self.global_attrs["SOUTH-NORTH_PATCH_END_STAG"] = int32(len(self._full_y["data"]) + 1) - - self.global_attrs["POLE_LAT"] = float32(90) - self.global_attrs["POLE_LON"] = float32(0) - - if isinstance(self, nes.LCCNes): - self.global_attrs["MAP_PROJ"] = int32(1) - self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"][0]) - self.global_attrs["TRUELAT2"] = float32(self.projection_data["standard_parallel"][1]) - self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) - self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) - self.global_attrs["CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) - self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) - elif isinstance(self, nes.MercatorNes): - self.global_attrs["MAP_PROJ"] = int32(3) - self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"]) - self.global_attrs["TRUELAT2"] = float32(0) - self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["standard_parallel"]) - self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) - self.global_attrs["CEN_LAT"] = float32(self.projection_data["standard_parallel"]) - self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) - - return None - - -def create_dimensions(self, netcdf): - """ - Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. - - Parameters - ---------- - self : nes.Nes - Nes Object. - netcdf : Dataset - netcdf4-python open dataset. 
- """ - - netcdf.createDimension("Time", len(self.get_full_times())) - netcdf.createDimension("DateStrLen", 19) - netcdf.createDimension("emissions_zdim", len(self.get_full_levels()["data"])) - if isinstance(self, nes.LCCNes): - netcdf.createDimension("west_east", len(self._full_x["data"])) - netcdf.createDimension("south_north", len(self._full_y["data"])) - - return None - - -def create_dimension_variables(self, netcdf): - """ - Create the "y" and "x" variables. - - Parameters - ---------- - self : nes.Nes - A Nes Object. - netcdf : Dataset - NetCDF object. - """ - - times = netcdf.createVariable("Times", "S1", ("Time", "DateStrLen", )) - times[:] = create_times_var(self) - - return None - - -# noinspection DuplicatedCode -def create_variables(self, netcdf): - """ - Create the netCDF file variables. - - Parameters - ---------- - self : nes.Nes - Nes Object. - netcdf : Dataset - netcdf4-python open dataset. - """ - - for var_name, var_info in self.variables.items(): - var = netcdf.createVariable(var_name, "f", ("Time", "emissions_zdim", "south_north", "west_east",), - zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - var.FieldType = var_info["FieldType"] - var.MemoryOrder = var_info["MemoryOrder"] - var.description = var_info["description"] - var.units = var_info["units"] - var.stagger = var_info["stagger"] - var.coordinates = var_info["coordinates"] - - if var_info["data"] is not None: - if self.info: - print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - - if isinstance(var_info["data"], int) and var_info["data"] == 0: - var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], - self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 - - elif len(var_info["data"].shape) == 4: - var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], - self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], - self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], - self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"] - - return None diff --git a/build/lib/tests/1.1-test_read_write_projection.py b/build/lib/tests/1.1-test_read_write_projection.py deleted file mode 100644 index 5788b30..0000000 --- a/build/lib/tests/1.1-test_read_write_projection.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python - -import sys -import timeit -import pandas as pd -from mpi4py import MPI -from nes import open_netcdf - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' - -result_path = "Times_test_1.1_read_write_projection_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'write'], - columns=['1.1.1.Regular', '1.1.2.Rotated', '1.1.3.Points', '1.1.4.Points_GHOST', - '1.1.5.LCC', '1.1.6.Mercator']) - -# ====================================================================================================================== -# ============================================= REGULAR ======================================================== -# ====================================================================================================================== - -test_name = '1.1.1.Regular' -if rank == 0: - print(test_name) -comm.Barrier() - -# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc -# Regular lat-lon grid from MONARCH -path = 
'/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -variables = ['sconcno2'] -nessy.keep_vars(variables) -nessy.load() - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================= ROTATED ======================================================== -# ====================================================================================================================== - -test_name = '1.1.2.Rotated' -if rank == 0: - print(test_name) - -# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all/O3_all-000_2021080300.nc -# Rotated grid from MONARCH -path = '/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -variables = ['O3_all'] -nessy.keep_vars(variables) -nessy.load() - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - - -# ====================================================================================================================== -# ============================================= LCC ============================================================ -# ====================================================================================================================== - -test_name = '1.1.5.LCC' -if rank == 0: - print(test_name) - -# Original path: /esarchive/exp/snes/a5g1/ip/daily_max/sconco3/sconco3_2022111500.nc -# LCC grid with a coverage over the Iberian Peninsula (4x4km) -path = '/gpfs/projects/bsc32/models/NES_tutorial_data/sconco3_2022111500.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -nessy.load() - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================= MERCATOR ======================================================== -# ====================================================================================================================== - -test_name = '1.1.6.Mercator' -if rank == 0: - 
print(test_name) - -# Original path: None (generated with NES) -# Mercator grid -path = '/gpfs/projects/bsc32/models/NES_tutorial_data/mercator_grid.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -nessy.load() - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -if rank == 0: - result.to_csv(result_path) - -# ====================================================================================================================== -# ============================================= POINTS ========================================================= -# ====================================================================================================================== - -test_name = '1.1.3.Points' -if rank == 0: - print(test_name) - -# Original path: /esarchive/obs/nilu/ebas/daily/pm10/pm10_201507.nc -# Points grid from EBAS network -path = '/gpfs/projects/bsc32/models/NES_tutorial_data/pm10_201507.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -nessy.load() - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================= POINTS GHOST =================================================== -# ====================================================================================================================== - -test_name = '1.1.4.Points_GHOST' -if rank == 0: - print(test_name) - -path = '/gpfs/projects/bsc32/AC_cache/obs/ghost/EBAS/1.4/hourly/sconco3/sconco3_201906.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -nessy.load() - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -if rank == 0: - result.to_csv(result_path) - print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/1.2-test_create_projection.py b/build/lib/tests/1.2-test_create_projection.py deleted file mode 100644 index 60c470a..0000000 --- a/build/lib/tests/1.2-test_create_projection.py +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/env python - -import sys -from mpi4py import MPI -import pandas as pd -import timeit -from nes import create_nes - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' - -result_path = 
"Times_test_1.2_create_projection_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['create', 'write'], - columns=['1.2.1.Regular', '1.2.2.Rotated', '1.2.3.LCC', '1.2.4.Mercator', '1.2.5.Global']) - -# ====================================================================================================================== -# ============================================= REGULAR ======================================================== -# ====================================================================================================================== - -test_name = '1.2.1.Regular' -if rank == 0: - print(test_name) - -# CREATE GRID -st_time = timeit.default_timer() -lat_orig = 41.1 -lon_orig = 1.8 -inc_lat = 0.01 -inc_lon = 0.01 -n_lat = 100 -n_lon = 100 -nessy = create_nes(projection='regular', parallel_method=parallel_method, - lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon, n_lat=n_lat, n_lon=n_lon) - -comm.Barrier() -result.loc['create', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================= ROTATED ======================================================== -# ====================================================================================================================== - -test_name = '1.2.2.Rotated' -if rank == 0: - print(test_name) - -# CREATE GRID -st_time = timeit.default_timer() -centre_lat = 51 -centre_lon = 10 -west_boundary = -35 -south_boundary = -27 -inc_rlat = 0.2 -inc_rlon = 0.2 -nessy = create_nes(projection='rotated', parallel_method=parallel_method, - centre_lat=centre_lat, centre_lon=centre_lon, - west_boundary=west_boundary, south_boundary=south_boundary, - inc_rlat=inc_rlat, inc_rlon=inc_rlon) - -comm.Barrier() -result.loc['create', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================= LCC ======================================================== -# ====================================================================================================================== - -test_name = '1.2.3.LCC' -if rank == 0: - print(test_name) - -# CREATE GRID -st_time = timeit.default_timer() -lat_1 = 37 -lat_2 = 43 -lon_0 = -3 -lat_0 = 40 -nx = 397 -ny = 397 -inc_x = 4000 -inc_y = 4000 -x_0 = -807847.688 -y_0 = -797137.125 -nessy = create_nes(projection='lcc', parallel_method=parallel_method, - lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, - nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) - -comm.Barrier() -result.loc['create', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) -comm.Barrier() 
-result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================= MERCATOR ======================================================== -# ====================================================================================================================== - -test_name = '1.2.4.Mercator' -if rank == 0: - print(test_name) - -# CREATE GRID -st_time = timeit.default_timer() -lat_ts = -1.5 -lon_0 = -18.0 -nx = 210 -ny = 236 -inc_x = 50000 -inc_y = 50000 -x_0 = -126017.5 -y_0 = -5407460.0 -nessy = create_nes(projection='mercator', parallel_method=parallel_method, - lat_ts=lat_ts, lon_0=lon_0, nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) - -comm.Barrier() -result.loc['create', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ============================================== GLOBAL ======================================================== -# ====================================================================================================================== - -test_name = '1.2.5.Global' -if rank == 0: - print(test_name) - -# CREATE GRID -st_time = timeit.default_timer() -inc_lat = 0.1 -inc_lon = 0.1 -nessy = create_nes(projection='global', parallel_method=parallel_method, inc_lat=inc_lat, inc_lon=inc_lon) - -comm.Barrier() -result.loc['create', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -if rank == 0: - result.to_csv(result_path) - print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/1.3-test_selecting.py b/build/lib/tests/1.3-test_selecting.py deleted file mode 100644 index 00bbb23..0000000 --- a/build/lib/tests/1.3-test_selecting.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python -import sys -import timeit -import pandas as pd -from mpi4py import MPI -from nes import open_netcdf -from datetime import datetime - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' -serial_write = True - -result_path = "Times_test_1.3.Selecting_{0}_{1:03d}.csv".format(parallel_method, size) - -result = pd.DataFrame(index=['read', 'calcul', 'write'], - columns=['1.3.1.LatLon', '1.3.2.Level', '1.3.3.Time', '1.3.4.Time_min', '1.3.5.Time_max']) - -# NAMEE -src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc" -var_list = ['O3'] - -# ====================================================================================================================== -# ====================================== '1.3.1.LatLon' ===================================================== -# 
====================================================================================================================== -test_name = '1.3.1.Selecting_LatLon' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() - -# Source data -nessy = open_netcdf(src_path, parallel_method=parallel_method, balanced=True) -nessy.keep_vars(var_list) -nessy.sel(lat_min=35, lat_max=45, lon_min=-9, lon_max=5) - -nessy.load() - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) - -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== 1.3.2.Level ===================================================== -# ====================================================================================================================== -test_name = '1.3.2.Selecting_Level' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() - -# Source data -nessy = open_netcdf(src_path, parallel_method=parallel_method) -nessy.keep_vars(var_list) -nessy.sel(lev_min=3, lev_max=5) - -nessy.load() - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) - -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== 1.3.3.Time ===================================================== -# ====================================================================================================================== -test_name = '1.3.3.Selecting_Time' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() - -# Source data -nessy = open_netcdf(src_path, parallel_method=parallel_method) -nessy.keep_vars(var_list) -nessy.sel(time_min=datetime(year=2022, month=11, day=16, hour=0), - time_max=datetime(year=2022, month=11, day=16, hour=0)) - -nessy.load() - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) - -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== '1.3.4.Time_min' ===================================================== -# ====================================================================================================================== -test_name = '1.3.4.Selecting_Time_min' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() - -# Source data -nessy = open_netcdf(src_path, parallel_method=parallel_method) -nessy.keep_vars(var_list) -nessy.sel(time_min=datetime(year=2022, month=11, day=16, 
hour=0)) - -nessy.load() - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) - -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== '1.3.5.Time_max' ===================================================== -# ====================================================================================================================== -test_name = '1.3.5.Selecting_Time_max' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() - -# Source data -nessy = open_netcdf(src_path, parallel_method=parallel_method) -nessy.keep_vars(var_list) -nessy.sel(time_max=datetime(year=2022, month=11, day=16, hour=0)) - -nessy.load() - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) - -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -if rank == 0: - result.to_csv(result_path) - print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.1-test_spatial_join.py b/build/lib/tests/2.1-test_spatial_join.py deleted file mode 100644 index e24d443..0000000 --- a/build/lib/tests/2.1-test_spatial_join.py +++ /dev/null @@ -1,329 +0,0 @@ -#!/usr/bin/env python - -import sys -from mpi4py import MPI -import pandas as pd -import timeit -from nes import open_netcdf, from_shapefile - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' -serial_write = False - -result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'calculate', 'write'], - columns=['2.1.1.Existing_file_centroid', '2.1.2.New_file_centroid', - '2.1.3.Existing_file_nearest', '2.1.4.New_file_nearest', - '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection']) - -# ===== PATH TO MASK ===== # -# Timezones -# shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' -# shapefile_var_list = ['tzid'] -# str_len = 32 -# Country ISO codes -shapefile_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp" -shapefile_var_list = ['ISO'] -str_len = 3 - -# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc -# Regular lat-lon grid from MONARCH -original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' -# CAMS_Global -# original_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" - -# ====================================================================================================================== -# =================================== CENTROID EXISTING FILE =================================================== -# ====================================================================================================================== - -test_name = '2.1.1.Existing_file_centroid' -if rank == 0: - print(test_name) - -# READ -st_time = timeit.default_timer() -nessy = 
diff --git a/build/lib/tests/2.1-test_spatial_join.py b/build/lib/tests/2.1-test_spatial_join.py
deleted file mode 100644
index e24d443..0000000
--- a/build/lib/tests/2.1-test_spatial_join.py
+++ /dev/null
@@ -1,329 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-from mpi4py import MPI
-import pandas as pd
-import timeit
-from nes import open_netcdf, from_shapefile
-
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-parallel_method = 'Y'
-serial_write = False
-
-result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size)
-result = pd.DataFrame(index=['read', 'calculate', 'write'],
-                      columns=['2.1.1.Existing_file_centroid', '2.1.2.New_file_centroid',
-                               '2.1.3.Existing_file_nearest', '2.1.4.New_file_nearest',
-                               '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection'])
-
-# ===== PATH TO MASK ===== #
-# Timezones
-# shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp'
-# shapefile_var_list = ['tzid']
-# str_len = 32
-# Country ISO codes
-shapefile_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp"
-shapefile_var_list = ['ISO']
-str_len = 3
-
-# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc
-# Regular lat-lon grid from MONARCH
-original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc'
-# CAMS_Global
-# original_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc"
-
-# ======================================================================================================================
-# ===================================  CENTROID EXISTING FILE  ========================================================
-# ======================================================================================================================
-
-test_name = '2.1.1.Existing_file_centroid'
-if rank == 0:
-    print(test_name)
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(original_path, parallel_method=parallel_method)
-nessy.variables = {}
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# SPATIAL JOIN
-# Method can be centroid, nearest and intersection
-st_time = timeit.default_timer()
-nessy.spatial_join(shapefile_path, method='centroid', var_list=shapefile_var_list, info=True)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-
-# ADD Var
-for var_name in shapefile_var_list:
-    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
-    nessy.variables[var_name] = {'data': data, 'dtype': str}
-nessy.set_strlen(str_len)
-comm.Barrier()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write)
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-# REWRITE
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), serial=serial_write)
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================  CENTROID FROM NEW FILE  ========================================================
-# ======================================================================================================================
-
-test_name = '2.1.2.New_file_centroid'
-if rank == 0:
-    print(test_name)
-
-# DEFINE PROJECTION
-st_time = timeit.default_timer()
-projection = 'regular'
-lat_orig = 41.1
-lon_orig = 1.8
-inc_lat = 0.2
-inc_lon = 0.2
-n_lat = 100
-n_lon = 100
-
-# SPATIAL JOIN
-# Method can be centroid, nearest and intersection
-nessy = from_shapefile(shapefile_path, method='centroid', projection=projection,
-                       lat_orig=lat_orig, lon_orig=lon_orig,
-                       inc_lat=inc_lat, inc_lon=inc_lon,
-                       n_lat=n_lat, n_lon=n_lon)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-# ADD Var
-for var_name in shapefile_var_list:
-    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
-    nessy.variables[var_name] = {'data': data, 'dtype': str}
-nessy.set_strlen(str_len)
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================  NEAREST EXISTING FILE  =========================================================
-# ======================================================================================================================
-
-test_name = '2.1.3.Existing_file_nearest'
-if rank == 0:
-    print(test_name)
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(original_path, parallel_method=parallel_method)
-nessy.variables = {}
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# SPATIAL JOIN
-# Method can be centroid, nearest and intersection
-st_time = timeit.default_timer()
-nessy.spatial_join(shapefile_path, method='nearest', var_list=shapefile_var_list, info=True)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-# ADD Var
-for var_name in shapefile_var_list:
-    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
-    nessy.variables[var_name] = {'data': data, 'dtype': str}
-nessy.set_strlen(str_len)
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================  NEAREST FROM NEW FILE  =========================================================
-# ======================================================================================================================
-
-test_name = '2.1.4.New_file_nearest'
-if rank == 0:
-    print(test_name)
-
-# DEFINE PROJECTION
-st_time = timeit.default_timer()
-projection = 'regular'
-lat_orig = 41.1
-lon_orig = 1.8
-inc_lat = 0.2
-inc_lon = 0.2
-n_lat = 100
-n_lon = 100
-
-# SPATIAL JOIN
-# Method can be centroid, nearest and intersection
-nessy = from_shapefile(shapefile_path, method='nearest', projection=projection,
-                       lat_orig=lat_orig, lon_orig=lon_orig,
-                       inc_lat=inc_lat, inc_lon=inc_lon,
-                       n_lat=n_lat, n_lon=n_lon)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-# ADD Var
-for var_name in shapefile_var_list:
-    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
-    nessy.variables[var_name] = {'data': data, 'dtype': str}
-nessy.set_strlen(str_len)
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-
-# ======================================================================================================================
-# ===================================  INTERSECTION EXISTING FILE  ====================================================
-# ======================================================================================================================
-
-test_name = '2.1.5.Existing_file_intersection'
-if rank == 0:
-    print(test_name)
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(original_path, parallel_method=parallel_method)
-nessy.variables = {}
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# SPATIAL JOIN
-# Method can be centroid, nearest and intersection
-st_time = timeit.default_timer()
-nessy.spatial_join(shapefile_path, method='intersection', var_list=shapefile_var_list, info=True)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-# ADD Var
-for var_name in shapefile_var_list:
-    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
-    nessy.variables[var_name] = {'data': data, 'dtype': str}
-nessy.set_strlen(str_len)
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.set_strlen(strlen=str_len)
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================  INTERSECTION FROM NEW FILE  ====================================================
-# ======================================================================================================================
-
-test_name = '2.1.6.New_file_intersection'
-if rank == 0:
-    print(test_name)
-
-# DEFINE PROJECTION
-st_time = timeit.default_timer()
-projection = 'regular'
-lat_orig = 41.1
-lon_orig = 1.8
-inc_lat = 0.2
-inc_lon = 0.2
-n_lat = 100
-n_lon = 100
-
-# SPATIAL JOIN
-# Method can be centroid, nearest and intersection
-nessy = from_shapefile(shapefile_path, method='intersection', projection=projection,
-                       lat_orig=lat_orig, lon_orig=lon_orig,
-                       inc_lat=inc_lat, inc_lon=inc_lon,
-                       n_lat=n_lat, n_lon=n_lon)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-# ADD Var
-for var_name in shapefile_var_list:
-    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
-    nessy.variables[var_name] = {'data': data, 'dtype': str}
-nessy.set_strlen(str_len)
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
-nessy.load()
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-if rank == 0:
-    result.to_csv(result_path)
-    print("TEST PASSED SUCCESSFULLY!!!!!")
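The three spatial_join modes exercised above differ in how a grid cell picks up a polygon attribute: 'centroid' keeps the polygon containing the cell centre, 'nearest' the closest polygon, and 'intersection' assigns by overlap. A minimal sketch of the pattern, with an illustrative shapefile path and attribute column:

    from nes import create_nes

    nessy = create_nes(comm=None, info=False, projection='regular',
                       lat_orig=41.1, lon_orig=1.8, inc_lat=0.2, inc_lon=0.2,
                       n_lat=100, n_lon=100)
    nessy.create_shapefile()                           # one polygon per grid cell
    nessy.spatial_join('mask.shp', method='centroid',  # hypothetical mask shapefile
                       var_list=['ISO'], info=False)   # 'ISO' is an assumed attribute column
    print(nessy.shapefile['ISO'])                      # joined values, one per cell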
diff --git a/build/lib/tests/2.2-test_create_shapefile.py b/build/lib/tests/2.2-test_create_shapefile.py
deleted file mode 100644
index 6d443a7..0000000
--- a/build/lib/tests/2.2-test_create_shapefile.py
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import timeit
-import pandas as pd
-from mpi4py import MPI
-import datetime
-from nes import create_nes, open_netcdf
-
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-parallel_method = 'Y'
-
-result_path = "Times_test_2.2_create_shapefile_{0}_{1:03d}.csv".format(parallel_method, size)
-result = pd.DataFrame(index=['read', 'calculate'],
-                      columns=['2.2.1.Existing', '2.2.2.New_Regular',
-                               '2.2.3.New_Rotated', '2.2.4.New_LCC', '2.2.5.New_Mercator'])
-
-# ======================================================================================================================
-# ===================================== CREATE SHAPEFILE FROM EXISTING GRID ===========================================
-# ======================================================================================================================
-
-test_name = '2.2.1.Existing'
-if rank == 0:
-    print(test_name)
-
-# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc
-# Regular lat-lon grid from MONARCH
-path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc'
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(path=path, info=True, parallel_method=parallel_method)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# LOAD VARIABLES
-nessy.load()
-
-# CREATE SHAPEFILE
-st_time = timeit.default_timer()
-nessy.to_shapefile(path='regular_shp',
-                   time=datetime.datetime(2019, 1, 1, 10, 0),
-                   lev=0, var_list=['sconcno2'])
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM EXISTING GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================== CREATE SHAPEFILE FROM NEW REGULAR GRID ========================================
-# ======================================================================================================================
-
-test_name = '2.2.2.New_Regular'
-if rank == 0:
-    print(test_name)
-
-# CREATE GRID
-st_time = timeit.default_timer()
-lat_orig = 41.1
-lon_orig = 1.8
-inc_lat = 0.1
-inc_lon = 0.1
-n_lat = 50
-n_lon = 100
-nessy = create_nes(comm=None, info=False, projection='regular',
-                   lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon,
-                   n_lat=n_lat, n_lon=n_lon)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CREATE SHAPEFILE
-st_time = timeit.default_timer()
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM NEW REGULAR GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================== CREATE SHAPEFILE FROM NEW ROTATED GRID ========================================
-# ======================================================================================================================
-
-test_name = '2.2.3.New_Rotated'
-if rank == 0:
-    print(test_name)
-
-# CREATE GRID
-st_time = timeit.default_timer()
-centre_lat = 51
-centre_lon = 10
-west_boundary = -35
-south_boundary = -27
-inc_rlat = 0.2
-inc_rlon = 0.2
-nessy = create_nes(comm=None, info=False, projection='rotated',
-                   centre_lat=centre_lat, centre_lon=centre_lon,
-                   west_boundary=west_boundary, south_boundary=south_boundary,
-                   inc_rlat=inc_rlat, inc_rlon=inc_rlon)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CREATE SHAPEFILE
-st_time = timeit.default_timer()
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM NEW ROTATED GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================== CREATE SHAPEFILE FROM NEW LCC GRID ============================================
-# ======================================================================================================================
-
-test_name = '2.2.4.New_LCC'
-if rank == 0:
-    print(test_name)
-
-# CREATE GRID
-st_time = timeit.default_timer()
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 100
-ny = 200
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-nessy = create_nes(comm=None, info=False, projection='lcc',
-                   lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                   nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CREATE SHAPEFILE
-st_time = timeit.default_timer()
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM NEW LCC GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================== CREATE SHAPEFILE FROM NEW MERCATOR GRID =======================================
-# ======================================================================================================================
-
-test_name = '2.2.5.New_Mercator'
-if rank == 0:
-    print(test_name)
-
-# CREATE GRID
-st_time = timeit.default_timer()
-lat_ts = -1.5
-lon_0 = -18.0
-nx = 100
-ny = 50
-inc_x = 50000
-inc_y = 50000
-x_0 = -126017.5
-y_0 = -5407460.0
-nessy = create_nes(comm=None, info=False, projection='mercator',
-                   lat_ts=lat_ts, lon_0=lon_0, nx=nx, ny=ny,
-                   inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CREATE SHAPEFILE
-st_time = timeit.default_timer()
-nessy.create_shapefile()
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-print('FROM NEW MERCATOR GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-if rank == 0:
-    result.to_csv(result_path)
-    print("TEST PASSED SUCCESSFULLY!!!!!")
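Every grid in the deleted script above is produced by the same create_nes call; only the projection keyword and its parameters change ('regular', 'rotated', 'lcc', 'mercator'). A minimal sketch for the LCC case, reusing the domain numbers from test 2.2.4:

    from nes import create_nes

    nessy = create_nes(comm=None, info=False, projection='lcc',
                       lat_1=37, lat_2=43, lon_0=-3, lat_0=40,
                       nx=100, ny=200, inc_x=4000, inc_y=4000,
                       x_0=-807847.688, y_0=-797137.125)
    nessy.create_shapefile()  # cell polygons in geographic coordinates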
"Times_test_2.3_bounds_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'calculate', 'write'], - columns=['2.3.1.With_bounds', '2.3.2.Without_bounds', "2.3.3.Create_new", - "2.3.4.latlon_sel_create_bnds", "2.3.5.rotated_sel_create_bnds"]) - -# ====================================================================================================================== -# ===================================== FILE WITH EXISTING BOUNDS ==================================================== -# ====================================================================================================================== - -test_name = "2.3.1.With_bounds" -if rank == 0: - print(test_name) - -# READ -st_time = timeit.default_timer() -# Original path: /esarchive/exp/snes/a5s1/regional/3hourly/od550du/od550du-000_2021070612.nc -# Rotated grid for dust regional -path_1 = '/gpfs/projects/bsc32/models/NES_tutorial_data/od550du-000_2021070612.nc' -nessy_1 = open_netcdf(path=path_1, parallel_method=parallel_method, info=True) - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# EXPLORE BOUNDS -st_time = timeit.default_timer() -print('FILE WITH EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_1.lat_bnds) -print('FILE WITH EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_1.lon_bnds) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy_1.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy_2 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) - -# LOAD DATA AND EXPLORE BOUNDS -print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_2.lat_bnds) -print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_2.lon_bnds) - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# =================================== FILE WITHOUT EXISTING BOUNDS =================================================== -# ====================================================================================================================== - -test_name = '2.3.2.Without_bounds' -if rank == 0: - print(test_name) - -# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all -# /O3_all-000_2021080300.nc Rotated grid from MONARCH -st_time = timeit.default_timer() -path_3 = "/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc" -nessy_3 = open_netcdf(path=path_3, parallel_method=parallel_method, info=True) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# CREATE BOUNDS -st_time = timeit.default_timer() -nessy_3.create_spatial_bounds() -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# EXPLORE BOUNDS -print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_3.lat_bnds) -print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_3.lon_bnds) - -# WRITE -st_time = timeit.default_timer() -nessy_3.to_netcdf('/tmp/bounds_file_2.nc', info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy_4 = 
open_netcdf('/tmp/bounds_file_2.nc', info=True) - -# LOAD DATA AND EXPLORE BOUNDS -print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_4.lat_bnds) -print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_4.lon_bnds) - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ==================================== CREATE NES REGULAR LAT-LON ==================================================== -# ====================================================================================================================== - -test_name = "2.3.3.Create_new" -if rank == 0: - print(test_name) - -# CREATE GRID -st_time = timeit.default_timer() -lat_orig = 41.1 -lon_orig = 1.8 -inc_lat = 0.2 -inc_lon = 0.2 -n_lat = 100 -n_lon = 100 -nessy_5 = create_nes(comm=None, parallel_method=parallel_method, info=True, projection='regular', - lat_orig=lat_orig, lon_orig=lon_orig, - inc_lat=inc_lat, inc_lon=inc_lon, - n_lat=n_lat, n_lon=n_lon) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# CREATE BOUNDS -st_time = timeit.default_timer() -nessy_5.create_spatial_bounds() -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# EXPLORE BOUNDS -print('FROM NEW GRID - Rank', rank, '-', 'Lat bounds', nessy_5.lat_bnds) -print('FROM NEW GRID - Rank', rank, '-', 'Lon bounds', nessy_5.lon_bnds) - -# WRITE -st_time = timeit.default_timer() -nessy_5.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy_6 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) - -# LOAD DATA AND EXPLORE BOUNDS -print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_6.lat_bnds) -print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_6.lon_bnds) - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - - -# ====================================================================================================================== -# ================================ REGULAR LAT-LON SEL THEN CREATE BOUNDS ============================================= -# ====================================================================================================================== - -test_name = "2.3.4.latlon_sel_create_bnds" -if rank == 0: - print(test_name) - -# USE SAME GRID SETTING AS 2.3.3 -nessy_7 = create_nes(comm=None, parallel_method=parallel_method, info=True, projection='regular', - lat_orig=lat_orig, lon_orig=lon_orig, - inc_lat=inc_lat, inc_lon=inc_lon, - n_lat=n_lat, n_lon=n_lon) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# SEL -nessy_7.sel(lat_min=50, lat_max=60, lon_min=10, lon_max=20) - -# CREATE BOUNDS -st_time = timeit.default_timer() -nessy_7.create_spatial_bounds() -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# EXPLORE BOUNDS -print('FROM NEW GRID - Rank', rank, '-', 'Lat bounds', nessy_7.lat_bnds) -print('FROM NEW GRID - Rank', rank, '-', 'Lon bounds', nessy_7.lon_bnds) - -# Check lon_bnds -if nessy_7.lon_bnds['data'].shape != (52, 2): - raise Exception("Wrong lon_bnds.") - -# WRITE -st_time = timeit.default_timer() -nessy_7.to_netcdf(test_name.replace(' ', '_') 
+ "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy_8 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) - -# LOAD DATA AND EXPLORE BOUNDS -print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_8.lat_bnds) -print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_8.lon_bnds) - -# Check lon_bnds -if nessy_8.lon_bnds['data'].shape != (52, 2): - raise Exception("Wrong lon_bnds.") - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - - -# ====================================================================================================================== -# ================================ ROTATED SEL THEN CREATE BOUNDS ============================================= -# ====================================================================================================================== - -test_name = "2.3.5.rotated_sel_create_bnds" -if rank == 0: - print(test_name) - -# USE FILE AS 2.3.2 - -# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all -# /O3_all-000_2021080300.nc Rotated grid from MONARCH -st_time = timeit.default_timer() -path_9 = "/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc" -nessy_9 = open_netcdf(path=path_9, parallel_method=parallel_method, info=True) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# SEL -nessy_9.sel(lat_min=50, lat_max=60, lon_min=10, lon_max=15) - -# CREATE BOUNDS -st_time = timeit.default_timer() -nessy_9.create_spatial_bounds() -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# EXPLORE BOUNDS -print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_9.lat_bnds) -print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_9.lon_bnds) - -# Check lon_bnds -if nessy_9.lon_bnds['data'].shape[0:2] != nessy_9.lon['data'].shape: - raise Exception("Wrong lon_bnds.") - -# WRITE -st_time = timeit.default_timer() -nessy_9.to_netcdf('/tmp/bounds_file_9.nc', info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy_10 = open_netcdf('/tmp/bounds_file_9.nc', info=True) - -# LOAD DATA AND EXPLORE BOUNDS -print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_10.lat_bnds) -print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_10.lon_bnds) - -# Check lon_bnds -if nessy_10.lon_bnds['data'].shape[0:2] != nessy_10.lon['data'].shape: - raise Exception("Wrong lon_bnds.") - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - - -if rank == 0: - result.to_csv(result_path) - print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.4-test_cell_area.py b/build/lib/tests/2.4-test_cell_area.py deleted file mode 100644 index 9db836f..0000000 --- a/build/lib/tests/2.4-test_cell_area.py +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env python - -import sys -import timeit -import pandas as pd -from mpi4py import MPI -from nes import create_nes, open_netcdf, calculate_geometry_area - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' - -result_path = "Times_test_2.4_cell_area_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'calculate', 'write'], - 
diff --git a/build/lib/tests/2.4-test_cell_area.py b/build/lib/tests/2.4-test_cell_area.py
deleted file mode 100644
index 9db836f..0000000
--- a/build/lib/tests/2.4-test_cell_area.py
+++ /dev/null
@@ -1,195 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import timeit
-import pandas as pd
-from mpi4py import MPI
-from nes import create_nes, open_netcdf, calculate_geometry_area
-
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-parallel_method = 'Y'
-
-result_path = "Times_test_2.4_cell_area_{0}_{1:03d}.csv".format(parallel_method, size)
-result = pd.DataFrame(index=['read', 'calculate', 'write'],
-                      columns=['2.4.1.New_file_grid_area', '2.4.2.New_file_geometry_area',
-                               '2.4.3.Existing_file_grid_area', '2.4.4.Existing_file_geometry_area'])
-
-# ======================================================================================================================
-# ===================================== CALCULATE CELLS AREA FROM NEW GRID ============================================
-# ======================================================================================================================
-
-test_name = "2.4.1.New_file_grid_area"
-if rank == 0:
-    print(test_name)
-
-# CREATE GRID
-st_time = timeit.default_timer()
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 20
-ny = 40
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-nessy = create_nes(comm=None, info=False, projection='lcc',
-                   lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                   nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CALCULATE AREA OF EACH CELL IN GRID
-st_time = timeit.default_timer()
-nessy.calculate_grid_area()
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# EXPLORE GRID AREA
-print('Rank {0:03d}: Calculate grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area']))
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-
-# EXPLORE GRID AREA
-print('Rank {0:03d}: Write grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area']))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-del nessy
-
-# ======================================================================================================================
-# ===================================== CALCULATE CELLS AREA FROM GEOMETRIES ==========================================
-# ======================================================================================================================
-
-test_name = "2.4.2.New_file_geometry_area"
-if rank == 0:
-    print(test_name)
-
-# CREATE GRID
-st_time = timeit.default_timer()
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 20
-ny = 40
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-nessy = create_nes(comm=None, info=False, projection='lcc',
-                   lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                   nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CALCULATE AREA OF EACH CELL POLYGON
-st_time = timeit.default_timer()
-nessy.create_shapefile()
-geometry_list = nessy.shapefile['geometry'].values
-geometry_area = calculate_geometry_area(geometry_list)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# EXPLORE GEOMETRIES AREA
-print('Rank {0:03d}: Calculate geometry cell area: {1}'.format(rank, geometry_area))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ===================================== CALCULATE CELLS AREA FROM EXISTING GRID =======================================
-# ======================================================================================================================
-
-test_name = '2.4.3.Existing_file_grid_area'
-if rank == 0:
-    print(test_name)
-
-# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
-# Rotated grid from MONARCH
-original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(original_path, parallel_method=parallel_method)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CALCULATE AREA OF EACH CELL IN GRID
-st_time = timeit.default_timer()
-nessy.calculate_grid_area()
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# EXPLORE GRID AREA
-print('Rank {0:03d}: Calculate grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area']))
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-# REOPEN
-# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-del nessy
-
-# ======================================================================================================================
-# ===================================== CALCULATE CELLS AREA FROM GEOMETRIES FROM EXISTING GRID =======================
-# ======================================================================================================================
-
-test_name = '2.4.4.Existing_file_geometry_area'
-if rank == 0:
-    print(test_name)
-
-# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
-# Rotated grid from MONARCH
-original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(original_path, parallel_method=parallel_method)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CALCULATE AREA OF EACH CELL POLYGON
-st_time = timeit.default_timer()
-nessy.create_shapefile()
-geometry_list = nessy.shapefile['geometry'].values
-geometry_area = calculate_geometry_area(geometry_list)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# EXPLORE GEOMETRIES AREA
-print('Rank {0:03d}: Calculate geometry cell area: {1}'.format(rank, geometry_area))
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-del nessy
-
-if rank == 0:
-    result.to_csv(result_path)
-    print("TEST PASSED SUCCESSFULLY!!!!!")
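The two area paths above are alternatives over the same cells: calculate_grid_area() fills nessy.cell_measures['cell_area'] from the grid definition, while calculate_geometry_area() computes areas from the shapefile polygons. A minimal sketch with the grid of test 2.4.1:

    from nes import create_nes, calculate_geometry_area

    nessy = create_nes(comm=None, info=False, projection='lcc',
                       lat_1=37, lat_2=43, lon_0=-3, lat_0=40,
                       nx=20, ny=40, inc_x=4000, inc_y=4000,
                       x_0=-807847.688, y_0=-797137.125)
    nessy.calculate_grid_area()  # stored in nessy.cell_measures['cell_area']
    nessy.create_shapefile()
    areas = calculate_geometry_area(nessy.shapefile['geometry'].values)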
diff --git a/build/lib/tests/3.1-test_vertical_interp.py b/build/lib/tests/3.1-test_vertical_interp.py
deleted file mode 100644
index 9b78628..0000000
--- a/build/lib/tests/3.1-test_vertical_interp.py
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/bin/env python
-import sys
-import timeit
-import pandas as pd
-from mpi4py import MPI
-from nes import open_netcdf
-
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-parallel_method = 'T'
-
-result_path = "Times_test_3.1_vertical_interp_{0}_{1:03d}.csv".format(parallel_method, size)
-result = pd.DataFrame(index=['read', 'calculate', 'write'],
-                      columns=['3.1.1.Interp', '3.1.2.Extrap'])
-
-# ======================================================================================================================
-# =============================================== VERTICAL INTERPOLATION ==============================================
-# ======================================================================================================================
-
-test_name = '3.1.1.Interp'
-if rank == 0:
-    print(test_name)
-
-# READ
-st_time = timeit.default_timer()
-
-# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
-# Rotated grid from MONARCH
-source_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
-
-# Read source data
-source_data = open_netcdf(path=source_path, info=True)
-
-# Select time and load variables
-source_data.keep_vars(['O3', 'mid_layer_height_agl'])
-source_data.load()
-
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# INTERPOLATE
-st_time = timeit.default_timer()
-source_data.vertical_var_name = 'mid_layer_height_agl'
-level_list = [0., 50., 100., 250., 500., 750., 1000., 2000., 3000., 5000.]
-interp_nes = source_data.interpolate_vertical(level_list, info=True, kind='linear', extrapolate=None)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# WRITE
-st_time = timeit.default_timer()
-interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# =============================================== VERTICAL EXTRAPOLATION ==============================================
-# ======================================================================================================================
-
-test_name = '3.1.2.Extrap'
-if rank == 0:
-    print(test_name)
-
-# READ
-st_time = timeit.default_timer()
-
-# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
-# Rotated grid from MONARCH
-source_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
-
-# Read source data
-source_data = open_netcdf(path=source_path, info=True)
-
-# Select time and load variables
-source_data.keep_vars(['O3', 'mid_layer_height_agl'])
-source_data.load()
-
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# INTERPOLATE
-st_time = timeit.default_timer()
-source_data.vertical_var_name = 'mid_layer_height_agl'
-level_list = [0., 50., 100., 250., 500., 750., 1000., 2000., 3000., 5000., 21000, 25000, 30000, 40000, 50000]
-interp_nes = source_data.interpolate_vertical(level_list, info=True, kind='linear', extrapolate=True)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# WRITE
-st_time = timeit.default_timer()
-interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-if rank == 0:
-    result.to_csv(result_path)
-    print("TEST PASSED SUCCESSFULLY!!!!!")
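interpolate_vertical() maps each loaded variable from the model's native levels onto the requested level list, using the variable named by vertical_var_name as the vertical coordinate; extrapolate controls what happens for target levels outside the source range. A minimal sketch (the input path is illustrative, the variable names follow the deleted test):

    from nes import open_netcdf

    source = open_netcdf('MONARCH_d01_2022111512.nc')  # illustrative path
    source.keep_vars(['O3', 'mid_layer_height_agl'])
    source.load()
    source.vertical_var_name = 'mid_layer_height_agl'
    interp = source.interpolate_vertical([0., 500., 1000., 5000.],
                                         kind='linear', extrapolate=None)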
--- a/build/lib/tests/3.2-test_horiz_interp_bilinear.py
+++ /dev/null
@@ -1,222 +0,0 @@
-#!/usr/bin/env python
-import sys
-import timeit
-import pandas as pd
-from mpi4py import MPI
-from nes import open_netcdf, create_nes
-import os
-
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-parallel_method = 'T'
-
-result_path = "Times_test_3.2_horiz_interp_bilinear_{0}_{1:03d}.csv".format(parallel_method, size)
-result = pd.DataFrame(index=['read', 'calculate', 'write'],
-                      columns=['3.2.1.NN_Only interp', '3.2.2.NN_Create_WM', "3.2.3.NN_Use_WM", "3.2.4.NN_Read_WM"])
-
-# NAMEE
-src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc"
-var_list = ['O3']
-
-# ======================================================================================================================
-# ====================================== Only interp ==================================================================
-# ======================================================================================================================
-test_name = '3.2.1.NN_Only interp'
-if rank == 0:
-    print(test_name)
-    sys.stdout.flush()
-
-# READING
-st_time = timeit.default_timer()
-
-# Source data
-src_nes = open_netcdf(src_path, parallel_method=parallel_method)
-src_nes.keep_vars(var_list)
-src_nes.load()
-
-# Destination Grid
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 397
-ny = 397
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
-                     times=src_nes.get_full_times())
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-
-interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN')
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-interp_nes.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ====================================== Create_WM ====================================================================
-# ======================================================================================================================
-test_name = '3.2.2.NN_Create_WM'
-if rank == 0:
-    print(test_name)
-
-# READ
-st_time = timeit.default_timer()
-
-# Read source data
-src_nes = open_netcdf(src_path, parallel_method=parallel_method)
-src_nes.keep_vars(var_list)
-src_nes.load()
-
-# Destination Grid
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 397
-ny = 397
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
-                     times=src_nes.get_full_times())
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# Cleaning WM
-if os.path.exists("NN_WM_NAMEE_to_IP.nc") and rank == 0:
-    os.remove("NN_WM_NAMEE_to_IP.nc")
-comm.Barrier()
-
-st_time = timeit.default_timer()
-
-wm_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', info=True,
-                                        weight_matrix_path="NN_WM_NAMEE_to_IP.nc", only_create_wm=True)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ====================================== Use_WM =======================================================================
-# ======================================================================================================================
-test_name = "3.2.3.NN_Use_WM"
-if rank == 0:
-    print(test_name)
-
-# READING
-st_time = timeit.default_timer()
-
-# Source data
-src_nes = open_netcdf(src_path, parallel_method=parallel_method)
-src_nes.keep_vars(var_list)
-src_nes.load()
-
-# Destination Grid
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 397
-ny = 397
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-
-dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
-                     times=src_nes.get_full_times())
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-
-interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', wm=wm_nes)
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ====================================== Read_WM ======================================================================
-# ======================================================================================================================
-test_name = "3.2.4.NN_Read_WM"
-if rank == 0:
-    print(test_name)
-
-# READING
-st_time = timeit.default_timer()
-
-# Source data
-src_nes = open_netcdf(src_path, parallel_method=parallel_method)
-src_nes.keep_vars(var_list)
-src_nes.load()
-
-# Destination Grid
-lat_1 = 37
-lat_2 = 43
-lon_0 = -3
-lat_0 = 40
-nx = 397
-ny = 397
-inc_x = 4000
-inc_y = 4000
-x_0 = -807847.688
-y_0 = -797137.125
-
-dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
-                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
-                     times=src_nes.get_full_times())
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-
-interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN',
-                                            weight_matrix_path="NN_WM_NAMEE_to_IP.nc")
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-st_time = timeit.default_timer()
-interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-if rank == 0:
-    result.to_csv(result_path)
-    print("TEST PASSED SUCCESSFULLY!!!!!")
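The four variants above are one interpolation with different weight-matrix handling: compute the weights inline, write them to disk (only_create_wm=True), reuse the in-memory matrix (wm=...), or read the stored file back (weight_matrix_path=...). A minimal sketch of the reuse pattern (the grid objects and file name are illustrative):

    # One-off: build the weight matrix and store it.
    wm = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN',
                                        weight_matrix_path='NN_WM.nc', only_create_wm=True)
    # Same run: reuse the in-memory matrix.
    out = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', wm=wm)
    # Later runs: read the stored matrix instead of recomputing it.
    out = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN',
                                         weight_matrix_path='NN_WM.nc')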
SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/3.3-test_horiz_interp_conservative.py b/build/lib/tests/3.3-test_horiz_interp_conservative.py deleted file mode 100644 index 90aa72b..0000000 --- a/build/lib/tests/3.3-test_horiz_interp_conservative.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python -import sys -import os -import timeit -import pandas as pd -from mpi4py import MPI -from nes import open_netcdf, create_nes - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' - -result_path = "Times_test_3.3_horiz_interp_conservative.py_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'calculate', 'write'], - columns=['3.3.1.Only interp', '3.3.2.Create_WM', "3.3.3.Use_WM", "3.3.4.Read_WM"]) - -src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc" -src_type = 'NAMEE' -var_list = ['O3'] -# src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" -# src_type = 'CAMS_glob_antv21' -# var_list = ['nox_no'] - -# ====================================================================================================================== -# ====================================== Only interp ===================================================== -# ====================================================================================================================== - -test_name = '3.3.1.Only interp' -if rank == 0: - print(test_name) - -# READ -# final_dst.variables[var_name]['data'][time, lev] = np.sum(weights * src_aux, axis=1) - -st_time = timeit.default_timer() - -# Read source data -src_nes = open_netcdf(src_path, parallel_method=parallel_method) -src_nes.keep_vars(var_list) -src_nes.load() - -# Create destination grid -lat_1 = 37 -lat_2 = 43 -lon_0 = -3 -lat_0 = 40 -nx = 397 -ny = 397 -inc_x = 4000 -inc_y = 4000 -x_0 = -807847.688 -y_0 = -797137.125 -dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, - nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, - times=src_nes.get_full_times()) -dst_type = "IP" - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -st_time = timeit.default_timer() - -# INTERPOLATE -interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', info=False) -# interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', weight_matrix_path='T_WM.nc') -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -interp_nes.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== Create_WM ===================================================== -# ====================================================================================================================== - -test_name = '3.3.2.Create_WM' -if rank == 0: - print(test_name) - -# READING -st_time = timeit.default_timer() - -# Read source data -src_nes = open_netcdf(src_path, parallel_method=parallel_method) -src_nes.keep_vars(var_list) -src_nes.load() - -# Create destination grid -lat_1 = 37 -lat_2 = 43 
-lon_0 = -3 -lat_0 = 40 -nx = 397 -ny = 397 -inc_x = 4000 -inc_y = 4000 -x_0 = -807847.688 -y_0 = -797137.125 -dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, - nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, - times=src_nes.get_full_times()) -dst_type = "IP" - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# Cleaning WM -if os.path.exists("CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type)) and rank == 0: - os.remove("CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type)) -comm.Barrier() - -# INTERPOLATE -st_time = timeit.default_timer() -wm_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', info=True, - weight_matrix_path="CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type), - only_create_wm=True) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# WRITE -# st_time = timeit.default_timer() -# interp_nes.to_netcdf(test_name.replace(' ', '_') + ".nc") -# comm.Barrier() -# result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== Use_WM ===================================================== -# ====================================================================================================================== - -test_name = "3.3.3.Use_WM" -if rank == 0: - print(test_name) - -# READ -st_time = timeit.default_timer() - -# Read source data -src_nes = open_netcdf(src_path, parallel_method=parallel_method) -src_nes.keep_vars(var_list) -src_nes.load() - -# Create destination grid -lat_1 = 37 -lat_2 = 43 -lon_0 = -3 -lat_0 = 40 -nx = 397 -ny = 397 -inc_x = 4000 -inc_y = 4000 -x_0 = -807847.688 -y_0 = -797137.125 -dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, - nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, - times=src_nes.get_full_times()) -dst_type = "IP" - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# INTERPOLATE -st_time = timeit.default_timer() -interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', wm=wm_nes) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -interp_nes.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ====================================== Read_WM ===================================================== -# ====================================================================================================================== - -test_name = "3.3.4.Read_WM" -if rank == 0: - print(test_name) - -# READ -st_time = timeit.default_timer() - -# Read source data -src_nes = open_netcdf(src_path, parallel_method=parallel_method) -src_nes.keep_vars(var_list) -src_nes.load() - -# Create destination grid -lat_1 = 37 -lat_2 = 43 -lon_0 = -3 -lat_0 = 40 -nx = 397 -ny = 397 
-inc_x = 4000 -inc_y = 4000 -x_0 = -807847.688 -y_0 = -797137.125 -dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, - nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, - times=src_nes.get_full_times()) -dst_type = "IP" - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# INTERPOLATE -st_time = timeit.default_timer() -interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', - weight_matrix_path="CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type)) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -interp_nes.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -if rank == 0: - result.to_csv(result_path) - print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/4.1-test_stats.py b/build/lib/tests/4.1-test_stats.py deleted file mode 100644 index f11206c..0000000 --- a/build/lib/tests/4.1-test_stats.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python - -import sys -import timeit -import pandas as pd -from mpi4py import MPI -from nes import open_netcdf - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' - -result_path = "Times_test_4.1_daily_stats_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'calculate', 'write'], - columns=['4.1.1.Mean']) - -# ====================================================================================================================== -# ============================================== CALCULATE DAILY MEAN ================================================ -# ====================================================================================================================== - -test_name = '4.1.1.Mean' -if rank == 0: - print(test_name) - -# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc -# Rotated grid from MONARCH -cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc' - -# READ -st_time = timeit.default_timer() -nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method) -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# LOAD VARIABLES -nessy.keep_vars('O3') -nessy.load() - -# CALCULATE MEAN -st_time = timeit.default_timer() -nessy.daily_statistic(op="mean") -print(nessy.variables['O3']['cell_methods']) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# ========================================== CALCULATE 8-HOUR ROLLING MEAN =========================================== -# ====================================================================================================================== - -test_name = '4.1.2.Rolling_Mean' -if rank == 0: - print(test_name) - -# 
diff --git a/build/lib/tests/4.1-test_stats.py b/build/lib/tests/4.1-test_stats.py
deleted file mode 100644
index f11206c..0000000
--- a/build/lib/tests/4.1-test_stats.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import timeit
-import pandas as pd
-from mpi4py import MPI
-from nes import open_netcdf
-
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-parallel_method = 'Y'
-
-result_path = "Times_test_4.1_daily_stats_{0}_{1:03d}.csv".format(parallel_method, size)
-result = pd.DataFrame(index=['read', 'calculate', 'write'],
-                      columns=['4.1.1.Mean', '4.1.2.Rolling_Mean'])
-
-# ======================================================================================================================
-# ============================================== CALCULATE DAILY MEAN =================================================
-# ======================================================================================================================
-
-test_name = '4.1.1.Mean'
-if rank == 0:
-    print(test_name)
-
-# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
-# Rotated grid from MONARCH
-cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# LOAD VARIABLES
-nessy.keep_vars('O3')
-nessy.load()
-
-# CALCULATE MEAN
-st_time = timeit.default_timer()
-nessy.daily_statistic(op="mean")
-print(nessy.variables['O3']['cell_methods'])
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# WRITE
-st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-# ======================================================================================================================
-# ========================================== CALCULATE 8-HOUR ROLLING MEAN ============================================
-# ======================================================================================================================
-
-test_name = '4.1.2.Rolling_Mean'
-if rank == 0:
-    print(test_name)
-
-# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
-# Rotated grid from MONARCH
-cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
-
-# READ
-st_time = timeit.default_timer()
-nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method)
-comm.Barrier()
-result.loc['read', test_name] = timeit.default_timer() - st_time
-
-# CALCULATE ROLLING MEAN
-st_time = timeit.default_timer()
-rolling_mean = nessy.rolling_mean(var_list='O3', hours=8)
-print(rolling_mean.variables['O3']['data'])
-comm.Barrier()
-result.loc['calculate', test_name] = timeit.default_timer() - st_time
-
-# WRITE
-st_time = timeit.default_timer()
-rolling_mean.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
-comm.Barrier()
-result.loc['write', test_name] = timeit.default_timer() - st_time
-
-comm.Barrier()
-if rank == 0:
-    print(result.loc[:, test_name])
-sys.stdout.flush()
-
-if rank == 0:
-    result.to_csv(result_path)
-    print("TEST PASSED SUCCESSFULLY!!!!!")
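daily_statistic() collapses the hourly time axis in place, one value per day, and records the operation in the variable's cell_methods attribute; rolling_mean() instead returns a new object with the windowed average on the original time axis. A minimal sketch (the input path is illustrative):

    from nes import open_netcdf

    nessy = open_netcdf('MONARCH_d01_2022111512.nc', parallel_method='Y')  # illustrative path
    nessy.keep_vars('O3')
    nessy.load()
    rolled = nessy.rolling_mean(var_list='O3', hours=8)  # new object, 8-hour window
    nessy.daily_statistic(op="mean")                     # in-place daily aggregation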
b50c74b..0000000 --- a/build/lib/tests/4.3-test_write_timestep.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python - -import sys -from mpi4py import MPI -import pandas as pd -import timeit -from datetime import datetime, timedelta -import numpy as np -from nes import create_nes - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -size = comm.Get_size() - -parallel_method = 'Y' - -result_path = "Times_test_4.3_write_time_step_{0}_{1:03d}.csv".format(parallel_method, size) -result = pd.DataFrame(index=['read', 'calculate', 'write'], - columns=['4.3.1.Parallel_Write', '4.3.2.Serial_Write']) - -# ====================================================================================================================== -# =================================== PARALLEL WRITE =================================================== -# ====================================================================================================================== - -test_name = '4.3.1.Parallel_Write' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() -# CREATE GRID -centre_lat = 51 -centre_lon = 10 -west_boundary = -35 -south_boundary = -27 -inc_rlat = 0.2 -inc_rlon = 0.2 -nessy = create_nes(comm=None, info=False, projection='rotated', - centre_lat=centre_lat, centre_lon=centre_lon, - west_boundary=west_boundary, south_boundary=south_boundary, - inc_rlat=inc_rlat, inc_rlon=inc_rlon) - -# ADD VARIABLES -nessy.variables = {'var1': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}, - 'var2': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}} -time_list = [datetime(year=2023, month=1, day=1) + timedelta(hours=x) for x in range(24)] -nessy.set_time(time_list) - -comm.Barrier() -result.loc['read', test_name] = timeit.default_timer() - st_time - -# CREATE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name + '.nc', keep_open=True, info=False) - -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# CALCULATE & APPEND -result.loc['calculate', test_name] = 0 - -for i_time, time_aux in enumerate(time_list): - # CALCULATE - st_time = timeit.default_timer() - - nessy.variables['var1']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time - nessy.variables['var2']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time - - comm.Barrier() - result.loc['calculate', test_name] += timeit.default_timer() - st_time - - # APPEND - st_time = timeit.default_timer() - nessy.append_time_step_data(i_time) - comm.Barrier() - if i_time == len(time_list) - 1: - nessy.close() - result.loc['write', test_name] = timeit.default_timer() - st_time - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# =================================== SERIAL WRITE =================================================== -# ====================================================================================================================== - -test_name = '4.3.2.Serial_Write' - -if rank == 0: - print(test_name) - -st_time = timeit.default_timer() -# CREATE GRID -centre_lat = 51 -centre_lon = 10 -west_boundary = -35 -south_boundary = -27 -inc_rlat = 0.2 -inc_rlon = 0.2 -nessy = create_nes(comm=None, info=False, projection='rotated', - centre_lat=centre_lat, centre_lon=centre_lon, - west_boundary=west_boundary, south_boundary=south_boundary, - inc_rlat=inc_rlat, 
diff --git a/build/lib/tests/__init__.py b/build/lib/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/build/lib/tests/unit/__init__.py b/build/lib/tests/unit/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/build/lib/tests/unit/test_imports.py b/build/lib/tests/unit/test_imports.py
deleted file mode 100644
index 346ebad..0000000
--- a/build/lib/tests/unit/test_imports.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import unittest
-
-
-class TestImports(unittest.TestCase):
-    def test_imports(self):
-        imports_to_test = [
-            'sys', 'os', 'time', 'timeit', 'math', 'calendar', 'datetime',
-            'warnings', 'geopandas', 'pandas', 'numpy', 'shapely',
-            'mpi4py', 'netCDF4', 'pyproj', 'configargparse', 'filelock',
-            'eccodes']
-
-        for module_name in imports_to_test:
-            with self.subTest(module=module_name):
-                try:
-                    __import__(module_name)
-                except ImportError as e:
-                    self.fail(f"Failed to import {module_name}: {e}")
-
-    def test_eccodes(self):
-        try:
-            import eccodes
-            from eccodes import codes_grib_new_from_file
-            from eccodes import codes_keys_iterator_new
-            from eccodes import codes_keys_iterator_next
-            from eccodes import codes_keys_iterator_get_name
-            from eccodes import codes_get_string
-            from eccodes import codes_keys_iterator_delete
-            from eccodes import codes_clone
-            from eccodes import codes_set
-            from eccodes import codes_set_values
-            from eccodes import codes_write
-            from eccodes import codes_release
-            from eccodes import codes_samples_path
-            import os
-            os.path.join(codes_samples_path(), 'GRIB2.tmpl')
-
-            print("Eccodes: ", eccodes.__version__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_geopandas(self):
-        try:
-            import geopandas
-            print("GeoPandas: ", geopandas.__version__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_pandas(self):
-        try:
-            import pandas
-            print("Pandas: ", pandas.__version__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_numpy(self):
-        try:
-            import numpy
-            print("NumPy: ", numpy.__version__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_shapely(self):
-        try:
-            import shapely
-            print("Shapely: ", shapely.__version__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_mpi(self):
-        try:
-            import mpi4py
-            print("mpi4py: ", mpi4py.__version__)
-            from mpi4py import MPI
-            print("MPI Vendor: ", MPI.get_vendor())
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_netcdf4(self):
-        try:
-            import netCDF4
-            print("netCDF4 version:", netCDF4.__version__)
-            print("HDF5 version:", netCDF4.__hdf5libversion__)
-            print("NetCDF library version:", netCDF4.__netcdf4libversion__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_netcdf4_parallel(self):
-        try:
-            from mpi4py import MPI
-            import numpy as np
-            from netCDF4 import Dataset
-            nc = Dataset('/tmp/parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD, info=MPI.Info())
-            nc.close()
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-    def test_pyproj(self):
-        try:
-            import pyproj
-            print("pyproj: ", pyproj.__version__)
-        except ImportError as e:
-            self.fail(f"Import error: {e}")
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/environment.yml b/environment.yml
index f1696c0..788450f 100755
--- a/environment.yml
+++ b/environment.yml
@@ -1,18 +1,35 @@
-
 channels:
   - conda-forge
-  - bioconda
-  - defaults
+  - anaconda
 
 dependencies:
-  - python >= 3.8
+  - python=3.10
+  - libnetcdf=*=mpi_mpich*
+  - netCDF4=*=mpi_mpich*
+  - h5py=*=mpi_mpich*
+  - pytest
+  - pytest-cov
+  - pycodestyle>=2.10.0
+  - geopandas>=0.10.2
+  - rtree>=0.9.0
+  - numpy>=1.20.0
+  - pyproj~=3.2.1
+  - setuptools>=66.1.1
+  - pytest>=7.2.1
+  - shapely
   - mpi4py ~= 3.1.4
-  - geopandas
-  - netcdf4=*=mpi*
   - eccodes
   - python-eccodes
   - filelock
   - configargparse
+  - psutil
+  - pyyaml
+  - rasterio
  - openpyxl
   - jupyter
-  - ipykernel
\ No newline at end of file
+  - ipykernel
+
+variables:
+  CC: mpicc
+  CXX: mpicxx
+  LDFLAGS: -shared
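The `mpi_mpich` build pins added above are what give NES a parallel-capable netCDF stack; the default conda-forge variants of libnetcdf/HDF5 are typically serial (nompi) builds. A quick check that the resolved environment really has parallel support; the `__has_parallel4_support__` attribute name is assumed from recent netCDF4-python releases:

import mpi4py
import netCDF4

print("netCDF4:", netCDF4.__version__)
print("mpi4py:", mpi4py.__version__)
# True only when libnetcdf/HDF5 were built against MPI, which is
# what the mpi_mpich build strings in environment.yml request.
print("parallel netCDF-4 support:", netCDF4.__has_parallel4_support__)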
diff --git a/nes/__init__.py b/nes/__init__.py
index 1f6c5f9..a8e71e3 100644
--- a/nes/__init__.py
+++ b/nes/__init__.py
@@ -1,4 +1,4 @@
-__date__ = "2025-02-18"
+__date__ = "2025-04-22"
 __version__ = "1.1.9"
 __all__ = [
     'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes',
diff --git a/tests/test_bash.mn4.sh b/tests/test_bash.mn4.sh
deleted file mode 100644
index 3f8dc9e..0000000
--- a/tests/test_bash.mn4.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-#SBATCH --qos=debug
-#SBATCH -A bsc32
-#SBATCH --cpus-per-task=1
-#SBATCH -n 4
-#SBATCH -t 02:00:00
-#SBATCH -J NES-test
-#SBATCH --output=log_NES-tests_mn4_%j.out
-#SBATCH --error=log_NES-tests_mn4_%j.err
-#SBATCH --exclusive
-
-### ulimit -s 128000
-
-module purge
-module use /gpfs/projects/bsc32/software/suselinux/11/modules/all
-
-module load NES/1.1.3-mn4-foss-2019b-Python-3.7.4
-module load OpenMPI/4.0.5-GCC-8.3.0-mn4
-
-cd /gpfs/projects/bsc32/models/NES_master/tests || exit
-
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 1.1-test_read_write_projection.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 1.2-test_create_projection.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 1.3-test_selecting.py
-
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.1-test_spatial_join.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.2-test_create_shapefile.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.3-test_bounds.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.4-test_cell_area.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.5-test_longitude_conversion.py
-
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.1-test_vertical_interp.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.2-test_horiz_interp_bilinear.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.3-test_horiz_interp_conservative.py
-
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 4.1-test_stats.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 4.2-test_sum.py
-mpirun --mca mpi_warn_on_fork 0 -np 4 python 4.3-test_write_timestep.py
diff --git a/tests/test_bash.mn5.sh b/tests/test_bash.mn5.sh
index 0966f3c..e767c3e 100644
--- a/tests/test_bash.mn5.sh
+++ b/tests/test_bash.mn5.sh
@@ -3,30 +3,18 @@
 #SBATCH -A bsc32
 #SBATCH --cpus-per-task=1
 #SBATCH -n 4
-#SBATCH -t 00:10:00
+#SBATCH -t 00:30:00
 #SBATCH -J NES-test
 #SBATCH --output=log_NES-tests_mn5_%j.out
 #SBATCH --error=log_NES-tests_mn5_%j.err
 #SBATCH --exclusive
 
-set -xuve
+module load Mamba/23.11.0-0
+source activate base
 
-module purge
-module load anaconda
-source /apps/GPP/ANACONDA/2023.07/etc/profile.d/conda.sh
-conda deactivate
-conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/PHENOMENA_v0.2.0_bsc124195
-#export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev/lib/python3.12/site-packages
-export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/PHENOMENA_v0.2.0_${USER}
-export SLURM_CPU_BIND=none
-#export PYTHONPATH=/gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES:$PYTHONPATH
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.9
 
-#conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4
-##export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4/lib/python3.12/site-packages:$PYTHONPATH
-#export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4/lib/python3.12/site-packages
-#export SLURM_CPU_BIND=none
-
 cd /gpfs/scratch/bsc32/${USER}/AC_PostProcess/NES/tests || exit
 
 mpirun -np 4 python 1.1-test_read_write_projection.py
diff --git a/tests/test_bash.nord3v2.sh b/tests/test_bash.nord4.sh
similarity index 86%
rename from tests/test_bash.nord3v2.sh
rename to tests/test_bash.nord4.sh
index 5cfe091..a18de72 100644
--- a/tests/test_bash.nord3v2.sh
+++ b/tests/test_bash.nord4.sh
@@ -12,11 +12,13 @@
 
 ### ulimit -s 128000
 
-module purge
+module load Mamba/23.11.0-0
+source activate base
 
-module load NES/1.1.3-nord3-v2-foss-2019b-Python-3.7.4
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.9
 
-cd /gpfs/projects/bsc32/models/NES_master/tests || exit
+
+cd /gpfs/scratch/bsc32/${USER}/AC_PostProcess/NES/tests || exit
 
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 1.1-test_read_write_projection.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 1.2-test_create_projection.py
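The revised `test_netcdf4_parallel` in the next diff only opens, touches, and closes a file in parallel mode. For context, the capability it probes is normally used like this: every MPI rank opens the same file and writes its own slice of a shared variable. A minimal sketch using standard netCDF4-python parallel I/O (not NES code; run under mpirun as in the scripts above):

from mpi4py import MPI
from netCDF4 import Dataset

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

nc = Dataset("ranks.nc", "w", parallel=True, comm=comm, info=MPI.Info())
nc.createDimension("x", size)
var = nc.createVariable("rank_id", "i4", ("x",))
var.set_collective(True)  # collective mode: all ranks take part in each write
var[rank] = rank          # each rank writes exactly its own element
nc.close()

Launched as `mpirun -np 4 python parallel_sketch.py`, this produces a single file holding the values 0..3, one written by each rank.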
+ """ + def test_imports(self): imports_to_test = [ 'sys', 'os', 'time', 'timeit', 'math', 'calendar', 'datetime', 'warnings', 'geopandas', 'pandas', 'numpy', 'shapely', 'mpi4py', 'netCDF4', 'pyproj', 'configargparse', 'filelock', - 'eccodes'] + 'pytz', 'eccodes', 'scipy', 'nes', 'pytest', 'dateutil', + 'rasterio'] for module_name in imports_to_test: with self.subTest(module=module_name): @@ -17,6 +25,17 @@ class TestImports(unittest.TestCase): self.fail(f"Failed to import {module_name}: {e}") def test_eccodes(self): + """ + Test that the `eccodes` library and its main GRIB manipulation functions can be successfully imported. + + This includes: + - Creating a GRIB message from a sample file. + - Accessing GRIB keys using a keys iterator. + - Getting and setting values. + - Writing and releasing GRIB messages. + + The test ensures that all critical `eccodes` functions are importable and accessible. + """ try: import eccodes from eccodes import codes_grib_new_from_file @@ -41,6 +60,8 @@ class TestImports(unittest.TestCase): def test_geopandas(self): try: import geopandas + from geopandas import sjoin_nearest + from geopandas import GeoDataFrame print("GeoPandas: ", geopandas.__version__) except ImportError as e: self.fail(f"Import error: {e}") @@ -85,14 +106,21 @@ class TestImports(unittest.TestCase): self.fail(f"Import error: {e}") def test_netcdf4_parallel(self): + import os try: from mpi4py import MPI import numpy as np from netCDF4 import Dataset - nc = Dataset('/tmp/parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD, info=MPI.Info()) + + nc = Dataset("parallel_test.nc", "w", parallel=True, comm=MPI.COMM_WORLD, info=MPI.Info(),) + nc.createDimension("x", 10) nc.close() - except ImportError as e: - self.fail(f"Import error: {e}") + self.assertTrue(os.path.exists("parallel_test.nc")) + except (ImportError, RuntimeError, OSError) as e: + self.fail(f"Parallel netCDF4 support not available: {e}") + finally: + if os.path.exists("parallel_test.nc"): + os.remove("parallel_test.nc") def test_pyproj(self): try: @@ -101,6 +129,21 @@ class TestImports(unittest.TestCase): except ImportError as e: self.fail(f"Import error: {e}") + def test_pytz(self): + try: + import pytz + print("pytz: ", pytz.__version__) + except ImportError as e: + self.fail(f"Import error: {e}") + + def test_rasterio(self): + try: + import rasterio + from rasterio.windows import Window + print("rasterio: ", rasterio.__version__) + + except ImportError as e: + self.fail(f"Import error: {e}") if __name__ == '__main__': unittest.main() -- GitLab From ea62e1846f9ca2ba8c94f1f0504835cdaf0d2195 Mon Sep 17 00:00:00 2001 From: Carles Tena Date: Tue, 22 Apr 2025 16:09:52 +0200 Subject: [PATCH 33/33] Preparing v1.1.9 release --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3633223..c3ddb06 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,9 @@ CHANGELOG * Add additional names for the time variable * Added MOCAGE format * Bugfix on vertical interpolation. + * Selecting function allows now to select negative latitudes on 0-360 ones. + * Reorder functionality (0 360 to -180 180) as entry point + * Coordinates metadata conventions. 1.1.8 -- GitLab