diff --git a/build/lib/nes/__init__.py b/build/lib/nes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1dcabe9cf5e53deaefb0255f16467b6ef747a97a --- /dev/null +++ b/build/lib/nes/__init__.py @@ -0,0 +1,13 @@ +__date__ = "2024-10-07" +__version__ = "1.1.8" +__all__ = [ + 'open_netcdf', 'concatenate_netcdfs', 'create_nes', 'from_shapefile', 'calculate_geometry_area', 'Nes', 'LatLonNes', + 'LCCNes', 'RotatedNes', 'RotatedNestedNes', 'MercatorNes', 'PointsNesProvidentia', 'PointsNesGHOST', 'PointsNes' +] + +from .load_nes import open_netcdf, concatenate_netcdfs +# from .load_nes import open_raster +from .create_nes import create_nes, from_shapefile +from .methods.cell_measures import calculate_geometry_area +from .nc_projections import (Nes, LatLonNes, LCCNes, RotatedNes, RotatedNestedNes, MercatorNes, PointsNesProvidentia, + PointsNes, PointsNesGHOST) diff --git a/build/lib/nes/create_nes.py b/build/lib/nes/create_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..ce8b619fa7d6b231cb2dcb53121e58016c8e2fc5 --- /dev/null +++ b/build/lib/nes/create_nes.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python + +import warnings +import sys +from netCDF4 import num2date +from mpi4py import MPI +from .nc_projections import PointsNes, LatLonNes, RotatedNes, RotatedNestedNes, LCCNes, MercatorNes + + +def create_nes(comm=None, info=False, projection=None, parallel_method="Y", balanced=False, + times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + **kwargs): + """ + Create a Nes class from scratch. + + Parameters + ---------- + comm : MPI.Comm, optional + MPI Communicator. If None, uses MPI.COMM_WORLD. + info : bool, optional + Indicates if reading/writing info should be provided. Default is False. + projection : str, optional + The projection type. Accepted values are None, "regular", "global", "rotated", "rotated-nested", "lcc", + "mercator". + parallel_method : str, optional + The parallelization method to use. Default is "Y". Accepted values are ["X", "Y", "T"]. + balanced : bool, optional + Indicates if balanced parallelization is desired. Balanced datasets cannot be written in chunking mode. + Default is False. + times : list of datetime, optional + List of datetime objects representing the time dimension. If None, a default time array is created. + avoid_first_hours : int, optional + Number of hours to remove from the start of the time steps. Default is 0. + avoid_last_hours : int, optional + Number of hours to remove from the end of the time steps. Default is 0. + first_level : int, optional + Index of the first level to use. Default is 0. + last_level : int or None, optional + Index of the last level to use. If None, the last level is used. Default is None. + **kwargs : additional arguments + Additional parameters required for specific projections. + + Returns + ------- + nes : Nes + An instance of the Nes class based on the specified parameters and projection. + + Raises + ------ + ValueError + If any required projection-specific parameters are missing or if invalid parameters are provided. + NotImplementedError + If an unsupported parallel method or projection type is specified. + + Notes + ----- + The function dynamically creates an instance of a specific Nes subclass based on the provided projection. 
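+ Note that both "regular" and "global" projections create a LatLonNes instance; they differ only in the parameters they require.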
+ The required parameters for each projection type are: + - None: ["lat", "lon"] + - "regular": ["lat_orig", "lon_orig", "inc_lat", "inc_lon", "n_lat", "n_lon"] + - "global": ["inc_lat", "inc_lon"] + - "rotated": ["centre_lat", "centre_lon", "west_boundary", "south_boundary", "inc_rlat", "inc_rlon"] + - "rotated-nested": ["parent_grid_path", "parent_ratio", "i_parent_start", "j_parent_start", "n_rlat", "n_rlon"] + - "lcc": ["lat_1", "lat_2", "lon_0", "lat_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + - "mercator": ["lat_ts", "lon_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + + Example + ------- + >>> nes = create_nes(projection="regular", lat_orig=0, lon_orig=0, inc_lat=1, inc_lon=1, n_lat=180, n_lon=360) + """ + + if comm is None: + comm = MPI.COMM_WORLD + else: + comm = comm + + # Create time array + if times is None: + units = "days since 1996-12-31 00:00:00" + calendar = "standard" + times = num2date([0], units=units, calendar=calendar) + times = [aux.replace(second=0, microsecond=0) for aux in times] + else: + if not isinstance(times, list): + times = list(times) + + # Check if the parameters that are required to create the object have been defined in kwargs + kwargs_list = [] + for name, value in kwargs.items(): + kwargs_list.append(name) + + if projection is None: + required_vars = ["lat", "lon"] + elif projection == "regular": + required_vars = ["lat_orig", "lon_orig", "inc_lat", "inc_lon", "n_lat", "n_lon"] + elif projection == "global": + required_vars = ["inc_lat", "inc_lon"] + elif projection == "rotated": + required_vars = ["centre_lat", "centre_lon", "west_boundary", "south_boundary", "inc_rlat", "inc_rlon"] + elif projection == "rotated-nested": + required_vars = ["parent_grid_path", "parent_ratio", "i_parent_start", "j_parent_start", "n_rlat", "n_rlon"] + elif projection == "lcc": + required_vars = ["lat_1", "lat_2", "lon_0", "lat_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + elif projection == "mercator": + required_vars = ["lat_ts", "lon_0", "nx", "ny", "inc_x", "inc_y", "x_0", "y_0"] + else: + raise ValueError("Unknown projection: {0}".format(projection)) + + for var in required_vars: + if var not in kwargs_list: + msg = "Variable {0} has not been defined. ".format(var) + msg += "For a {} projection, it is necessary to define {}".format(projection, required_vars) + raise ValueError(msg) + + for var in kwargs_list: + if var not in required_vars: + msg = "Variable {0} has been defined. ".format(var) + msg += "For a {} projection, you can only define {}".format(projection, required_vars) + raise ValueError(msg) + + if projection is None: + if parallel_method == "Y": + warnings.warn("Parallel method cannot be 'Y' to create points NES. 
Setting it to 'X'") + sys.stderr.flush() + parallel_method = "X" + elif parallel_method == "T": + raise NotImplementedError("Parallel method T not implemented yet") + nessy = PointsNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection in ["regular", "global"]: + nessy = LatLonNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "rotated": + nessy = RotatedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "rotated-nested": + nessy = RotatedNestedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "lcc": + nessy = LCCNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + elif projection == "mercator": + nessy = MercatorNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, balanced=balanced, + create_nes=True, times=times, **kwargs) + else: + raise NotImplementedError(projection) + + return nessy + + +def from_shapefile(path, method=None, parallel_method="Y", **kwargs): + """ + Create NES from shapefile data. + + 1. Create NES grid. + 2. Create shapefile for grid. + 3. Spatial join to add shapefile variables to NES variables. + + Parameters + ---------- + path : str + Path to shapefile. + method : str + Overlay method. Accepted values: ["nearest", "intersection", None]. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + accepted values: ["X", "Y", "T"]. 
+ """ + + # Create NES + nessy = create_nes(comm=None, info=False, parallel_method=parallel_method, **kwargs) + + # Create shapefile for grid + nessy.create_shapefile() + + # Make spatial join + nessy.spatial_join(path, method=method) + + return nessy diff --git a/build/lib/nes/load_nes.py b/build/lib/nes/load_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..542b5837c7b28f0697a3658f4b743e59f24fe69f --- /dev/null +++ b/build/lib/nes/load_nes.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python + +import os +import sys +from numpy import empty +from mpi4py import MPI +from netCDF4 import Dataset +from warnings import warn +from .nc_projections import RotatedNes, PointsNes, PointsNesGHOST, PointsNesProvidentia, LCCNes, LatLonNes, MercatorNes + +DIM_VAR_NAMES = ["lat", "latitude", "lat_bnds", "lon", "longitude", "lon_bnds", "time", "time_bnds", "lev", "level", + "cell_area", "crs", "rotated_pole", "x", "y", "rlat", "rlon", "Lambert_conformal", "mercator"] + + +def open_netcdf(path, comm=None, info=False, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, + first_level=0, last_level=None, balanced=False): + """ + Open a netCDF file. + + Parameters + ---------- + path : str + Path to the NetCDF file to read. + comm : MPI.COMM + MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. + info : bool + Indicates if you want to print (stdout) the reading/writing steps. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"] + balanced : bool + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + + Returns + ------- + Nes + A Nes object. Variables read in lazy mode (only metadata). + """ + + if comm is None: + comm = MPI.COMM_WORLD + else: + comm = comm + + if not os.path.exists(path): + raise FileNotFoundError(path) + + dataset = Dataset(path, format="NETCDF4", mode="r", parallel=False) + # Parallel is not needed for reading + # if comm.Get_size() == 1: + # dataset = Dataset(path, format="NETCDF4", mode="r", parallel=False) + # else: + # dataset = Dataset(path, format="NETCDF4", mode="r", parallel=True, comm=comm, info=MPI.Info()) + + if __is_rotated(dataset): + # Rotated grids + nessy = RotatedNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + elif __is_points(dataset): + if parallel_method == "Y": + warn("Parallel method cannot be 'Y' to create points NES. 
Setting it to 'X'") + sys.stderr.flush() + parallel_method = "X" + if __is_points_ghost(dataset): + # Points - GHOST + nessy = PointsNesGHOST(comm=comm, dataset=dataset, info=info, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + elif __is_points_providentia(dataset): + # Points - Providentia + nessy = PointsNesProvidentia(comm=comm, dataset=dataset, info=info, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, + balanced=balanced,) + else: + # Points - non-GHOST + nessy = PointsNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + elif __is_lcc(dataset): + # Lambert conformal conic grids + nessy = LCCNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + elif __is_mercator(dataset): + # Mercator grids + nessy = MercatorNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + else: + # Regular grids + nessy = LatLonNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + + return nessy + + +def __is_rotated(dataset): + """ + Check if the netCDF is in rotated pole projection or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is a rotated one. + """ + + if "rotated_pole" in dataset.variables.keys(): + return True + elif ("rlat" in dataset.dimensions) and ("rlon" in dataset.dimensions): + return True + else: + return False + + +def __is_points(dataset): + """ + Check if the netCDF is a points dataset in non-GHOST format or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is a points non-GHOST one. + """ + + if "station" in dataset.dimensions: + return True + else: + return False + + +def __is_points_ghost(dataset): + """ + Check if the netCDF is a points dataset in GHOST format or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is a points GHOST one. + """ + + if "N_flag_codes" in dataset.dimensions and "N_qa_codes" in dataset.dimensions: + return True + else: + return False + + +def __is_points_providentia(dataset): + """ + Check if the netCDF is a points dataset in Providentia format or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is a points Providentia one. 
+ """ + + if (("grid_edge" in dataset.dimensions) and ("model_latitude" in dataset.dimensions) and + ("model_longitude" in dataset.dimensions)): + return True + else: + return False + + +def __is_lcc(dataset): + """ + Check if the netCDF is in Lambert Conformal Conic (LCC) projection or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is an LCC one. + """ + + if "Lambert_Conformal" in dataset.variables.keys() or "Lambert_conformal" in dataset.variables.keys(): + return True + else: + return False + + +def __is_mercator(dataset): + """ + Check if the netCDF is in Mercator projection or not. + + Parameters + ---------- + dataset : Dataset + netcdf4-python open dataset object. + + Returns + ------- + value : bool + Indicated if the netCDF is a Mercator one. + """ + + if "mercator" in dataset.variables.keys(): + return True + else: + return False + + +def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method="Y", avoid_first_hours=0, avoid_last_hours=0, + first_level=0, last_level=None, balanced=False): + """ + Concatenate variables form different sources. + + Parameters + ---------- + nessy_list : list + A List of Nes objects or list of paths to concatenate. + comm : MPI.Comm + MPI Communicator. + info: bool + Indicates if you want to get reading/writing info. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + accepted values: ["X", "Y", "T"]. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + + Returns + ------- + Nes + A Nes object with all the variables. 
+ """ + if not isinstance(nessy_list, list): + raise AttributeError("You must pass a list of NES objects or paths.") + + if isinstance(nessy_list[0], str): + nessy_first = open_netcdf(nessy_list[0], + comm=comm, + parallel_method=parallel_method, + info=info, + avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, + first_level=first_level, + last_level=last_level, + balanced=balanced + ) + nessy_first.load() + else: + nessy_first = nessy_list[0] + for i, aux_nessy in enumerate(nessy_list[1:]): + if isinstance(aux_nessy, str): + nc_add = Dataset(filename=aux_nessy, mode="r") + for var_name, var_info in nc_add.variables.items(): + if var_name not in DIM_VAR_NAMES: + nessy_first.variables[var_name] = {} + var_dims = var_info.dimensions + # Read data in 4 dimensions + if len(var_dims) < 2: + data = var_info[:] + elif len(var_dims) == 2: + data = var_info[nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] + data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 3: + if "strlen" in var_dims: + data = var_info[nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"], + :] + data_aux = empty(shape=(data.shape[0], data.shape[1]), dtype=object) + for lat_n in range(data.shape[0]): + for lon_n in range(data.shape[1]): + data_aux[lat_n, lon_n] = "".join( + data[lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) + data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) + else: + data = var_info[nessy_first.read_axis_limits["t_min"]:nessy_first.read_axis_limits["t_max"], + nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] + data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 4: + data = var_info[nessy_first.read_axis_limits["t_min"]:nessy_first.read_axis_limits["t_max"], + nessy_first.read_axis_limits["z_min"]:nessy_first.read_axis_limits["z_max"], + nessy_first.read_axis_limits["y_min"]:nessy_first.read_axis_limits["y_max"], + nessy_first.read_axis_limits["x_min"]:nessy_first.read_axis_limits["x_max"]] + else: + raise TypeError("{} data shape is nto accepted".format(var_dims)) + + nessy_first.variables[var_name]["data"] = data + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ["missing_value", "_FillValue"]: + value = getattr(var_info, attrname) + if value in ["unitless", "-"]: + value = "" + nessy_first.variables[var_name][attrname] = value + nc_add.close() + + else: + nessy_first.concatenate(aux_nessy) + + return nessy_first diff --git a/build/lib/nes/methods/__init__.py b/build/lib/nes/methods/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..35b63462d5b404acdb58af6b19d83d92cf1ac7ba --- /dev/null +++ b/build/lib/nes/methods/__init__.py @@ -0,0 +1,8 @@ +from .vertical_interpolation import add_4d_vertical_info +from .vertical_interpolation import interpolate_vertical +from .horizontal_interpolation import interpolate_horizontal +from .spatial_join import spatial_join + +__all__ = [ + 'add_4d_vertical_info', 'interpolate_vertical', 'interpolate_horizontal', 'spatial_join' +] diff --git a/build/lib/nes/methods/cell_measures.py b/build/lib/nes/methods/cell_measures.py new file mode 100644 index 
0000000000000000000000000000000000000000..185d0331506668b336a367052ec7eb7c60b8aaf0 --- /dev/null +++ b/build/lib/nes/methods/cell_measures.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python +from numpy import empty, newaxis, array, arcsin, tan, fabs, arctan, sqrt, radians, cos, sin, column_stack +from copy import deepcopy + + +def calculate_grid_area(self): + """ + Get coordinate bounds and call function to calculate the area of each cell of a grid. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + """ + + # Create bounds if they do not exist + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Get spatial number of vertices + spatial_nv = self.lat_bnds["data"].shape[-1] + + # Reshape bounds + if spatial_nv == 2: + + aux_shape = (self.lat_bnds["data"].shape[0], self.lon_bnds["data"].shape[0], 4) + lon_bnds_aux = empty(aux_shape) + lon_bnds_aux[:, :, 0] = self.lon_bnds["data"][newaxis, :, 0] + lon_bnds_aux[:, :, 1] = self.lon_bnds["data"][newaxis, :, 1] + lon_bnds_aux[:, :, 2] = self.lon_bnds["data"][newaxis, :, 1] + lon_bnds_aux[:, :, 3] = self.lon_bnds["data"][newaxis, :, 0] + + lon_bnds = lon_bnds_aux + del lon_bnds_aux + + lat_bnds_aux = empty(aux_shape) + lat_bnds_aux[:, :, 0] = self.lat_bnds["data"][:, newaxis, 0] + lat_bnds_aux[:, :, 1] = self.lat_bnds["data"][:, newaxis, 0] + lat_bnds_aux[:, :, 2] = self.lat_bnds["data"][:, newaxis, 1] + lat_bnds_aux[:, :, 3] = self.lat_bnds["data"][:, newaxis, 1] + + lat_bnds = lat_bnds_aux + del lat_bnds_aux + + else: + lon_bnds = self.lon_bnds["data"] + lat_bnds = self.lat_bnds["data"] + + # Reshape bounds and assign as grid corner coordinates + grid_corner_lon = deepcopy(lon_bnds).reshape(lon_bnds.shape[0]*lon_bnds.shape[1], + lon_bnds.shape[2]) + grid_corner_lat = deepcopy(lat_bnds).reshape(lat_bnds.shape[0]*lat_bnds.shape[1], + lat_bnds.shape[2]) + + # Calculate cell areas + grid_area = calculate_cell_area(grid_corner_lon, grid_corner_lat, + earth_radius_minor_axis=self.earth_radius[0], + earth_radius_major_axis=self.earth_radius[1]) + + return grid_area + + +def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142, + earth_radius_major_axis=6378137.0): + """ + Get coordinate bounds and call function to calculate the area of each cell of a set of geometries. + + Parameters + ---------- + geometry_list : List + A List with polygon geometries. + earth_radius_minor_axis : float + Radius of the minor axis of the Earth. + earth_radius_major_axis : float + Radius of the major axis of the Earth. 
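+ + Returns + ------- + array + Area of each geometry in square metres (unit-sphere areas scaled by the product of the Earth's minor and major radii). + + Example + ------- + An illustrative call for a single 1 x 1 degree cell near 40 degrees North; shapely is assumed to be available, since the geometries are shapely polygons: + + >>> from shapely.geometry import box + >>> calculate_geometry_area([box(0, 40, 1, 41)]) # ~9.4e9 m2 (about 111 km x 85 km)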
+ """ + + geometry_area = empty(shape=(len(geometry_list,))) + + for geom_ind in range(0, len(geometry_list)): + + # Calculate the area of each geometry in multipolygon and collection objects + if geometry_list[geom_ind].geom_type in ["MultiPolygon", "GeometryCollection"]: + multi_geom_area = 0 + for multi_geom_ind in range(0, len(geometry_list[geom_ind].geoms)): + if geometry_list[geom_ind].geoms[multi_geom_ind].geom_type == "Point": + continue + geometry_corner_lon, geometry_corner_lat = ( + geometry_list[geom_ind].geoms[multi_geom_ind].exterior.coords.xy) + geometry_corner_lon = array(geometry_corner_lon) + geometry_corner_lat = array(geometry_corner_lat) + geom_area = __mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) + multi_geom_area += geom_area + geometry_area[geom_ind] = multi_geom_area * earth_radius_minor_axis * earth_radius_major_axis + + # Calculate the area of each geometry + else: + geometry_corner_lon, geometry_corner_lat = geometry_list[geom_ind].exterior.coords.xy + geometry_corner_lon = array(geometry_corner_lon) + geometry_corner_lat = array(geometry_corner_lat) + geom_area = __mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) + geometry_area[geom_ind] = geom_area * earth_radius_minor_axis * earth_radius_major_axis + + return geometry_area + + +def calculate_cell_area(grid_corner_lon, grid_corner_lat, + earth_radius_minor_axis=6356752.3142, earth_radius_major_axis=6378137.0): + """ + Calculate the area of each cell of a grid. + + Parameters + ---------- + grid_corner_lon : array + An Array with longitude bounds of grid. + grid_corner_lat : array + An Array with longitude bounds of grid. + earth_radius_minor_axis : float + Radius of the minor axis of the Earth. + earth_radius_major_axis : float + Radius of the major axis of the Earth. + """ + + # Calculate area for each grid cell + n_cells = grid_corner_lon.shape[0] + area = empty(shape=(n_cells,)) + for i in range(0, n_cells): + area[i] = __mod_huiliers_area(grid_corner_lon[i], grid_corner_lat[i]) + + return area*earth_radius_minor_axis*earth_radius_major_axis + + +def __mod_huiliers_area(cell_corner_lon, cell_corner_lat): + """ + Calculate the area of each cell according to Huilier's theorem. + Reference: CDO (https://earth.bsc.es/gitlab/ces/cdo/). + + Parameters + ---------- + cell_corner_lon : array + Longitude boundaries of each cell. + cell_corner_lat : array + Latitude boundaries of each cell. + """ + + my_sum = 0 + + # Get points 0 (bottom left) and 1 (bottom right) in Earth coordinates + point_0 = __lon_lat_to_cartesian(cell_corner_lon[0], cell_corner_lat[0], earth_radius_major_axis=1) + point_1 = __lon_lat_to_cartesian(cell_corner_lon[1], cell_corner_lat[1], earth_radius_major_axis=1) + point_0, point_1 = point_0[0], point_1[0] + + # Get number of vertices + if cell_corner_lat[0] == cell_corner_lat[-1]: + spatial_nv = len(cell_corner_lon) - 1 + else: + spatial_nv = len(cell_corner_lon) + + for i in range(2, spatial_nv): + + # Get point 2 (top right) in Earth coordinates + point_2 = __lon_lat_to_cartesian(cell_corner_lon[i], cell_corner_lat[i], earth_radius_major_axis=1) + point_2 = point_2[0] + + # Calculate area of triangle between points 0, 1 and 2 + my_sum += __tri_area(point_0, point_1, point_2) + + # Copy to calculate area of next triangle + if i == (spatial_nv - 1): + point_1 = deepcopy(point_2) + + return my_sum + + +def __tri_area(point_0, point_1, point_2): + """ + Calculate area between three points that form a triangle. + Reference: CDO (https://earth.bsc.es/gitlab/ces/cdo/). 
+ + Parameters + ---------- + point_0 : array + Position of first point in cartesian coordinates. + point_1 : array + Position of second point in cartesian coordinates. + point_2 : array + Position of third point in cartesian coordinates. + """ + + # Get length of side a (between point 0 and 1) + tmp_vec = __cross_product(point_0, point_1) + sin_a = __norm(tmp_vec) + a = arcsin(sin_a) + + # Get length of side b (between point 0 and 2) + tmp_vec = __cross_product(point_0, point_2) + sin_b = __norm(tmp_vec) + b = arcsin(sin_b) + + # Get length of side c (between point 1 and 2) + tmp_vec = __cross_product(point_2, point_1) + sin_c = __norm(tmp_vec) + c = arcsin(sin_c) + + # Calculate area + s = 0.5*(a+b+c) + t = tan(s*0.5) * tan((s - a)*0.5) * tan((s - b)*0.5) * tan((s - c)*0.5) + area = fabs(4.0 * arctan(sqrt(fabs(t)))) + + return area + + +def __cross_product(a, b): + """ + Calculate cross product between two points. + + Parameters + ---------- + a : array + Position of point A in cartesian coordinates. + b : array + Position of point B in cartesian coordinates. + """ + + return [a[1]*b[2] - a[2]*b[1], + a[2]*b[0] - a[0]*b[2], + a[0]*b[1] - a[1]*b[0]] + + +def __norm(cp): + """ + Normalize the result of the cross product operation. + + Parameters + ---------- + cp : array + Cross product between two points. + """ + + return sqrt(cp[0]*cp[0] + cp[1]*cp[1] + cp[2]*cp[2]) + + +# noinspection DuplicatedCode +def __lon_lat_to_cartesian(lon, lat, earth_radius_major_axis=6378137.0): + """ + Calculate lon, lat coordinates of a point on a sphere. + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. + earth_radius_major_axis : float + Radius of the major axis of the Earth. + """ + + lon_r = radians(lon) + lat_r = radians(lat) + + x = earth_radius_major_axis * cos(lat_r) * cos(lon_r) + y = earth_radius_major_axis * cos(lat_r) * sin(lon_r) + z = earth_radius_major_axis * sin(lat_r) + + return column_stack([x, y, z]) diff --git a/build/lib/nes/methods/horizontal_interpolation.py b/build/lib/nes/methods/horizontal_interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..25efef619990c478d55226474f09a6165ae06537 --- /dev/null +++ b/build/lib/nes/methods/horizontal_interpolation.py @@ -0,0 +1,762 @@ +#!/usr/bin/env python + +import sys +import os +import nes +from warnings import warn, filterwarnings +from numpy import (ma, empty, nansum, concatenate, pad, nan, array, float64, int64, float32, meshgrid, expand_dims, + reciprocal, arange, uint32, array_split, radians, cos, sin, column_stack, zeros) +from pandas import concat, DataFrame +from mpi4py import MPI +from scipy import spatial +from filelock import FileLock +from datetime import datetime +from copy import deepcopy +from pyproj import Proj, Transformer, CRS +import gc + +# CONSTANTS +NEAREST_OPTS = ["NearestNeighbour", "NearestNeighbours", "nn", "NN"] +CONSERVATIVE_OPTS = ["Conservative", "Area_Conservative", "cons", "conservative", "area"] + + +def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): + """ + Horizontal methods from one grid to another one. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal interpolation. 
Accepted values: ["NearestNeighbour", "Conservative"]. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + info : bool + Indicates if you want to print extra info during the methods process. + to_providentia : bool + Indicates if we want the interpolated grid in Providentia format. + only_create_wm : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + """ + if info and self.master: + print("Creating Weight Matrix") + + # Obtain weight matrix + if self.parallel_method == "T": + weights, idx = __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, + only_create_wm, wm, flux) + elif self.parallel_method in ["Y", "X"]: + weights, idx = __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, + only_create_wm, wm, flux) + else: + raise NotImplementedError("Parallel method {0} is not implemented yet for horizontal interpolations.".format( + self.parallel_method) + "Use 'T'") + + if info and self.master: + print("Weight Matrix done!") + if only_create_wm: + # weights for only_create is the WM NES object + return weights + + # idx[idx < 0] = nan + idx = ma.masked_array(idx, mask=idx == -999) + # idx = array(idx, dtype=float) + # idx[idx < 0] = nan + # weights[weights < 0] = nan + weights = ma.masked_array(weights, mask=weights == -999) + # weights = array(weights, dtype=float) + # weights[weights < 0] = nan + + # Copy NES + final_dst = dst_grid.copy() + + sys.stdout.flush() + final_dst.set_communicator(dst_grid.comm) + + # Remove original file information + final_dst.__ini_path = None + final_dst.netcdf = None + final_dst.dataset = None + + # Return final_dst + final_dst.lev = self.lev + final_dst.set_full_levels(self.get_full_levels()) + final_dst.time = self.time + final_dst.set_full_times(self.get_full_times()) + final_dst.hours_start = self.hours_start + final_dst.hours_end = self.hours_end + + if info and self.master: + print("Applying weights") + # Apply weights + for var_name, var_info in self.variables.items(): + if info and self.master: + print("\t{var} horizontal interpolation".format(var=var_name)) + sys.stdout.flush() + src_shape = var_info["data"].shape + if isinstance(dst_grid, nes.PointsNes): + dst_shape = (src_shape[0], src_shape[1], idx.shape[-1]) + else: + dst_shape = (src_shape[0], src_shape[1], idx.shape[-2], idx.shape[-1]) + # Creating new variable without data + final_dst.variables[var_name] = {attr_name: attr_value for attr_name, attr_value in var_info.items() + if attr_name != "data"} + # Creating empty data + final_dst.variables[var_name]["data"] = empty(dst_shape) + + # src_data = var_info["data"].reshape((src_shape[0], src_shape[1], src_shape[2] * src_shape[3])) + for time in range(dst_shape[0]): + for lev in range(dst_shape[1]): + src_aux = __get_src_data(self.comm, var_info["data"][time, lev], idx, self.parallel_method) + final_dst.variables[var_name]["data"][time, lev] = nansum(weights * src_aux, axis=1) + + if isinstance(dst_grid, nes.PointsNes): + # Removing level axis + if src_shape[1] != 1: + raise IndexError("Data with vertical levels cannot be interpolated to points") + final_dst.variables[var_name]["data"] = final_dst.variables[var_name]["data"].reshape( + (src_shape[0], idx.shape[-1])) + if isinstance(dst_grid, nes.PointsNesGHOST) and not to_providentia: + final_dst = final_dst.to_points() + + 
final_dst.global_attrs = self.global_attrs + + if info and self.master: + print("Formatting") + + if to_providentia: + # self = experiment to interpolate (regular, rotated, etc.) + # final_dst = interpolated experiment (points) + if isinstance(final_dst, nes.PointsNes): + model_centre_lat, model_centre_lon = self.create_providentia_exp_centre_coordinates() + grid_edge_lat, grid_edge_lon = self.create_providentia_exp_grid_edge_coordinates() + final_dst = final_dst.to_providentia(model_centre_lon=model_centre_lon, + model_centre_lat=model_centre_lat, + grid_edge_lon=grid_edge_lon, + grid_edge_lat=grid_edge_lat) + else: + msg = "The final projection must be points to interpolate an experiment and get it in Providentia format." + warn(msg) + sys.stderr.flush() + else: + # Convert dimensions (time, lev, lat, lon) or (time, lat, lon) to (time, station) for interpolated variables + # and reshape data + if isinstance(final_dst, nes.PointsNes): + for var_name, var_info in final_dst.variables.items(): + if len(var_info["dimensions"]) != len(var_info["data"].shape): + final_dst.variables[var_name]["dimensions"] = ("time", "station") + + return final_dst + + +def __get_src_data(comm, var_data, idx, parallel_method): + """ + To obtain the needed src data to interpolate. + + Parameters + ---------- + comm : MPI.Comm. + MPI communicator. + var_data : array + Rank source data. + idx : array + Index of the needed data in a 2D flatten way. + parallel_method: str + Source parallel method. + + Returns + ------- + array + Flatten source needed data. + """ + + if parallel_method == "T": + var_data = var_data.flatten() + else: + var_data = comm.gather(var_data, root=0) + if comm.Get_rank() == 0: + if parallel_method == "Y": + axis = 0 + elif parallel_method == "X": + axis = 1 + else: + raise NotImplementedError(parallel_method) + var_data = concatenate(var_data, axis=axis) + var_data = var_data.flatten() + + var_data = comm.bcast(var_data) + + var_data = pad(var_data, [1, 1], "constant", constant_values=nan).take(idx + 1, mode="clip") + + return var_data + + +# noinspection DuplicatedCode +def __get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): + """ + To obtain the weights and source data index through the T axis. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + only_create : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + + Returns + ------- + tuple + Weights and source data index. + """ + weight_matrix = None + + if wm is not None: + weight_matrix = wm + + elif weight_matrix_path is not None: + with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + if os.path.isfile(weight_matrix_path): + if self.master: + weight_matrix = __read_weight_matrix(weight_matrix_path, comm=MPI.COMM_SELF) + else: + weight_matrix = True + if kind in NEAREST_OPTS: + if self.master: + if len(weight_matrix.lev["data"]) != n_neighbours: + warn("The selected weight matrix does not have the same number of nearest neighbours." 
+ + "Re-calculating again but not saving it.") + sys.stderr.flush() + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + else: + weight_matrix = True + + else: + if self.master: + if kind in NEAREST_OPTS: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + wm_path=weight_matrix_path) + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix( + self, dst_grid, wm_path=weight_matrix_path, flux=flux) + else: + raise NotImplementedError(kind) + else: + weight_matrix = True + + if os.path.exists(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + os.remove(weight_matrix_path + "{0:03d}.lock".format(self.rank)) + else: + if self.master: + if kind in NEAREST_OPTS: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) + else: + raise NotImplementedError(kind) + else: + weight_matrix = True + + if only_create: + return weight_matrix, None + + if self.master: + if kind in NEAREST_OPTS: + # Normalize to 1 + weights = array(array(weight_matrix.variables["weight"]["data"], dtype=float64) / + array(weight_matrix.variables["weight"]["data"], dtype=float64).sum(axis=1), + dtype=float64) + else: + weights = array(weight_matrix.variables["weight"]["data"], dtype=float64) + idx = array(weight_matrix.variables["idx"]["data"][0], dtype=int) + else: + weights = None + idx = None + + weights = self.comm.bcast(weights, root=0) + idx = self.comm.bcast(idx, root=0) + + return weights, idx + + +# noinspection DuplicatedCode +def __get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbours, only_create, wm, flux): + """ + To obtain the weights and source data index through the X or Y axis. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal interpolation. Accepted values: ["NearestNeighbour", "Conservative"]. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + only_create : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + + Returns + ------- + tuple + Weights and source data index. + """ + weight_matrix = None + + if isinstance(dst_grid, nes.PointsNes) and weight_matrix_path is not None: + if self.master: + warn("To point weight matrix cannot be saved.") + sys.stderr.flush() + weight_matrix_path = None + + if wm is not None: + weight_matrix = wm + + elif weight_matrix_path is not None: + with FileLock(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + if os.path.isfile(weight_matrix_path): + if self.master: + weight_matrix = __read_weight_matrix(weight_matrix_path, comm=MPI.COMM_SELF) + else: + weight_matrix = True + if kind in NEAREST_OPTS: + if self.master: + if len(weight_matrix.lev["data"]) != n_neighbours: + warn("The selected weight matrix does not have the same number of nearest neighbours." 
+ + "Re-calculating again but not saving it.") + sys.stderr.flush() + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + else: + weight_matrix = True + else: + if kind in NEAREST_OPTS: + if self.master: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours, + wm_path=weight_matrix_path) + else: + weight_matrix = True + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix( + self, dst_grid, wm_path=weight_matrix_path, flux=flux) + else: + raise NotImplementedError(kind) + + if os.path.exists(weight_matrix_path + "{0:03d}.lock".format(self.rank)): + os.remove(weight_matrix_path + "{0:03d}.lock".format(self.rank)) + else: + if kind in NEAREST_OPTS: + if self.master: + weight_matrix = __create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) + else: + weight_matrix = True + elif kind in CONSERVATIVE_OPTS: + weight_matrix = __create_area_conservative_weight_matrix(self, dst_grid, flux=flux) + else: + raise NotImplementedError(kind) + + if only_create: + return weight_matrix, None + + # Normalize to 1 + if self.master: + if kind in NEAREST_OPTS: + weights = array(array(weight_matrix.variables["weight"]["data"], dtype=float64) / + array(weight_matrix.variables["weight"]["data"], dtype=float64).sum(axis=1), + dtype=float64) + else: + weights = array(weight_matrix.variables["weight"]["data"], dtype=float64) + idx = array(weight_matrix.variables["idx"]["data"][0], dtype=int64) + else: + weights = None + idx = None + + weights = self.comm.bcast(weights, root=0) + idx = self.comm.bcast(idx, root=0) + + # if isinstance(dst_grid, nes.PointsNes): + # print("weights 1 ->", weights.shape) + # print("idx 1 ->", idx.shape) + # weights = weights[:, dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + # idx = idx[dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + # else: + weights = weights[:, :, dst_grid.write_axis_limits["y_min"]:dst_grid.write_axis_limits["y_max"], + dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + idx = idx[:, dst_grid.write_axis_limits["y_min"]:dst_grid.write_axis_limits["y_max"], + dst_grid.write_axis_limits["x_min"]:dst_grid.write_axis_limits["x_max"]] + # print("weights 2 ->", weights.shape) + # print("idx 2 ->", idx.shape) + + return weights, idx + + +def __read_weight_matrix(weight_matrix_path, comm=None, parallel_method="T"): + """ + Read weight matrix. + + Parameters + ---------- + weight_matrix_path : str + Path of the weight matrix. + comm : MPI.Comm + A Communicator to read the weight matrix. + parallel_method : str + Nes parallel method to read the weight matrix. + + Returns + ------- + nes.Nes + Weight matrix. + """ + + weight_matrix = nes.open_netcdf(path=weight_matrix_path, comm=comm, parallel_method=parallel_method, balanced=True) + weight_matrix.load() + + # In previous versions of NES weight was called inverse_dists + if "inverse_dists" in weight_matrix.variables.keys(): + weight_matrix.variables["weight"] = weight_matrix.variables["inverse_dists"] + + weight_matrix.variables["weight"]["data"][weight_matrix.variables["weight"]["data"] <= 0] = nan + weight_matrix.variables["weight"]["data"][weight_matrix.variables["idx"]["data"] <= 0] = nan + + return weight_matrix + + +# noinspection DuplicatedCode,PyProtectedMember +def __create_nn_weight_matrix(self, dst_grid, n_neighbours=4, wm_path=None, info=False): + """ + To create the weight matrix with the nearest neighbours method. 
+ + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_grid : nes.Nes + Final projection Nes object. + n_neighbours : int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + wm_path : str + Path where write the weight matrix. + info: bool + Indicates if you want to print extra info during the methods process. + + Returns + ------- + nes.Nes + Weight matrix. + """ + # Only master is here. + if info and self.master: + print("\tCreating Nearest Neighbour Weight Matrix with {0} neighbours".format(n_neighbours)) + sys.stdout.flush() + # Source + src_lat = array(self._full_lat["data"], dtype=float32) + src_lon = array(self._full_lon["data"], dtype=float32) + + # 1D to 2D coordinates + if len(src_lon.shape) == 1: + src_lon, src_lat = meshgrid(src_lon, src_lat) + + # Destination + dst_lat = array(dst_grid._full_lat["data"], dtype=float32) + dst_lon = array(dst_grid._full_lon["data"], dtype=float32) + + if isinstance(dst_grid, nes.PointsNes): + dst_lat = expand_dims(dst_grid._full_lat["data"], axis=0) + dst_lon = expand_dims(dst_grid._full_lon["data"], axis=0) + else: + # 1D to 2D coordinates + if len(dst_lon.shape) == 1: + dst_lon, dst_lat = meshgrid(dst_lon, dst_lat) + + # calculate N nearest neighbour inverse distance weights (and indices) + # from gridcells centres of model 1 to each grid cell centre of model 2 + # model geographic longitude/latitude coordinates are first converted + # to cartesian ECEF (Earth Centred, Earth Fixed) coordinates, before + # calculating distances. + + # src_mod_xy = lon_lat_to_cartesian(src_lon.flatten(), src_lat.flatten()) + # dst_mod_xy = lon_lat_to_cartesian(dst_lon.flatten(), dst_lat.flatten()) + + src_mod_xy = __lon_lat_to_cartesian_ecef(src_lon.flatten(), src_lat.flatten()) + dst_mod_xy = __lon_lat_to_cartesian_ecef(dst_lon.flatten(), dst_lat.flatten()) + + # generate KDtree using model 1 coordinates (i.e. 
the model grid you are + # interpolating from) + src_tree = spatial.cKDTree(src_mod_xy) + + # get n-neighbour nearest distances/indices (ravel form) of model 1 grid cell + # centres from each model 2 grid cell centre + + dists, idx = src_tree.query(dst_mod_xy, k=n_neighbours) + # self.nearest_neighbour_inds = \ + # column_stack(unravel_index(idx, lon.shape)) + + weight_matrix = dst_grid.copy() + weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time_bnds = None + weight_matrix.time_bnds = None + weight_matrix.last_level = None + weight_matrix.first_level = 0 + weight_matrix.hours_start = 0 + weight_matrix.hours_end = 0 + + weight_matrix.set_communicator(MPI.COMM_SELF) + # take the reciprocals of the nearest neighbours distances + dists[dists < 1] = 1 + inverse_dists = reciprocal(dists) + + inverse_dists_transf = inverse_dists.T.reshape((1, n_neighbours, dst_lon.shape[0], dst_lon.shape[1])) + weight_matrix.variables["weight"] = {"data": inverse_dists_transf, "units": "m"} + idx_transf = idx.T.reshape((1, n_neighbours, dst_lon.shape[0], dst_lon.shape[1])) + weight_matrix.variables["idx"] = {"data": idx_transf, "units": ""} + weight_matrix.lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} + weight_matrix._full_lev = {"data": arange(inverse_dists_transf.shape[1]), "units": ""} + if wm_path is not None: + weight_matrix.to_netcdf(wm_path) + + return weight_matrix + + +# noinspection DuplicatedCode +def __create_area_conservative_weight_matrix(self, dst_nes, wm_path=None, flux=False, info=False): + """ + To create the weight matrix with the area conservative method. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + dst_nes : nes.Nes + Final projection Nes object. + wm_path : str + Path where write the weight matrix. + flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + info: bool + Indicates if you want to print extra info during the methods process. + + Returns + ------- + nes.Nes + Weight matrix. 
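+ + Notes + ----- + The returned object stores two variables, "weight" and "idx", with shape (1, lev, y, x): for each destination cell, the lev axis enumerates the source cells that intersect it (padded with -999), with "idx" holding the flattened source-cell index and "weight" the fraction of the source cell area that falls inside the destination cell.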
+ """ + + if info and self.master: + print("\tCreating area conservative Weight Matrix") + sys.stdout.flush() + + my_crs = CRS.from_proj4("+proj=latlon") # Common projection for both shapefiles + + # Get a portion of the destiny grid + if dst_nes.shapefile is None: + dst_nes.create_shapefile() + dst_grid = deepcopy(dst_nes.shapefile) + + # Formatting Destination grid + dst_grid.to_crs(crs=my_crs, inplace=True) + dst_grid["FID_dst"] = dst_grid.index + + # Preparing Source grid + if self.shapefile is None: + self.create_shapefile() + src_grid = deepcopy(self.shapefile) + + # Formatting Source grid + src_grid.to_crs(crs=my_crs, inplace=True) + + # Serialize index intersection function to avoid memory problems + if self.size > 1 and self.parallel_method != "T": + src_grid = self.comm.gather(src_grid, root=0) + dst_grid = self.comm.gather(dst_grid, root=0) + if self.master: + src_grid = concat(src_grid) + dst_grid = concat(dst_grid) + if self.master: + src_grid["FID_src"] = src_grid.index + src_grid = src_grid.reset_index() + dst_grid = dst_grid.reset_index() + fid_src, fid_dst = dst_grid.sindex.query(src_grid.geometry, predicate="intersects") + + # Calculate intersected areas and fractions + intersection_df = DataFrame(columns=["FID_src", "FID_dst"]) + + intersection_df["FID_src"] = array(src_grid.loc[fid_src, "FID_src"], dtype=uint32) + intersection_df["FID_dst"] = array(dst_grid.loc[fid_dst, "FID_dst"], dtype=uint32) + + intersection_df["geometry_src"] = src_grid.loc[fid_src, "geometry"].values + intersection_df["geometry_dst"] = dst_grid.loc[fid_dst, "geometry"].values + del src_grid, dst_grid, fid_src, fid_dst + # Split the array into smaller arrays in order to scatter the data among the processes + intersection_df = array_split(intersection_df, self.size) + else: + intersection_df = None + + intersection_df = self.comm.scatter(intersection_df, root=0) + + if info and self.master: + print("\t\tGrids created and ready to interpolate") + sys.stdout.flush() + if True: + # No Warnings Zone + filterwarnings("ignore") + # intersection_df["weight"] = array(intersection_df.apply( + # lambda x: x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area, + # axis=1), dtype=float64) + if flux: + intersection_df["weight"] = array(intersection_df.apply( + lambda x: (x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area) * + (nes.Nes.calculate_geometry_area([x["geometry_src"]])[0] / + nes.Nes.calculate_geometry_area([x["geometry_dst"]])[0]), + axis=1), dtype=float64) + else: + intersection_df["weight"] = array(intersection_df.apply( + lambda x: x["geometry_src"].intersection(x["geometry_dst"]).buffer(0).area / x["geometry_src"].area, + axis=1), dtype=float64) + + intersection_df.drop(columns=["geometry_src", "geometry_dst"], inplace=True) + gc.collect() + filterwarnings("default") + + # Format & Clean + if info and self.master: + print("\t\tWeights calculated. 
Formatting weight matrix.") + sys.stdout.flush() + + # Initialising weight matrix + if self.parallel_method != "T": + intersection_df = self.comm.gather(intersection_df, root=0) + if self.master: + if self.parallel_method != "T": + intersection_df = concat(intersection_df) + intersection_df = intersection_df.set_index( + ["FID_dst", intersection_df.groupby("FID_dst").cumcount()]).rename_axis(("FID", "level")).sort_index() + intersection_df.rename(columns={"FID_src": "idx"}, inplace=True) + weight_matrix = dst_nes.copy() + weight_matrix.time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time = [datetime(year=2000, month=1, day=1, hour=0, second=0, microsecond=0)] + weight_matrix._full_time_bnds = None + weight_matrix.time_bnds = None + weight_matrix.last_level = None + weight_matrix.first_level = 0 + weight_matrix.hours_start = 0 + weight_matrix.hours_end = 0 + + weight_matrix.set_communicator(MPI.COMM_SELF) + + weight_matrix.set_levels({"data": arange(intersection_df.index.get_level_values("level").max() + 1), + "dimensions": ("lev",), + "units": "", + "positive": "up"}) + + # Creating Weight matrix empty variables + wm_shape = weight_matrix.get_full_shape() + shape = (1, len(weight_matrix.lev["data"]), wm_shape[0], wm_shape[1],) + shape_flat = (1, len(weight_matrix.lev["data"]), wm_shape[0] * wm_shape[1],) + + weight_matrix.variables["weight"] = {"data": empty(shape_flat), "units": "-"} + weight_matrix.variables["weight"]["data"][:] = -999 + weight_matrix.variables["idx"] = {"data": empty(shape_flat), "units": "-"} + weight_matrix.variables["idx"]["data"][:] = -999 + + # Filling Weight matrix variables + for aux_lev in weight_matrix.lev["data"]: + aux_data = intersection_df.xs(level="level", key=aux_lev) + weight_matrix.variables["weight"]["data"][0, aux_lev, aux_data.index] = aux_data.loc[:, "weight"].values + weight_matrix.variables["idx"]["data"][0, aux_lev, aux_data.index] = aux_data.loc[:, "idx"].values + # Re-shaping + weight_matrix.variables["weight"]["data"] = weight_matrix.variables["weight"]["data"].reshape(shape) + weight_matrix.variables["idx"]["data"] = weight_matrix.variables["idx"]["data"].reshape(shape) + if wm_path is not None: + if info and self.master: + print("\t\tWeight matrix saved at {0}".format(wm_path)) + sys.stdout.flush() + weight_matrix.to_netcdf(wm_path) + else: + weight_matrix = True + return weight_matrix + + +# noinspection DuplicatedCode +def __lon_lat_to_cartesian(lon, lat, radius=6378137.0): + """ + Calculate lon, lat coordinates of a point on a sphere. + + DEPRECATED!!!! + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. + radius : float + Radius of the sphere to get the distances. + """ + + lon_r = radians(lon) + lat_r = radians(lat) + + x = radius * cos(lat_r) * cos(lon_r) + y = radius * cos(lat_r) * sin(lon_r) + z = radius * sin(lat_r) + + return column_stack([x, y, z]) + + +def __lon_lat_to_cartesian_ecef(lon, lat): + """ + Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, + Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. + ECEF coordinates represent positions (in meters) as X, Y, Z coordinates, approximating the earth surface + as an ellipsoid of revolution. + This conversion is for the subsequent calculation of Euclidean distances of the model grid cell centres + from each observational station. 
+ Defining the distance between two points on the earth's surface as simply the Euclidean distance + between the two lat/lon pairs could lead to inaccurate results depending on the distance + between two points (i.e. 1 deg. of longitude varies with latitude). + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. + """ + + lla = Proj(proj="latlong", ellps="WGS84", datum="WGS84") + ecef = Proj(proj="geocent", ellps="WGS84", datum="WGS84") + + # x, y, z = pyproj.transform(lla, ecef, lon, lat, zeros(lon.shape), radians=False) + # Deprecated: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1 + transformer = Transformer.from_proj(lla, ecef) + x, y, z = transformer.transform(lon, lat, zeros(lon.shape), radians=False) + return column_stack([x, y, z]) diff --git a/build/lib/nes/methods/spatial_join.py b/build/lib/nes/methods/spatial_join.py new file mode 100644 index 0000000000000000000000000000000000000000..eb35864b4fb5e3398e6ef6a3b9f34edea59cd661 --- /dev/null +++ b/build/lib/nes/methods/spatial_join.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python + +import sys +from warnings import warn, filterwarnings +from geopandas import sjoin_nearest, sjoin, read_file +from pandas import DataFrame +from numpy import array, uint32, nan +from shapely.errors import TopologicalError + + +def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): + """ + Compute overlay intersection of two GeoDataFrames. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + ext_shp : GeoDataFrame or str + GeoDataFrame with the data to be joined, or the path to the file that contains them. + method : str + Overlay method. Accepted values: ["nearest", "intersection", "centroid"]. + var_list : List or None or str + Variables that will be included in the resulting shapefile. + info : bool + Indicates if you want to print the process info. + apply_bbox : bool + Indicates if you want to reduce the shapefile to a bbox. + """ + + if self.master and info: + print("Starting spatial join") + if isinstance(var_list, str): + # Transforming string (variable name) to a list of length 1 + var_list = [var_list] + + # Create source shapefile if it does not exist + if self.shapefile is None: + if self.master and info: + print("\tCreating shapefile") + sys.stdout.flush() + self.create_shapefile() + + ext_shp = __prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info, + apply_bbox=apply_bbox) + + if method == "nearest": + # Nearest centroids to the shapefile polygons + __spatial_join_nearest(self, ext_shp=ext_shp, info=info) + elif method == "intersection": + # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN + __spatial_join_intersection(self, ext_shp=ext_shp, info=info) + elif method == "centroid": + # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN + __spatial_join_centroid(self, ext_shp=ext_shp, info=info) + + else: + accepted_values = ["nearest", "intersection", "centroid"] + raise NotImplementedError("{0} is not implemented. Choose from: {1}".format(method, accepted_values)) + + return None + + +def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox=True): + """ + Prepare the external shapefile. + + It is highly recommended to pass the ext_shp parameter as a string because it will clip the external shapefile to the rank. + + 1. Read if it is not already read + 2. Filter variables list + 3. 
Standardize projections
+
+ Parameters
+ ----------
+ self : nes.Nes
+ A Nes Object.
+ ext_shp : geopandas.GeoDataFrame or str
+ External shapefile or path to it.
+ var_list : List[str] or None
+ External shapefile variables to be computed.
+ info : bool
+ Indicates if you want to print the information.
+ apply_bbox : bool
+ Indicates if you want to reduce the shapefile to a bbox.
+
+ Returns
+ -------
+ GeoDataFrame
+ External shapefile.
+ """
+
+ if isinstance(ext_shp, str):
+ # Reading external shapefile
+ if self.master and info:
+ print("\tReading external shapefile")
+ # ext_shp = read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry)
+ if apply_bbox:
+ ext_shp = read_file(ext_shp, include_fields=var_list, bbox=__get_bbox(self))
+ else:
+ ext_shp = read_file(ext_shp, include_fields=var_list)
+ else:
+ msg = "WARNING!!! "
+ msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened "
+ msg += "shapefile, memory usage is improved because the external shapefile is clipped while reading."
+ warn(msg)
+ sys.stderr.flush()
+ ext_shp.reset_index(inplace=True)
+ if var_list is not None:
+ ext_shp = ext_shp.loc[:, var_list + ["geometry"]]
+
+ self.comm.Barrier()
+ if self.master and info:
+ print("\t\tReading external shapefile done!")
+
+ # Standardizing projection
+ ext_shp = ext_shp.to_crs(self.shapefile.crs)
+
+ return ext_shp
+
+
+def __get_bbox(self):
+ """
+ Obtain the bounding box of the rank data (lon_min, lat_min, lon_max, lat_max).
+
+ Parameters
+ ----------
+ self : nes.Nes
+ A Nes Object.
+
+ Returns
+ -------
+ tuple
+ Bounding box.
+ """
+
+ bbox = (self.lon_bnds["data"].min(), self.lat_bnds["data"].min(),
+ self.lon_bnds["data"].max(), self.lat_bnds["data"].max(), )
+
+ return bbox
+
+
+# noinspection DuplicatedCode
+def __spatial_join_nearest(self, ext_shp, info=False):
+ """
+ Perform the spatial join using the nearest method.
+
+ Parameters
+ ----------
+ self : nes.Nes
+ A Nes Object.
+ ext_shp : GeoDataFrame
+ External shapefile.
+ info : bool
+ Indicates if you want to print the information.
+ """
+
+ if self.master and info:
+ print("\tNearest spatial join")
+ sys.stdout.flush()
+ grid_shp = self.get_centroids_from_coordinates()
+
+ # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest
+ # TODO: Check if the projection 4328 does not distort the coordinates too much
+ # https://gis.stackexchange.com/questions/372564/
+ # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas
+ # ext_shp = ext_shp.to_crs("EPSG:4328")
+ # grid_shp = grid_shp.to_crs("EPSG:4328")
+
+ # Calculate spatial join by distance
+ aux_grid = sjoin_nearest(grid_shp, ext_shp, distance_col="distance")
+
+ # Get data from closest shapes to centroids
+ del aux_grid["geometry"], aux_grid["index_right"]
+ self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid
+
+ var_list = list(ext_shp.columns)
+ var_list.remove("geometry")
+ for var_name in var_list:
+ self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)
+
+ return None
+
+
+# noinspection DuplicatedCode
+def __spatial_join_centroid(self, ext_shp, info=False):
+ """
+ Perform the spatial join using the centroid method.
+
+ Parameters
+ ----------
+ self : nes.Nes
+ A Nes Object.
+ ext_shp : GeoDataFrame
+ External shapefile.
+ info : bool
+ Indicates if you want to print the information. 
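+
+ Example
+ -------
+ Illustrative sketch only ("regions.shp" and "REGION_ID" are hypothetical names); the join is
+ normally triggered through the public spatial_join method rather than this helper.
+
+ >>> nessy.spatial_join("regions.shp", method="centroid", var_list="REGION_ID", info=True)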
+ """ + + if self.master and info: + print("\tCentroid spatial join") + sys.stdout.flush() + if info and self.master: + print("\t\tCalculating centroids") + sys.stdout.flush() + + # Get centroids + grid_shp = self.get_centroids_from_coordinates() + + # Calculate spatial joint + if info and self.master: + print("\t\tCalculating centroid spatial join") + sys.stdout.flush() + aux_grid = sjoin(grid_shp, ext_shp, predicate="within") + + # Get data from shapes where there are centroids, rest will be NaN + del aux_grid["geometry"], aux_grid["index_right"] + self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid + + var_list = list(ext_shp.columns) + var_list.remove("geometry") + for var_name in var_list: + self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) + + return None + + +def __spatial_join_intersection(self, ext_shp, info=False): + """ + Perform the spatial join using the intersection method. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + ext_shp : GeoDataFrame + External shapefile. + info : bool + Indicates if you want to print the information. + """ + + var_list = list(ext_shp.columns) + var_list.remove("geometry") + + grid_shp = self.shapefile + grid_shp["FID_grid"] = grid_shp.index + grid_shp = grid_shp.reset_index() + + # Get intersected areas + # inp, res = ext_shp.sindex.query(grid_shp.geometry, predicate="intersects") + inp, res = grid_shp.sindex.query(ext_shp.geometry, predicate="intersects") + + if info: + print("\t\tRank {0:03d}: {1} intersected areas found".format(self.rank, len(inp))) + sys.stdout.flush() + + # Calculate intersected areas and fractions + intersection = DataFrame(columns=["FID", "ext_shp_id", "weight"]) + intersection["FID"] = array(grid_shp.loc[res, "FID_grid"], dtype=uint32) + intersection["ext_shp_id"] = array(inp, dtype=uint32) + + if len(intersection) > 0: + if True: + # No Warnings Zone + counts = intersection["FID"].value_counts() + filterwarnings("ignore") + intersection.loc[:, "weight"] = 1. 
+
+ for i, row in intersection.iterrows():
+ if isinstance(i, int) and i % 1000 == 0 and info:
+ print("\t\t\tRank {0:03d}: {1:.3f} %".format(self.rank, i * 100 / len(intersection)))
+ sys.stdout.flush()
+ # Skip the fraction calculation for grid cells intersected by a single polygon; their weight stays 1
+ if counts[row["FID"]] > 1:
+ try:
+ intersection.loc[i, "weight"] = grid_shp.loc[res[i], "geometry"].intersection(
+ ext_shp.loc[inp[i], "geometry"]).area / grid_shp.loc[res[i], "geometry"].area
+ except TopologicalError:
+ # If the geometry is corrupted for some reason, it should work after the zero-width buffer fix
+ ext_shp.loc[[inp[i]], "geometry"] = ext_shp.loc[[inp[i]], "geometry"].buffer(0)
+ intersection.loc[i, "weight"] = grid_shp.loc[res[i], "geometry"].intersection(
+ ext_shp.loc[inp[i], "geometry"]).area / grid_shp.loc[res[i], "geometry"].area
+ # intersection["intersect_area"] = intersection.apply(
+ # lambda x: x["geometry_grid"].intersection(x["geometry_ext"]).area, axis=1)
+ intersection.drop(intersection[intersection["weight"] <= 0].index, inplace=True)
+
+ filterwarnings("default")
+
+ # Choose the biggest area from intersected areas with multiple options
+ intersection.sort_values("weight", ascending=False, inplace=True)
+ intersection = intersection.drop_duplicates(subset="FID", keep="first")
+ intersection = intersection.sort_values("FID").set_index("FID")
+
+ for var_name in var_list:
+ self.shapefile.loc[intersection.index, var_name] = array(
+ ext_shp.loc[intersection["ext_shp_id"], var_name])
+
+ else:
+ for var_name in var_list:
+ self.shapefile.loc[:, var_name] = nan
+
+ for var_name in var_list:
+ self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)
+
+ return None diff --git a/build/lib/nes/methods/vertical_interpolation.py b/build/lib/nes/methods/vertical_interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..23ca71260678eadbedf31fa6d246801ae82dbe2a --- /dev/null +++ b/build/lib/nes/methods/vertical_interpolation.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python
+
+import sys
+from numpy import nan, flip, cumsum, nanmean, empty, ndarray, ma, float64, array, interp, where
+from scipy.interpolate import interp1d
+from copy import copy
+
+
+def add_4d_vertical_info(self, info_to_add):
+ """
+ Add the vertical information from another source.
+
+ Parameters
+ ----------
+ self : nes.Nes
+ Source Nes object.
+ info_to_add : nes.Nes, str
+ Nes object with the vertical information as variable or str with the path to the NetCDF file that contains
+ the vertical data.
+ """
+
+ vertical_var = list(self.concatenate(info_to_add))
+ self.vertical_var_name = vertical_var[0]
+
+ return None
+
+
+def __parse_extrapolate(extrapolate) -> tuple:
+ """
+ Parses the "extrapolate" parameter and returns a tuple representing the extrapolation options.
+
+ Parameters
+ ----------
+ extrapolate : bool or tuple or None or number or NaN
+ If bool:
+ - If True, both extrapolation options are set to "extrapolate".
+ - If False, extrapolation options are set to ("bottom", "top").
+ If tuple:
+ - The first element represents the extrapolation option for the lower bound.
+ - The second element represents the extrapolation option for the upper bound.
+ - If any element is bool:
+ - If True, it represents "extrapolate".
+ - If False:
+ - If it's the first element, it represents "bottom".
+ - If it's the second element, it represents "top".
+ - If any element is None, it is replaced with numpy.nan.
+ - Other numeric values are kept as they are. 
+ - If any element is NaN, it is kept as NaN.
+ If None:
+ - Both extrapolation options are set to ("bottom", "top").
+ If number:
+ - Both extrapolation options are set to the provided number.
+ If NaN:
+ - Both extrapolation options are set to NaN.
+
+ Returns
+ -------
+ tuple
+ A tuple representing the extrapolation options for the lower and upper bounds.
+ """
+ if isinstance(extrapolate, bool):
+ if extrapolate:
+ extrapolate_options = ("extrapolate", "extrapolate")
+ else:
+ extrapolate_options = ("bottom", "top")
+ elif isinstance(extrapolate, tuple):
+ extrapolate_options = [None, None]
+ for i in range(len(extrapolate)):
+ if isinstance(extrapolate[i], bool):
+ if extrapolate[i]:
+ extrapolate_options[i] = "extrapolate"
+ else:
+ if i == 0:
+ extrapolate_options[i] = "bottom"
+ else:
+ extrapolate_options[i] = "top"
+ elif extrapolate[i] is None:
+ extrapolate_options[i] = nan
+ else:
+ extrapolate_options[i] = extrapolate[i]
+ extrapolate_options = tuple(extrapolate_options)
+ elif extrapolate is None:
+ extrapolate_options = ("bottom", "top")
+ else:
+ extrapolate_options = (extrapolate, extrapolate)
+
+ return extrapolate_options
+
+
+def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", extrapolate_options=False, info=None,
+ overwrite=False):
+ """
+ Vertical interpolation.
+
+ Parameters
+ ----------
+ self : Nes
+ Source Nes object.
+ new_levels : List
+ A List of new vertical levels.
+ new_src_vertical : nes.Nes, str
+ Nes object with the vertical information as variable or str with the path to the NetCDF file that contains
+ the vertical data.
+ kind : str
+ Interpolation type, passed to scipy.interpolate.interp1d.
+ extrapolate_options : bool or tuple or None or number or NaN
+ If bool:
+ - If True, both extrapolation options are set to "extrapolate".
+ - If False, extrapolation options are set to ("bottom", "top").
+ If tuple:
+ - The first element represents the extrapolation option for the lower bound.
+ - The second element represents the extrapolation option for the upper bound.
+ - If any element is bool:
+ - If True, it represents "extrapolate".
+ - If False:
+ - If it's the first element, it represents "bottom".
+ - If it's the second element, it represents "top".
+ - If any element is None, it is replaced with numpy.nan.
+ - Other numeric values are kept as they are.
+ - If any element is NaN, it is kept as NaN.
+ If None:
+ - Both extrapolation options are set to ("bottom", "top").
+ If number:
+ - Both extrapolation options are set to the provided number.
+ If NaN:
+ - Both extrapolation options are set to NaN.
+ info : None or bool
+ Indicates if you want to print extra information.
+ overwrite : bool
+ Indicates if you want to compute the vertical interpolation in the same object (True) or in a returned
+ copy (False). 
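+
+ Example
+ -------
+ Illustrative sketch only: the target heights and the "vertical_levels.nc" file are
+ hypothetical. With overwrite=True the interpolation is computed in place.
+
+ >>> nessy.interpolate_vertical([50., 100., 250., 500.], new_src_vertical="vertical_levels.nc",
+ ... kind="linear", extrapolate_options=False, overwrite=True)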
+ """ + src_levels_aux = None + fill_value = None + + extrapolate_options = __parse_extrapolate(extrapolate_options) + do_extrapolation = "extrapolate" in extrapolate_options + + if len(self.lev) == 1: + raise RuntimeError("1D data cannot be vertically interpolated.") + if not overwrite: + self = self.copy(copy_vars=True) + if info is None: + info = self.info + + if new_src_vertical is not None: + self.add_4d_vertical_info(new_src_vertical) + if new_levels[0] > new_levels[-1]: + ascendant = False + else: + ascendant = True + + nz_new = len(new_levels) + + if self.vertical_var_name is None: + # To use current level data + current_level = True + # Checking old order + src_levels = self.lev["data"] + if src_levels[0] > src_levels[-1]: + if not ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels) + else: + if ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels) + else: + current_level = False + src_levels = self.variables[self.vertical_var_name]["data"] + if self.vertical_var_name == "layer_thickness": + src_levels = flip(cumsum(flip(src_levels, axis=1), axis=1)) + else: + # src_levels = flip(src_levels, axis=1) + pass + # Checking old order + if nanmean(src_levels[:, 0, :, :]) > nanmean(src_levels[:, -1, :, :]): + if not ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels, axis=1) + else: + if ascendant: + do_flip = False + else: + do_flip = True + src_levels = flip(src_levels, axis=1) + + # Loop over variables + for var_name in self.variables.keys(): + if self.variables[var_name]["data"] is None: + # Load data if it is not loaded yet + self.load(var_name) + + if var_name != self.vertical_var_name: + if do_flip: + self.variables[var_name]["data"] = flip(self.variables[var_name]["data"], axis=1) + if info and self.master: + print("\t{var} vertical methods".format(var=var_name)) + sys.stdout.flush() + nt, nz, ny, nx = self.variables[var_name]["data"].shape + dst_data = empty((nt, nz_new, ny, nx), dtype=self.variables[var_name]["data"].dtype) + for t in range(nt): + # if info and self.rank == self.size - 1: + if self.info and self.master: + print("\t\t{3} time step {0} ({1}/{2}))".format(self.time[t], t + 1, nt, var_name)) + sys.stdout.flush() + for j in range(ny): + for i in range(nx): + if len(src_levels.shape) == 1: + # To use 1D level information + curr_level_values = src_levels + else: + # To use 4D level data + curr_level_values = src_levels[t, :, j, i] + try: + # Check if all values are identical or masked + if ((isinstance(curr_level_values, ndarray) and + (curr_level_values == curr_level_values[0]).all()) or + (isinstance(curr_level_values, ma.core.MaskedArray) and + curr_level_values.mask.all())): + kind = "slinear" + else: + kind = kind # "cubic" + + # Filtering filling values to extrapolation + fill_value = [nan, nan] + if "bottom" in extrapolate_options: + if ascendant: + fill_value[0] = float64(self.variables[var_name]["data"][t, 0, j, i]) + else: + fill_value[0] = float64(self.variables[var_name]["data"][t, -1, j, i]) + else: + fill_value[0] = extrapolate_options[0] + if "top" in extrapolate_options: + if ascendant: + fill_value[1] = float64(self.variables[var_name]["data"][t, -1, j, i]) + else: + fill_value[1] = float64(self.variables[var_name]["data"][t, 0, j, i]) + else: + fill_value[1] = extrapolate_options[1] + fill_value = tuple(fill_value) + + # We force the methods with float64 to avoid negative values + # We don"t know why the negatives appears with float34 + if 
current_level:
+ # 1D vertical component
+ src_levels_aux = src_levels
+ else:
+ # 4D vertical component
+ src_levels_aux = src_levels[t, :, j, i]
+
+ if cell_kind == "linear" and ascendant and not do_extrapolation:
+ dst_data[t, :, j, i] = array(
+ interp(new_levels,
+ array(src_levels_aux, dtype=float64),
+ array(self.variables[var_name]["data"][t, :, j, i], dtype=float64),
+ left=fill_value[0], right=fill_value[1]),
+ dtype=self.variables[var_name]["data"].dtype)
+ else:
+ if not do_extrapolation:
+ dst_data[t, :, j, i] = array(
+ interp1d(array(src_levels_aux, dtype=float64),
+ array(self.variables[var_name]["data"][t, :, j, i], dtype=float64),
+ kind=cell_kind,
+ bounds_error=False,
+ fill_value=fill_value)(new_levels),
+ dtype=self.variables[var_name]["data"].dtype)
+ else:
+ # When extrapolating, first extrapolate everything (below & above) and then
+ # overwrite the out-of-range values where a fixed fill value was requested
+ dst_data[t, :, j, i] = array(
+ interp1d(array(src_levels_aux, dtype=float64),
+ array(self.variables[var_name]["data"][t, :, j, i],
+ dtype=float64),
+ kind=cell_kind,
+ bounds_error=False,
+ fill_value="extrapolate")(new_levels),
+ dtype=self.variables[var_name]["data"].dtype)
+ # Check values below the lower vertical level
+ if fill_value[0] != "extrapolate":
+ if ascendant:
+ idx_below = where(new_levels < src_levels_aux[0])
+ else:
+ idx_below = where(new_levels > src_levels_aux[0])
+ dst_data[t, idx_below, j, i] = fill_value[0]
+ # Check values above the upper vertical level
+ if fill_value[1] != "extrapolate":
+ if ascendant:
+ idx_above = where(new_levels > src_levels_aux[-1])
+ else:
+ idx_above = where(new_levels < src_levels_aux[-1])
+ dst_data[t, idx_above, j, i] = fill_value[1]
+ except Exception as e:
+ print("time lat lon", t, j, i)
+ print("***********************")
+ print("LEVELS", src_levels_aux)
+ print("DATA", array(self.variables[var_name]["data"][t, :, j, i], dtype=float64))
+ print("METHOD", cell_kind)
+ print("FILL_VALUE", fill_value)
+ print("+++++++++++++++++++++++")
+ raise Exception(str(e))
+ # if level_array is not None:
+ # dst_data[t, :, j, i] = array(f(level_array), dtype=float32)
+
+ self.variables[var_name]["data"] = copy(dst_data)
+ # print(self.variables[var_name]["data"])
+
+ # Update level information
+ new_lev_info = {"data": array(new_levels)}
+ if "positive" in self.lev.keys():
+ # Vertical level direction
+ if do_flip:
+ self.reverse_level_direction()
+ new_lev_info["positive"] = self.lev["positive"]
+
+ if self.vertical_var_name is not None:
+ for var_attr, attr_info in self.variables[self.vertical_var_name].items():
+ if var_attr not in ["data", "dimensions", "crs", "grid_mapping"]:
+ new_lev_info[var_attr] = copy(attr_info)
+ self.free_vars(self.vertical_var_name)
+ self.vertical_var_name = None
+
+ self.set_levels(new_lev_info)
+
+ # Remove original file information
+ self.__ini_path = None
+ self.dataset = None
+
+ return self diff --git a/build/lib/nes/nc_projections/__init__.py b/build/lib/nes/nc_projections/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4839ec52dbf1b050f4f70db7b4c0e963459fbaf3 --- /dev/null +++ b/build/lib/nes/nc_projections/__init__.py @@ -0,0 +1,15 @@ +from .default_nes import Nes
+from .latlon_nes import LatLonNes
+from .rotated_nes import RotatedNes
+from .rotated_nested_nes import RotatedNestedNes
+from .points_nes import PointsNes
+from .points_nes_ghost import PointsNesGHOST
+from .points_nes_providentia import PointsNesProvidentia
+from .lcc_nes import LCCNes
+from .mercator_nes import MercatorNes
+# from .raster_nes import RasterNes 
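+
+# Illustrative note: these classes are normally obtained through the top-level helpers
+# (nes.open_netcdf, nes.create_nes) rather than being instantiated directly, e.g.:
+# >>> from nes import create_nes
+# >>> nessy = create_nes(projection="global", inc_lat=1, inc_lon=1)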
+ +__all__ = [ + 'MercatorNes', 'Nes', 'LatLonNes', 'RotatedNes', 'RotatedNestedNes', 'PointsNes', 'PointsNesGHOST', + 'PointsNesProvidentia', 'LCCNes', +] diff --git a/build/lib/nes/nc_projections/default_nes.py b/build/lib/nes/nc_projections/default_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c28c9d08c932165274279e8d99c0eaecdb9b52 --- /dev/null +++ b/build/lib/nes/nc_projections/default_nes.py @@ -0,0 +1,4252 @@ +#!/usr/bin/env python + +import sys +from gc import collect +from warnings import warn +from numpy import (array, ndarray, abs, mean, diff, dstack, append, tile, empty, unique, stack, vstack, full, isnan, + flipud, nan, float32, float64, ma, generic, character, issubdtype, arange, newaxis, concatenate, + split, cumsum, zeros, column_stack) +from pandas import Index, concat +from geopandas import GeoDataFrame +from datetime import timedelta, datetime +from netCDF4 import Dataset, num2date, date2num, stringtochar +from mpi4py import MPI +from shapely.geometry import Polygon, Point +from copy import deepcopy, copy +from dateutil.relativedelta import relativedelta +from typing import Union, List, Dict, Any +from pyproj import Proj, Transformer +from ..methods import vertical_interpolation, horizontal_interpolation, cell_measures, spatial_join +from ..nes_formats import to_netcdf_cams_ra, to_netcdf_monarch, to_monarch_units, to_netcdf_cmaq, to_cmaq_units, \ + to_netcdf_wrf_chem, to_wrf_chem_units + + +class Nes(object): + """ + A class to handle netCDF data with parallel processing capabilities using MPI. + + Attributes + ---------- + comm : MPI.Comm + MPI communicator. + rank : int + MPI rank. + master : bool + True when rank == 0. + size : int + Size of the communicator. + info : bool + Indicates if you want to print reading/writing info. + __ini_path : str + Path to the original file to read when open_netcdf is called. + hours_start : int + Number of hours to avoid from the first original values. + hours_end : int + Number of hours to avoid from the last original values. + dataset : Dataset + netcdf4-python Dataset. + variables : Dict[str, Dict[str, Any]] + Variables information. The dictionary structure is: + { + var_name: { + "data": ndarray or None, # Array values or None if the variable is not loaded. + attr_name: attr_value, # Variable attributes. + ... + }, + ... + } + _full_time : List[datetime] + Complete list of original time step values. + _full_lev : Dict[str, array] + Vertical level dictionary with the complete "data" key for all the values and the rest of the attributes. + { + "data": ndarray, # Array of vertical level values. + attr_name: attr_value, # Vertical level attributes. + ... + } + _full_lat : dict + Latitudes dictionary with the complete "data" key for all the values and the rest of the attributes. + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + _full_lon : dict + Longitudes dictionary with the complete "data" key for all the values and the rest of the attributes. + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + _full_lat_bnds : dict + Latitude bounds dictionary with the complete "data" key for the latitudinal boundaries of each grid and the + rest of the attributes. + { + "data": ndarray, # Array of latitude bounds. + attr_name: attr_value, # Latitude bounds attributes. + ... 
+ } + _full_lon_bnds : dict + Longitude bounds dictionary with the complete "data" key for the longitudinal boundaries of each grid and the + rest of the attributes. + { + "data": ndarray, # Array of longitude bounds. + attr_name: attr_value, # Longitude bounds attributes. + ... + } + parallel_method : str + Parallel method to read/write. Can be chosen from any of the following axes to parallelize: "T", "Y", or "X". + read_axis_limits : dict + Dictionary with the 4D limits of the rank data to read. Structure: + { + "t_min": int, "t_max": int, # Time axis limits. + "z_min": int, "z_max": int, # Vertical axis limits. + "y_min": int, "y_max": int, # Latitudinal axis limits. + "x_min": int, "x_max": int, # Longitudinal axis limits. + } + write_axis_limits : dict + Dictionary with the 4D limits of the rank data to write. Structure: + { + "t_min": int, "t_max": int, # Time axis limits. + "z_min": int, "z_max": int, # Vertical axis limits. + "y_min": int, "y_max": int, # Latitudinal axis limits. + "x_min": int, "x_max": int, # Longitudinal axis limits. + } + time : List[datetime] + List of time steps of the rank data. + lev : dict + Vertical levels dictionary with the portion of "data" corresponding to the rank values. Structure: + { + "data": ndarray, # Array of vertical level values for the rank. + attr_name: attr_value, # Vertical level attributes. + ... + } + lat : dict + Latitudes dictionary with the portion of "data" corresponding to the rank values. Structure: + { + "data": ndarray, # Array of latitude values for the rank. + attr_name: attr_value, # Latitude attributes. + ... + } + lon : dict + Longitudes dictionary with the portion of "data" corresponding to the rank values. Structure: + { + "data": ndarray, # Array of longitude values for the rank. + attr_name: attr_value, # Longitude attributes. + ... + } + lat_bnds : dict + Latitude bounds dictionary with the portion of "data" for the latitudinal boundaries corresponding to the rank + values. + Structure: + { + "data": ndarray, # Array of latitude bounds for the rank. + attr_name: attr_value, # Latitude bounds attributes. + ... + } + lon_bnds : dict + Longitude bounds dictionary with the portion of "data" for the longitudinal boundaries corresponding to the + rank values. + Structure: + { + "data": ndarray, # Array of longitude bounds for the rank. + attr_name: attr_value, # Longitude bounds attributes. + ... + } + global_attrs : dict + Global attributes with the attribute name as key and data as values. Structure: + { + attr_name: attr_value, # Global attribute name and value. + ... + } + _var_dim : tuple + Name of the Y and X dimensions for the variables. + _lat_dim : tuple + Name of the dimensions of the Latitude values. + _lon_dim : tuple + Name of the dimensions of the Longitude values. + projection : Proj + Grid projection. + projection_data : dict + Dictionary with the projection information. Structure: + { + proj_param: proj_value, # Projection parameters. + ... + } + """ + def __init__(self, comm: Union[MPI.Comm, None] = None, path: Union[str, None] = None, info: bool = False, + dataset: Union[Dataset, None] = None, parallel_method: str = "Y", avoid_first_hours: int = 0, + avoid_last_hours: int = 0, first_level: int = 0, last_level: Union[int, None] = None, + create_nes: bool = False, balanced: bool = False, times: Union[List[datetime], None] = None, + **kwargs) -> None: + """ + Initialize the Nes class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. 
+ info: bool
+ Indicates if you want to get reading/writing info.
+ dataset: Dataset or None
+ NetCDF4-python Dataset to initialize the class.
+ parallel_method : str
+ Indicates the parallelization method that you want. Default: over the Y axis.
+ Accepted values: ["X", "Y", "T"].
+ avoid_first_hours : int
+ Number of hours to remove from first time steps.
+ avoid_last_hours : int
+ Number of hours to remove from last time steps.
+ first_level : int
+ Index of the first level to use.
+ last_level : int or None
+ Index of the last level to use. None if it is the last.
+ create_nes : bool
+ Indicates if you want to create the object from scratch (True) or from an existing file (False).
+ balanced : bool
+ Indicates if you want a balanced parallelization or not.
+ Balanced datasets cannot be written in chunking mode.
+ times : List[datetime] or None
+ List of times to substitute the current ones during creation.
+ """
+
+ # MPI Initialization
+ if comm is None:
+ self.comm = MPI.COMM_WORLD
+ else:
+ self.comm = comm
+ self.rank = self.comm.Get_rank()
+ self.master = self.rank == 0
+ self.size = self.comm.Get_size()
+
+ # General info
+ self.info = info
+ self.__ini_path = path
+ self.shapefile = None
+
+ # Selecting info
+ self.hours_start = avoid_first_hours
+ self.hours_end = avoid_last_hours
+ self.first_level = first_level
+ self.last_level = last_level
+ self.lat_min = None
+ self.lat_max = None
+ self.lon_min = None
+ self.lon_max = None
+ self.balanced = balanced
+
+ # Define parallel method
+ self.parallel_method = parallel_method
+ self.serial_nc = None # Place to store temporarily the serial Nes instance
+
+ # Get minor and major axes of Earth
+ self.earth_radius = self.get_earth_radius("WGS84")
+
+ # Time resolution and climatology will be modified, if needed, during the time variable reading
+ self._time_resolution = "hours"
+ self._climatology = False
+ self._climatology_var_name = "climatology_bounds" # Default var_name, but it can be changed if the input differs
+
+ # NetCDF object
+ if create_nes:
+
+ self.dataset = None
+
+ # Set string length
+ self.strlen = None
+
+ # Initialize variables
+ self.variables = {}
+
+ # Projection data. This information is duplicated because, when creating the object from scratch,
+ # NES needs it to create the coordinate data. 
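+ # Illustrative note (hypothetical values): for a regular lat-lon grid, projection_data
+ # typically carries the CF grid-mapping parameters written to the output file, e.g.
+ # {"grid_mapping_name": "latitude_longitude", "semi_major_axis": 6378137.0,
+ # "inverse_flattening": 298.257223563} (WGS84 defaults).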
+ self.projection_data = self._get_projection_data(create_nes, **kwargs) + self.projection = self._get_pyproj_projection() + + # Complete dimensions + self._full_time = times + + self._full_time_bnds = self.__get_time_bnds(create_nes) + self._full_lat_bnds, self._full_lon_bnds = self.__get_coordinates_bnds(create_nes) + self._full_lev = {"data": array([0]), "units": "", "positive": "up"} + self._full_lat, self._full_lon = self._create_centre_coordinates(**kwargs) + + # Set axis limits for parallel reading + self.read_axis_limits = self._get_read_axis_limits() + self.write_axis_limits = self._get_write_axis_limits() + + # Dimensions screening + self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + self.time_bnds = self.get_full_time_bnds() + self.lev = self.get_full_levels() + self.lat_bnds = self.get_full_latitudes_boundaries() + self.lon_bnds = self.get_full_longitudes_boundaries() + + # Cell measures screening + self.cell_measures = self.__get_cell_measures(create_nes) + + # Set NetCDF attributes + self.global_attrs = self.__get_global_attributes(create_nes) + + else: + if dataset is not None: + self.dataset = dataset + elif self.__ini_path is not None: + self._open() + + # Get string length + self.strlen = self._get_strlen() + + # Lazy variables + self.variables = self._get_lazy_variables() + + # Complete dimensions + self._full_time = self.__get_time() + self._full_time_bnds = self.__get_time_bnds() + self._full_lev = self._get_coordinate_dimension(["lev", "level", "lm", "plev"]) + self._full_lat = self._get_coordinate_dimension(["lat", "latitude", "latitudes"]) + self._full_lon = self._get_coordinate_dimension(["lon", "longitude", "longitudes"]) + self._full_lat_bnds, self._full_lon_bnds = self.__get_coordinates_bnds() + + # Complete cell measures + self._cell_measures = self.__get_cell_measures() + + # Set axis limits for parallel reading + self.read_axis_limits = self._get_read_axis_limits() + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + # Dimensions screening + self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + self.time_bnds = self.get_full_time_bnds() + self.lev = self._get_coordinate_values(self.get_full_levels(), "Z") + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + self.lat_bnds = self._get_coordinate_values(self.get_full_latitudes_boundaries(), "Y", bounds=True) + self.lon_bnds = self._get_coordinate_values(self.get_full_longitudes_boundaries(), "X", bounds=True) + + # Cell measures screening + self.cell_measures = self._get_cell_measures_values(self._cell_measures) + + # Set NetCDF attributes + self.global_attrs = self.__get_global_attributes() + + # Projection data + self.projection_data = self._get_projection_data(create_nes, **kwargs) + self.projection = self._get_pyproj_projection() + + # Writing options + self.zip_lvl = 0 + + # Dimensions information + self._var_dim = None + self._lat_dim = None + self._lon_dim = None + + self.vertical_var_name = None + + # Filtering (portion of the filter coordinates function) + idx = self._get_idx_intervals() + if self.master: + self.set_full_times(self._full_time[idx["idx_t_min"]:idx["idx_t_max"]]) + self._full_lev["data"] = self._full_lev["data"][idx["idx_z_min"]:idx["idx_z_max"]] + + self.hours_start = 0 + self.hours_end = 0 + self.last_level = None + self.first_level = None + + def 
__test_mpi__(self, num_test=None):
+ print(f"{self.rank} Barrier {num_test}")
+ sys.stdout.flush()
+ self.comm.Barrier()
+ if self.master:
+ data = 1
+ else:
+ data = 0
+ data = self.comm.bcast(data, root=0)
+ print(f"{self.rank} data {data}")
+ sys.stdout.flush()
+ return None
+
+ @staticmethod
+ def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y",
+ avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False,
+ balanced=False, times=None, **kwargs):
+ """
+ Initialize the Nes class.
+
+ Parameters
+ ----------
+ comm: MPI.COMM
+ MPI Communicator.
+ path: str
+ Path to the NetCDF to initialize the object.
+ info: bool
+ Indicates if you want to get reading/writing info.
+ dataset: Dataset
+ NetCDF4-python Dataset to initialize the class.
+ parallel_method : str
+ Indicates the parallelization method that you want. Default: over the Y axis.
+ Accepted values: ["X", "Y", "T"].
+ avoid_first_hours : int
+ Number of hours to remove from first time steps.
+ avoid_last_hours : int
+ Number of hours to remove from last time steps.
+ first_level : int
+ Index of the first level to use.
+ last_level : int or None
+ Index of the last level to use. None if it is the last.
+ create_nes : bool
+ Indicates if you want to create the object from scratch (True) or from an existing file (False).
+ balanced : bool
+ Indicates if you want a balanced parallelization or not.
+ Balanced datasets cannot be written in chunking mode.
+ times : List[datetime] or None
+ List of times to substitute the current ones during creation.
+ """
+
+ new = Nes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method,
+ avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level,
+ last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs)
+
+ return new
+
+ def _get_strlen(self):
+ """
+ Get the strlen.
+
+ Returns
+ -------
+ int
+ Max length of the string data.
+ """
+
+ if "strlen" in self.dataset.dimensions:
+ strlen = self.dataset.dimensions["strlen"].size
+ else:
+ return None
+
+ return strlen
+
+ def set_strlen(self, strlen=75):
+ """
+ Set the strlen.
+
+ 75 is the standard value used in GHOST data.
+
+ Parameters
+ ----------
+ strlen : int or None
+ Max length of the string.
+ """
+
+ self.strlen = strlen
+
+ return None
+
+ def __del__(self):
+ """
+ To delete the Nes object and close all the open datasets.
+ """
+
+ self.close()
+ try:
+ self.free_vars(list(self.variables.keys()))
+ del self.variables
+ del self.time
+ del self._full_time
+ del self.time_bnds
+ del self._full_time_bnds
+ del self.lev
+ del self._full_lev
+ del self.lat
+ del self._full_lat
+ del self.lon
+ del self._full_lon
+ del self._full_lat_bnds
+ del self.lat_bnds
+ del self._full_lon_bnds
+ del self.lon_bnds
+ del self.strlen
+ del self.shapefile
+ for cell_measure in self.cell_measures.keys():
+ if self.cell_measures[cell_measure]["data"] is not None:
+ del self.cell_measures[cell_measure]["data"]
+ del self.cell_measures
+ except (AttributeError, KeyError):
+ pass
+
+ del self
+ collect()
+
+ return None
+
+ def __getstate__(self):
+ """
+ Get the state of the object for pickling, excluding the attributes that cannot be serialized
+ (communicator, dataset, variables and cell measures).
+
+ Returns
+ -------
+ state : dict
+ Dictionary with the class parameters.
+ """
+
+ d = self.__dict__
+ state = {k: d[k] for k in d if k not in ["comm", "variables", "dataset", "cell_measures"]}
+
+ return state
+
+ def __setstate__(self, state):
+ """
+ Set the state of the class. 
+ + Parameters + ---------- + state: dict + Dictionary with the class parameters. + """ + + self.__dict__ = state + + return None + + def __add__(self, other): + """ + Sum two NES objects + + Parameters + ---------- + other : Nes + A Nes to be summed + + Returns + ------- + Nes + Summed Nes object + """ + nessy = self.copy(copy_vars=True) + for var_name in other.variables.keys(): + if var_name not in nessy.variables.keys(): + # Create New variable + nessy.variables[var_name] = deepcopy(other.variables[var_name]) + else: + nessy.variables[var_name]["data"] += other.variables[var_name]["data"] + return nessy + + def __radd__(self, other): + if other == 0 or other is None: + return self + else: + return self.__add__(other) + + def __getitem__(self, key: str) -> Union[array, None]: + """ + Retrieve the data associated with the specified key. + + Parameters + ---------- + key : str + The key to retrieve the data for. + + Returns + ------- + Union[array, None] + The data associated with the specified key, or None if the key + does not exist. + + Notes + ----- + This method allows accessing data in the variables dictionary using + dictionary-like syntax, e.g., obj[key]["data"]. + + """ + return self.variables[key]["data"] + + def copy(self, copy_vars: bool = False): + """ + Copy the Nes object. + The copy will avoid to copy the communicator, dataset and variables by default. + + Parameters + ---------- + copy_vars: bool + Indicates if you want to copy the variables (in lazy mode). + + Returns + ------- + nessy : Nes + Copy of the Nes object. + """ + + nessy = deepcopy(self) + nessy.dataset = None + if copy_vars: + nessy.set_communicator(self.comm) + nessy.variables = deepcopy(self.variables) + nessy.cell_measures = deepcopy(self.cell_measures) + else: + nessy.variables = {} + nessy.cell_measures = {} + + return nessy + + def get_full_times(self) -> List[datetime]: + """ + Retrieve the complete list of original time step values. + + Returns + ------- + List[datetime] + The complete list of original time step values from the netCDF data. + """ + if self.master: + data = self._full_time + else: + data = None + data = self.comm.bcast(data, root=0) + + if not isinstance(data, list): + data = list(data) + return data + + def get_full_time_bnds(self) -> List[datetime]: + """ + Retrieve the complete list of original time step boundaries. + + Returns + ------- + List[datetime] + The complete list of original time step boundary values from the netCDF data. + """ + data = self.comm.bcast(self._full_time_bnds) + return data + + def get_full_levels(self) -> Dict[str, Any]: + """ + Retrieve the complete vertical level information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete vertical level data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of vertical level values. + attr_name: attr_value, # Vertical level attributes. + ... + } + """ + data = self.comm.bcast(self._full_lev) + return data + + def get_full_latitudes(self) -> Dict[str, Any]: + """ + Retrieve the complete latitude information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_lat) + + return data + + def get_full_longitudes(self) -> Dict[str, Any]: + """ + Retrieve the complete longitude information. 
+ + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_lon) + return data + + def get_full_latitudes_boundaries(self) -> Dict[str, Any]: + """ + Retrieve the complete latitude boundaries information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude boundaries data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude boundaries values. + attr_name: attr_value, # Latitude boundaries attributes. + ... + } + """ + data = self.comm.bcast(self._full_lat_bnds) + return data + + def get_full_longitudes_boundaries(self) -> Dict[str, Any]: + """ + Retrieve the complete longitude boundaries information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude boundaries data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude boundaries values. + attr_name: attr_value, # Longitude boundaries attributes. + ... + } + """ + data = self.comm.bcast(self._full_lon_bnds) + return data + + def set_full_times(self, data: List[datetime]) -> None: + """ + Set the complete list of original time step values. + + Parameters + ---------- + data : List[datetime] + The complete list of original time step values to set. + """ + if self.master: + self._full_time = data + return None + + def set_full_time_bnds(self, data: List[datetime]) -> None: + """ + Set the complete list of original time step boundaries. + + Parameters + ---------- + data : List[datetime] + The complete list of original time step boundary values to set. + """ + if self.master: + self._full_time_bnds = data + return None + + def set_full_levels(self, data: Dict[str, Any]) -> None: + """ + Set the complete vertical level information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete vertical level data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of vertical level values. + attr_name: attr_value, # Vertical level attributes. + ... + } + """ + if self.master: + self._full_lev = data + return None + + def set_full_latitudes(self, data: Dict[str, Any]) -> None: + """ + Set the complete latitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + if self.master: + self._full_lat = data + return None + + def set_full_longitudes(self, data: Dict[str, Any]) -> None: + """ + Set the complete longitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + if self.master: + self._full_lon = data + return None + + def set_full_latitudes_boundaries(self, data: Dict[str, Any]) -> None: + """ + Set the complete latitude boundaries information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude boundaries data and its attributes. 
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of latitude boundaries values.
+ attr_name: attr_value, # Latitude boundaries attributes.
+ ...
+ }
+ """
+ if self.master:
+ self._full_lat_bnds = data
+ return None
+
+ def set_full_longitudes_boundaries(self, data: Dict[str, Any]) -> None:
+ """
+ Set the complete longitude boundaries information.
+
+ Parameters
+ ----------
+ data : Dict[str, Any]
+ A dictionary containing the complete longitude boundaries data and its attributes.
+ The dictionary structure is:
+ {
+ "data": ndarray, # Array of longitude boundaries values.
+ attr_name: attr_value, # Longitude boundaries attributes.
+ ...
+ }
+ """
+ if self.master:
+ self._full_lon_bnds = data
+
+ return None
+
+ def get_fids(self, use_read=False):
+ """
+ Obtain the FIDs in a 2D format.
+
+ Parameters
+ ----------
+ use_read : bool
+ Indicates if you want to use the read_axis_limits.
+
+ Returns
+ -------
+ array
+ 2D array with the FID data.
+ """
+ if self.master:
+ fids = arange(self._full_lat["data"].shape[0] * self._full_lon["data"].shape[-1])
+ fids = fids.reshape((self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1]))
+ else:
+ fids = None
+ fids = self.comm.bcast(fids)
+
+ if use_read:
+ fids = fids[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"],
+ self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]]
+ else:
+ fids = fids[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"],
+ self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]]
+ return fids
+
+ def get_full_shape(self):
+ """
+ Obtain the full 2D shape of the data.
+
+ Returns
+ -------
+ tuple
+ 2D shape of the data.
+ """
+ if self.master:
+ shape = (self._full_lat["data"].shape[0], self._full_lon["data"].shape[-1])
+ else:
+ shape = None
+ shape = self.comm.bcast(shape)
+
+ return shape
+
+ def set_level_direction(self, new_direction):
+ """
+ Set the direction of the vertical level values.
+
+ Parameters
+ ----------
+ new_direction : str
+ The new direction for the vertical levels. Must be either "up" or "down".
+
+ Returns
+ -------
+ bool
+ True if the direction was set successfully.
+
+ Raises
+ ------
+ ValueError
+ If `new_direction` is not "up" or "down".
+ """
+ if new_direction not in ["up", "down"]:
+ raise ValueError(f"Level direction must be up or down. '{new_direction}' is not a valid option")
+ if self.master:
+ self._full_lev["positive"] = new_direction
+ self.lev["positive"] = new_direction
+
+ return True
+
+ def reverse_level_direction(self):
+ """
+ Reverse the current direction of the vertical level values.
+
+ Returns
+ -------
+ bool
+ True if the direction was reversed successfully.
+ """
+ if "positive" in self.lev.keys():
+ if self.lev["positive"] == "up":
+ if self.master:
+ self._full_lev["positive"] = "down"
+ self.lev["positive"] = "down"
+ else:
+ if self.master:
+ self._full_lev["positive"] = "up"
+ self.lev["positive"] = "up"
+ return True
+
+ def clear_communicator(self):
+ """
+ Erase the communicator and the parallelization indexes.
+ """
+
+ self.comm = None
+ self.rank = 0
+ self.master = 0
+ self.size = 0
+
+ return None
+
+ def set_communicator(self, comm):
+ """
+ Set a new communicator and the corresponding parallelization indexes.
+
+ Parameters
+ ----------
+ comm: MPI.COMM
+ Communicator to be set. 
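+
+ Example
+ -------
+ Illustrative sketch: give the object a private communicator so that subsequent
+ reads/writes are no longer shared with other ranks (done internally, e.g., for the
+ interpolation weight matrix).
+
+ >>> from mpi4py import MPI
+ >>> nessy.set_communicator(MPI.COMM_SELF)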
+ """ + + self.comm = comm + self.rank = self.comm.Get_rank() + self.master = self.rank == 0 + self.size = self.comm.Get_size() + + self.read_axis_limits = self._get_read_axis_limits() + self.write_axis_limits = self._get_write_axis_limits() + + return None + + def set_climatology(self, is_climatology): + """ + Set whether the dataset represents climatological data. + + Parameters + ---------- + is_climatology : bool + A boolean indicating if the dataset represents climatological data. + + Returns + ------- + None + + Raises + ------ + TypeError + If `is_climatology` is not a boolean. + """ + if not isinstance(is_climatology, bool): + raise TypeError("Only boolean values are accepted") + self._climatology = is_climatology + return None + + def get_climatology(self): + """ + Get whether the dataset represents climatological data. + + Returns + ------- + bool + True if the dataset represents climatological data, False otherwise. + """ + return self._climatology + + def set_levels(self, levels): + """ + Modify the original level values with new ones. + + Parameters + ---------- + levels : dict + Dictionary with the new level information to be set. + """ + self.set_full_levels(deepcopy(levels)) + self.lev = deepcopy(levels) + + return None + + def set_time(self, time_list): + """ + Modify the original level values with new ones. + + Parameters + ---------- + time_list : List[datetime] + List of time steps + """ + if self.parallel_method == "T": + raise TypeError("Cannot set time on a 'T' parallel method") + self.set_full_times(deepcopy(time_list)) + self.time = deepcopy(time_list) + + return None + + def set_time_bnds(self, time_bnds): + """ + Modify the original time bounds values with new ones. + + Parameters + ---------- + time_bnds : List + AList with the new time bounds information to be set. + """ + + correct_format = True + for time_bnd in array(time_bnds).flatten(): + if not isinstance(time_bnd, datetime): + print("{0} is not a datetime object".format(time_bnd)) + correct_format = False + if correct_format: + if len(self.get_full_times()) == len(time_bnds): + self.set_full_time_bnds(deepcopy(time_bnds)) + self.time_bnds = deepcopy(time_bnds) + else: + msg = "WARNING!!! " + msg += "The given time bounds list has a different length than the time array. " + msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self.time), len(time_bnds)) + warn(msg) + sys.stderr.flush() + else: + msg = "WARNING!!! " + msg += "There is at least one element in the time bounds to be set that is not a datetime object. " + msg += "Time bounds will not be set." + warn(msg) + sys.stderr.flush() + + return None + + def set_time_resolution(self, new_resolution): + """ + Set the time resolution for the dataset. + + Parameters + ---------- + new_resolution : str + The new time resolution. Accepted values are "second", "seconds", "minute", "minutes", + "hour", "hours", "day", "days". + + Returns + ------- + bool + True if the time resolution was set successfully. + + Raises + ------ + ValueError + If `new_resolution` is not one of the accepted values. + """ + accepted_resolutions = ["second", "seconds", "minute", "minutes", "hour", "hours", "day", "days"] + if new_resolution in accepted_resolutions: + self._time_resolution = new_resolution + else: + raise ValueError(f"Time resolution '{new_resolution}' is not accepted. 
" + + f"Use one of this: {accepted_resolutions}") + return True + + @staticmethod + def _create_single_spatial_bounds(coordinates, inc, spatial_nv=2, inverse=False): + """ + Calculate the vertices coordinates. + + Parameters + ---------- + coordinates : array + Coordinates in degrees (latitude or longitude). + inc : float + Increment between centre values. + spatial_nv : int + Non-mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. + inverse : bool + For some grid latitudes. + + Returns + ---------- + bounds : array + An Array with as many elements as vertices for each value of coords. + """ + + # Create new arrays moving the centres half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if spatial_nv == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bounds = dstack((coords_left, coords_right)) + bounds = bounds.reshape((len(coordinates), spatial_nv)) + elif spatial_nv == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bounds = dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bounds = dstack((coords_left, coords_right, coords_right, coords_left)) + else: + raise ValueError("The number of vertices of the boundaries must be 2 or 4.") + + return bounds + + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + # Latitudes + full_lat = self.get_full_latitudes() + inc_lat = abs(mean(diff(full_lat["data"]))) + lat_bnds = self._create_single_spatial_bounds(full_lat["data"], inc_lat, spatial_nv=2) + + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], :]} + + # Longitudes + full_lon = self.get_full_longitudes() + inc_lon = abs(mean(diff(full_lon["data"]))) + lon_bnds = self._create_single_spatial_bounds(full_lon["data"], inc_lon, spatial_nv=2) + + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :]} + + return None + + def get_spatial_bounds_mesh_format(self): + """ + Get the spatial bounds in the pcolormesh format: + + see: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.pcolormesh.html + + Returns + ------- + lon_bnds_mesh : numpy.ndarray + Longitude boundaries in the mesh format + lat_bnds_mesh : numpy.ndarray + Latitude boundaries in the mesh format + """ + if self.size > 1: + raise RuntimeError("NES.get_spatial_bounds_mesh_format() function only works in serial mode.") + if self.lat_bnds is None: + self.create_spatial_bounds() + + if self.lat_bnds["data"].shape[-1] == 2: + # get the lat_b and lon_b first rows + lat_b_0 = append(self.lat_bnds["data"][:, 0], self.lat_bnds["data"][-1, -1]) + lon_b_0 = append(self.lon_bnds["data"][:, 0], self.lon_bnds["data"][-1, -1]) + # expand lat_band lon_b in 2D + lat_bnds_mesh = tile(lat_b_0, (len(self.lon["data"]) + 1, 1)).transpose() + lon_bnds_mesh = tile(lon_b_0, (len(self.lat["data"]) + 1, 1)) + + elif self.lat_bnds["data"].shape[-1] == 4: + # Irregular quadrilateral polygon cell definition + lat_bnds_mesh = empty((self.lat["data"].shape[0] + 1, 
self.lat["data"].shape[1] + 1)) + lat_bnds_mesh[:-1, :-1] = self.lat_bnds["data"][:, :, 0] + lat_bnds_mesh[:-1, 1:] = self.lat_bnds["data"][:, :, 1] + lat_bnds_mesh[1:, 1:] = self.lat_bnds["data"][:, :, 2] + lat_bnds_mesh[1:, :-1] = self.lat_bnds["data"][:, :, 3] + + lon_bnds_mesh = empty((self.lat["data"].shape[0] + 1, self.lat["data"].shape[1] + 1)) + lon_bnds_mesh[:-1, :-1] = self.lon_bnds["data"][:, :, 0] + lon_bnds_mesh[:-1, 1:] = self.lon_bnds["data"][:, :, 1] + lon_bnds_mesh[1:, 1:] = self.lon_bnds["data"][:, :, 2] + lon_bnds_mesh[1:, :-1] = self.lon_bnds["data"][:, :, 3] + else: + raise RuntimeError("Invalid number of vertices: {0}".format(self.lat_bnds["data"].shape[-1])) + + return lon_bnds_mesh, lat_bnds_mesh + + def free_vars(self, var_list): + """ + Erase the selected variables from the variables' information. + + Parameters + ---------- + var_list : List or str + List (or single string) of the variables to be loaded. + """ + + if isinstance(var_list, str): + var_list = [var_list] + + if self.variables is not None: + for var_name in var_list: + if var_name in self.variables: + if "data" in self.variables[var_name].keys(): + del self.variables[var_name]["data"] + del self.variables[var_name] + collect() + + return None + + def keep_vars(self, var_list): + """ + Keep the selected variables and erases the rest. + + Parameters + ---------- + var_list : List or str + List (or single string) of the variables to be loaded. + """ + + if isinstance(var_list, str): + var_list = [var_list] + + to_remove = list(set(self.variables.keys()).difference(set(var_list))) + + self.free_vars(to_remove) + + return None + + @property + def get_time_interval(self): + """ + Calculate the interrval of hours between time steps. + + Returns + ------- + int + Number of hours between time steps. + """ + if self.master: + time_interval = self._full_time[1] - self._full_time[0] + time_interval = int(time_interval.seconds // 3600) + else: + time_interval = None + + return self.comm.bcast(time_interval) + + def sel_time(self, time, inplace=True): + """ + To select only one time step. + + Parameters + ---------- + time : datetime + Time stamp to select. + inplace : bool + Indicates if you want a copy with the selected time step (False) or to modify te existing one (True). + + Returns + ------- + Nes + A Nes object with the data (and metadata) of the selected time step. + """ + + if not inplace: + aux_nessy = self.copy(copy_vars=False) + aux_nessy.comm = self.comm + else: + aux_nessy = self + + aux_nessy.hours_start = 0 + aux_nessy.hours_end = 0 + + idx_time = aux_nessy.time.index(time) + + aux_nessy.time = [self.time[idx_time]] + aux_nessy._full_time = aux_nessy.time + for var_name, var_info in self.variables.items(): + if copy: + aux_nessy.variables[var_name] = {} + for att_name, att_value in var_info.items(): + if att_name == "data": + if att_value is None: + raise ValueError("{} data not loaded".format(var_name)) + aux_nessy.variables[var_name][att_name] = att_value[[idx_time]] + else: + aux_nessy.variables[var_name][att_name] = att_value + else: + aux_nessy.variables[var_name]["data"] = aux_nessy.variables[var_name]["data"][[idx_time]] + + return aux_nessy + + def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, + lat_min=None, lat_max=None, lon_min=None, lon_max=None): + """ + Select a slice of time, vertical level, latitude, or longitude given minimum and maximum limits. 
+ + Parameters + ---------- + hours_start : int, optional + The number of hours from the start to begin the selection. + time_min : datetime, optional + The minimum datetime for the time selection. Mutually exclusive with `hours_start`. + hours_end : int, optional + The number of hours from the end to end the selection. + time_max : datetime, optional + The maximum datetime for the time selection. Mutually exclusive with `hours_end`. + lev_min : int, optional + The minimum vertical level index for the selection. + lev_max : int, optional + The maximum vertical level index for the selection. + lat_min : float, optional + The minimum latitude for the selection. + lat_max : float, optional + The maximum latitude for the selection. + lon_min : float, optional + The minimum longitude for the selection. + lon_max : float, optional + The maximum longitude for the selection. + + Returns + ------- + None + + Raises + ------ + ValueError + If any variables are already loaded or if mutually exclusive parameters are both provided. + + Notes + ----- + This method updates the selection criteria for the dataset and recalculates the read and write axis limits + accordingly. It also updates the time, level, latitude, and longitude slices based on the new criteria. + """ + full_time = self.get_full_times() + loaded_vars = False + for var_info in self.variables.values(): + if var_info["data"] is not None: + loaded_vars = True + if loaded_vars: + raise ValueError("Some variables have been loaded. Use select function before load.") + + # First time filter + if hours_start is not None: + if time_min is not None: + raise ValueError("Choose to select by hours_start or time_min but not both") + self.hours_start = hours_start + elif time_min is not None: + if time_min <= full_time[0]: + self.hours_start = 0 + else: + self.hours_start = int((time_min - full_time[0]).total_seconds() // 3600) + + # Last time filter + if hours_end is not None: + if time_max is not None: + raise ValueError("Choose to select by hours_end or time_max but not both") + self.hours_end = hours_end + elif time_max is not None: + if time_max >= full_time[-1]: + self.hours_end = 0 + else: + self.hours_end = int((full_time[-1] - time_max).total_seconds() // 3600) + + # Level filter + self.first_level = lev_min + self.last_level = lev_max + + # Coordinate filter + self.lat_min = lat_min + self.lat_max = lat_max + self.lon_min = lon_min + self.lon_max = lon_max + + # New axis limits + self.read_axis_limits = self._get_read_axis_limits() + + # Dimensions screening + self.time = self.get_full_times()[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + self.time_bnds = self.get_full_time_bnds() + self.lev = self._get_coordinate_values(self.get_full_levels(), "Z") + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + + self.lat_bnds = self._get_coordinate_values(self.get_full_latitudes_boundaries(), "Y", bounds=True) + self.lon_bnds = self._get_coordinate_values(self.get_full_longitudes_boundaries(), "X", bounds=True) + + # Filter dimensions + self._filter_coordinates_selection() + + # Removing complete coordinates + self.write_axis_limits = self._get_write_axis_limits() + + return None + + def _filter_coordinates_selection(self): + """ + Use the selection limits to filter time, lev, lat, lon, lon_bnds and lat_bnds. 
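+
+ Example
+ -------
+ Illustrative sketch: this helper is called internally by sel(), so a selection such as
+
+ >>> nessy.sel(lat_min=30., lat_max=60.)
+
+ cuts the full coordinate arrays down to the requested interval and then resets the
+ selection attributes so that the filter is not applied twice.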
+ """ + + idx = self._get_idx_intervals() + + if self.master: + self._full_time = self._full_time[idx["idx_t_min"]:idx["idx_t_max"]] + self._full_lev["data"] = self._full_lev["data"][idx["idx_z_min"]:idx["idx_z_max"]] + + if len(self._full_lat["data"].shape) == 1: + # Regular projection + self._full_lat["data"] = self._full_lat["data"][idx["idx_y_min"]:idx["idx_y_max"]] + self._full_lon["data"] = self._full_lon["data"][idx["idx_x_min"]:idx["idx_x_max"]] + + if self._full_lat_bnds is not None: + self._full_lat_bnds["data"] = self._full_lat_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], :] + if self._full_lon_bnds is not None: + self._full_lon_bnds["data"] = self._full_lon_bnds["data"][idx["idx_x_min"]:idx["idx_x_max"], :] + else: + # Irregular projections + self._full_lat["data"] = self._full_lat["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"]] + self._full_lon["data"] = self._full_lon["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"]] + + if self._full_lat_bnds is not None: + self._full_lat_bnds["data"] = self._full_lat_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"], :] + if self._full_lon_bnds is not None: + self._full_lon_bnds["data"] = self._full_lon_bnds["data"][idx["idx_y_min"]:idx["idx_y_max"], + idx["idx_x_min"]:idx["idx_x_max"], :] + + self.hours_start = 0 + self.hours_end = 0 + self.last_level = None + self.first_level = None + self.lat_min = None + self.lat_max = None + self.lon_max = None + self.lon_min = None + + return None + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + + raise NotImplementedError("Must be implemented on inner class.") + + @staticmethod + def _get_pyproj_projection(): + """ + Retrieves Pyproj projection data based on grid details. 
+ + """ + + raise NotImplementedError("Must be implemented on inner class.") + + def _get_idx_intervals(self): + """ + Calculate the index intervals. + + Returns + ------- + dict + Dictionary with the index intervals. + """ + full_lat = self.get_full_latitudes() + full_lon = self.get_full_longitudes() + idx = {"idx_t_min": self._get_time_id(self.hours_start, first=True), + "idx_t_max": self._get_time_id(self.hours_end, first=False), + "idx_z_min": self.first_level, + "idx_z_max": self.last_level} + + # Axis Y + if self.lat_min is None: + idx["idx_y_min"] = 0 + else: + idx["idx_y_min"] = self._get_coordinate_id(full_lat["data"], self.lat_min, axis=0) + if self.lat_max is None: + idx["idx_y_max"] = full_lat["data"].shape[0] + else: + idx["idx_y_max"] = self._get_coordinate_id(full_lat["data"], self.lat_max, axis=0) + 1 + + if idx["idx_y_min"] > idx["idx_y_max"]: + idx_aux = copy(idx["idx_y_min"]) + idx["idx_y_min"] = idx["idx_y_max"] + idx["idx_y_max"] = idx_aux + + # Axis X + + if self.lon_min is None: + idx["idx_x_min"] = 0 + else: + if len(full_lon["data"].shape) == 1: + axis = 0 + else: + axis = 1 + idx["idx_x_min"] = self._get_coordinate_id(full_lon["data"], self.lon_min, axis=axis) + if self.lon_max is None: + idx["idx_x_max"] = full_lon["data"].shape[-1] + else: + if len(full_lon["data"].shape) == 1: + axis = 0 + else: + axis = 1 + idx["idx_x_max"] = self._get_coordinate_id(full_lon["data"], self.lon_max, axis=axis) + 1 + + if idx["idx_x_min"] > idx["idx_x_max"]: + idx_aux = copy(idx["idx_x_min"]) + idx["idx_x_min"] = idx["idx_x_max"] + idx["idx_x_max"] = idx_aux + return idx + + # ================================================================================================================== + # Statistics + # ================================================================================================================== + + def last_time_step(self): + """ + Modify variables to keep only the last time step. + """ + + if self.parallel_method == "T": + raise NotImplementedError("Statistics are not implemented on time axis parallelization method.") + aux_time = self.get_full_times()[0].replace(hour=0, minute=0, second=0, microsecond=0) + self.set_full_times([aux_time]) + self.time = [aux_time] + + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + aux_data = var_info["data"][-1, :] + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]["data"] = aux_data + self.hours_start = 0 + self.hours_end = 0 + + return None + + def daily_statistic(self, op, type_op="calendar"): + """ + Calculate a daily statistic. + + Parameters + ---------- + op : str + Statistic to perform. Accepted values: "max", "mean" and "min". + type_op : str + Type of statistic to perform. Accepted values: "calendar", "alltsteps", and "withoutt0". + - "calendar": Calculate the statistic using the time metadata; days with a single time step are + skipped. + - "alltsteps": Calculate a single time statistic with all the time steps. + - "withoutt0": Calculate a single time statistic with all the time steps avoiding the first one. 
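+ + Example + ------- + A minimal sketch, assuming an open Nes dataset ``nessy`` with hourly data already loaded (hypothetical name): + + >>> nessy.daily_statistic(op="mean", type_op="calendar")  # one value per calendar day 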
+ """ + + if self.parallel_method == "T": + raise NotImplementedError("Statistics are not implemented on time axis parallel method.") + time_interval = self.get_time_interval + if type_op == "calendar": + aux_time_bounds = [] + aux_time = [] + day_list = [date_aux.day for date_aux in self.time] + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + stat_data = None + for day in unique(day_list): + idx_first = next(i for i, val in enumerate(day_list, 0) if val == day) + idx_last = len(day_list) - next(i for i, val in enumerate(reversed(day_list), 1) if val == day) + if idx_first != idx_last: # To avoid single time step statistic + if idx_last != len(day_list): + if op == "mean": + data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].mean(axis=0) + elif op == "max": + data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].max(axis=0) + elif op == "min": + data_aux = var_info["data"][idx_first:idx_last + 1, :, :, :].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + aux_time_bounds.append([self.time[idx_first], self.time[idx_last]]) + else: + if op == "mean": + data_aux = var_info["data"][idx_first:, :, :, :].mean(axis=0) + elif op == "max": + data_aux = var_info["data"][idx_first:, :, :, :].max(axis=0) + elif op == "min": + data_aux = var_info["data"][idx_first:, :, :, :].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + aux_time_bounds.append([self.time[idx_first], self.time[-1]]) + + data_aux = data_aux.reshape((1, data_aux.shape[0], data_aux.shape[1], data_aux.shape[2])) + aux_time.append(self.time[idx_first].replace(hour=0, minute=0, second=0)) + # Append over time dimension + if stat_data is None: + stat_data = data_aux.copy() + else: + stat_data = vstack([stat_data, data_aux]) + self.variables[var_name]["data"] = stat_data + self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) + self.time = aux_time + self.set_full_times(self.time) + + self.set_time_bnds(aux_time_bounds) + + elif type_op == "alltsteps": + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + if op == "mean": + aux_data = var_info["data"].mean(axis=0) + elif op == "max": + aux_data = var_info["data"].max(axis=0) + elif op == "min": + aux_data = var_info["data"].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]["data"] = aux_data + self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) + + aux_time = self.time[0].replace(hour=0, minute=0, second=0, microsecond=0) + aux_time_bounds = [[self.time[0], self.time[-1]]] + self.time = [aux_time] + self.set_full_times(self.time) + + self.set_time_bnds(aux_time_bounds) + + elif type_op == "withoutt0": + for var_name, var_info in self.variables.items(): + if var_info["data"] is None: + self.load(var_name) + if op == "mean": + aux_data = var_info["data"][1:, :].mean(axis=0) + elif op == "max": + aux_data = var_info["data"][1:, :].max(axis=0) + elif op == "min": + aux_data = var_info["data"][1:, :].min(axis=0) + else: + raise NotImplementedError(f"Statistic operation '{op}' is not implemented.") + if len(aux_data.shape) == 3: + aux_data = aux_data.reshape((1, aux_data.shape[0], 
aux_data.shape[1], aux_data.shape[2])) + self.variables[var_name]["data"] = aux_data + self.variables[var_name]["cell_methods"] = "time: {0} (interval: {1}hr)".format(op, time_interval) + full_time = self.get_full_times() + aux_time = full_time[1].replace(hour=0, minute=0, second=0, microsecond=0) + aux_time_bounds = [[full_time[1], full_time[-1]]] + self.time = [aux_time] + self.set_full_times(self.time) + + self.set_time_bnds(aux_time_bounds) + else: + raise NotImplementedError(f"Statistic operation type '{type_op}' is not implemented.") + self.hours_start = 0 + self.hours_end = 0 + + return None + + @staticmethod + def _get_axis_index_(axis): + + if axis == "T": + value = 0 + elif axis == "Z": + value = 1 + elif axis == "Y": + value = 2 + elif axis == "X": + value = 3 + else: + raise ValueError("Unknown axis: {0}".format(axis)) + + return value + + def sum_axis(self, axis="Z"): + + if self.parallel_method == axis: + raise NotImplementedError( + f"It is not possible to sum over the axis used for parallelization ('{self.parallel_method}').") + + for var_name, var_info in self.variables.items(): + if var_info["data"] is not None: + self.variables[var_name]["data"] = self.variables[var_name]["data"].sum( + axis=self._get_axis_index_(axis), keepdims=True) + if axis == "T": + self.variables[var_name]["cell_methods"] = "time: sum (interval: {0}hr)".format( + (self.time[-1] - self.time[0]).total_seconds() // 3600) + + if axis == "T": + self.set_time_bnds([self.time[0], self.time[-1]]) + self.time = [self.time[0]] + self.set_full_times([self.time[0]]) + if axis == "Z": + self.lev["data"] = array([self.lev["data"][0]]) + self.set_full_levels(self.lev) + + return None + + def find_time_id(self, time): + """ + Find the index of a given time in the time array. + + Parameters + ---------- + time : datetime + Time element. + + Returns + ------- + int or None + Index of the time element, or None if it is not present. + """ + + if time in self.time: + return self.time.index(time) + + def rolling_mean(self, var_list=None, hours=8): + """ + Calculate the rolling mean over the given number of hours. + + Parameters + ---------- + var_list : List, str, None + List (or single string) of the variables to be processed. 
+ hours : int, optional + Number of hours in the rolling window. Default: 8. + + Returns + ------- + Nes + A new Nes object containing the rolling-mean data. + """ + + if self.parallel_method == "T": + raise NotImplementedError("The rolling mean cannot be calculated using the time axis parallel method.") + + aux_nessy = self.copy(copy_vars=False) + aux_nessy.set_communicator(self.comm) + + if isinstance(var_list, str): + var_list = [var_list] + elif var_list is None: + var_list = list(self.variables.keys()) + + for var_name in var_list: + # Load variables if they have not been loaded previously + if self.variables[var_name]["data"] is None: + self.load(var_name) + + # Get original file shape + nessy_shape = self.variables[var_name]["data"].shape + + # Initialise array + aux_nessy.variables[var_name] = {} + aux_nessy.variables[var_name]["data"] = empty(shape=nessy_shape) + aux_nessy.variables[var_name]["dimensions"] = deepcopy(self.variables[var_name]["dimensions"]) + + for curr_time in self.time: + # Get previous time given a set of hours + prev_time = curr_time - timedelta(hours=(hours-1)) + + # Get time indices + curr_time_id = self.find_time_id(curr_time) + prev_time_id = self.find_time_id(prev_time) + + # Get mean if previous time is available + if prev_time_id is not None: + if self.info: + print(f"Calculating mean between {prev_time} and {curr_time}.") + aux_nessy.variables[var_name]["data"][curr_time_id, :, :, :] = self.variables[var_name]["data"][ + prev_time_id:curr_time_id, :, :, :].mean(axis=0, keepdims=True) + # Fill with nan if previous time is not available + else: + if self.info: + msg = f"Mean between {prev_time} and {curr_time} cannot be calculated " + msg += f"because data for {prev_time} is not available." + print(msg) + aux_nessy.variables[var_name]["data"][curr_time_id, :, :, :] = full( + shape=(1, nessy_shape[1], nessy_shape[2], nessy_shape[3]), fill_value=nan) + + return aux_nessy + + # ================================================================================================================== + # Reading + # ================================================================================================================== + + def _get_read_axis_limits(self): + """ + Calculate the 4D reading axis limits depending on whether they have to be balanced or not. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. + """ + + if self.balanced: + return self._get_read_axis_limits_balanced() + else: + return self._get_read_axis_limits_unbalanced() + + def _get_read_axis_limits_unbalanced(self): + """ + Calculate the 4D reading axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
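+ + Notes + ----- + Worked example (illustrative): with 4 ranks and a Y interval of 10 rows, y_len // size = 2, so ranks 0-2 read rows [0:2], [2:4] and [4:6], and the last rank reads the remainder [6:10]. 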
+ """ + + axis_limits = {"x_min": None, "x_max": None, + "y_min": None, "y_max": None, + "z_min": None, "z_max": None, + "t_min": None, "t_max": None} + + idx = self._get_idx_intervals() + if self.parallel_method == "Y": + y_len = idx["idx_y_max"] - idx["idx_y_min"] + if y_len < self.size: + raise IndexError("More processors (size={0}) selected than Y elements (size={1})".format( + self.size, y_len)) + axis_limits["y_min"] = ((y_len // self.size) * self.rank) + idx["idx_y_min"] + if self.rank + 1 < self.size: + axis_limits["y_max"] = ((y_len // self.size) * (self.rank + 1)) + idx["idx_y_min"] + else: + axis_limits["y_max"] = idx["idx_y_max"] + + # Non parallel filters + axis_limits["x_min"] = idx["idx_x_min"] + axis_limits["x_max"] = idx["idx_x_max"] + + axis_limits["t_min"] = idx["idx_t_min"] + axis_limits["t_max"] = idx["idx_t_max"] + + elif self.parallel_method == "X": + x_len = idx["idx_x_max"] - idx["idx_x_min"] + if x_len < self.size: + raise IndexError("More processors (size={0}) selected than X elements (size={1})".format( + self.size, x_len)) + axis_limits["x_min"] = ((x_len // self.size) * self.rank) + idx["idx_x_min"] + if self.rank + 1 < self.size: + axis_limits["x_max"] = ((x_len // self.size) * (self.rank + 1)) + idx["idx_x_min"] + else: + axis_limits["x_max"] = idx["idx_x_max"] + + # Non parallel filters + axis_limits["y_min"] = idx["idx_y_min"] + axis_limits["y_max"] = idx["idx_y_max"] + + axis_limits["t_min"] = idx["idx_t_min"] + axis_limits["t_max"] = idx["idx_t_max"] + + elif self.parallel_method == "T": + t_len = idx["idx_t_max"] - idx["idx_t_min"] + if t_len < self.size: + raise IndexError("More processors (size={0}) selected than T elements (size={1})".format( + self.size, t_len)) + axis_limits["t_min"] = ((t_len // self.size) * self.rank) + idx["idx_t_min"] + if self.rank + 1 < self.size: + axis_limits["t_max"] = ((t_len // self.size) * (self.rank + 1)) + idx["idx_t_min"] + + # Non parallel filters + axis_limits["y_min"] = idx["idx_y_min"] + axis_limits["y_max"] = idx["idx_y_max"] + + axis_limits["x_min"] = idx["idx_x_min"] + axis_limits["x_max"] = idx["idx_x_max"] + + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + # Vertical levels selection: + axis_limits["z_min"] = self.first_level + if self.last_level == -1 or self.last_level is None: + self.last_level = None + elif self.last_level + 1 == len(self.get_full_levels()["data"]): + self.last_level = None + else: + self.last_level += 1 + axis_limits["z_max"] = self.last_level + + return axis_limits + + def _get_read_axis_limits_balanced(self): + """ + Calculate the 4D reading balanced axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to read. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
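+ + Notes + ----- + Worked example (illustrative): splitting 10 elements over 4 ranks gives procs_len = 2 with 2 extended ranks, so the reads are [0:3], [3:6], [6:8] and [8:10] (3, 3, 2 and 2 elements). 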
+ """ + idx = self._get_idx_intervals() + + fid_dist = {} + if self.parallel_method == "Y": + len_to_split = idx["idx_y_max"] - idx["idx_y_min"] + if len_to_split < self.size: + raise IndexError("More processors (size={0}) selected than Y elements (size={1})".format( + self.size, len_to_split)) + min_axis = "y_min" + max_axis = "y_max" + to_add = idx["idx_y_min"] + + elif self.parallel_method == "X": + len_to_split = idx["idx_x_max"] - idx["idx_x_min"] + if len_to_split < self.size: + raise IndexError("More processors (size={0}) selected than X elements (size={1})".format( + self.size, len_to_split)) + min_axis = "x_min" + max_axis = "x_max" + to_add = idx["idx_x_min"] + elif self.parallel_method == "T": + len_to_split = idx["idx_t_max"] - idx["idx_t_min"] + if len_to_split < self.size: + raise IndexError(f"More processors (size={self.size}) selected than T elements (size={len_to_split})") + min_axis = "t_min" + max_axis = "t_max" + to_add = idx["idx_t_min"] + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + procs_len = len_to_split // self.size + procs_rows_extended = len_to_split - (procs_len * self.size) + + rows_sum = 0 + for proc in range(self.size): + fid_dist[proc] = {"x_min": 0, "x_max": None, + "y_min": 0, "y_max": None, + "z_min": 0, "z_max": None, + "t_min": 0, "t_max": None} + if proc < procs_rows_extended: + aux_rows = procs_len + 1 + else: + aux_rows = procs_len + + len_to_split -= aux_rows + if len_to_split < 0: + rows = len_to_split + aux_rows + else: + rows = aux_rows + + fid_dist[proc][min_axis] = rows_sum + fid_dist[proc][max_axis] = rows_sum + rows + + if to_add is not None: + fid_dist[proc][min_axis] += to_add + fid_dist[proc][max_axis] += to_add + + # # Last element + # if len_to_split == 0 and to_add == 0: + # fid_dist[proc][max_axis] = None + + rows_sum += rows + + axis_limits = fid_dist[self.rank] + + # Non parallel filters + if self.parallel_method != "T": + axis_limits["t_min"] = idx["idx_t_min"] + axis_limits["t_max"] = idx["idx_t_max"] + if self.parallel_method != "X": + axis_limits["x_min"] = idx["idx_x_min"] + axis_limits["x_max"] = idx["idx_x_max"] + if self.parallel_method != "Y": + axis_limits["y_min"] = idx["idx_y_min"] + axis_limits["y_max"] = idx["idx_y_max"] + + # Vertical levels selection: + axis_limits["z_min"] = self.first_level + if self.last_level == -1 or self.last_level is None: + self.last_level = None + elif self.last_level + 1 == len(self.get_full_levels()["data"]): + self.last_level = None + else: + self.last_level += 1 + axis_limits["z_max"] = self.last_level + + return axis_limits + + def _get_time_id(self, hours, first=True): + """ + Get the index of the corresponding time value. + + Parameters + ---------- + hours : int + Number of hours to avoid. + first : bool + Indicates if you want to avoid from the first hours (True) or from the last (False). + Default: True. + + Returns + ------- + int + Index of the time array. + """ + full_time = self.get_full_times() + + if first: + idx = full_time.index(full_time[0] + timedelta(hours=hours)) + else: + idx = full_time.index(full_time[-1] - timedelta(hours=hours)) + 1 + + return idx + + @staticmethod + def _get_coordinate_id(my_array, value, axis=0): + """ + Get the index of the corresponding coordinate value. + + Parameters + ---------- + my_array : array + An Array with the coordinate data + value : float + Coordinate value to search. 
+ axis : int + Axis in which to find the value. + Default: 0. + + Returns + ------- + int + Index of the coordinate array. + """ + idx = (abs(my_array - value)).argmin(axis=axis).min() + + return idx + + def _open(self): + """ + Open the NetCDF. + """ + + self.dataset = self.__open_netcdf4() + + return None + + def __open_netcdf4(self, mode="r"): + """ + Open the NetCDF with netcdf4-python. + + Parameters + ---------- + mode : str + Access mode, passed through to the netCDF4 Dataset "mode" parameter + (https://unidata.github.io/netcdf4-python/#Dataset.__init__). + Default: "r" (read-only). + + Returns + ------- + netcdf : Dataset + Open dataset. + """ + + if self.size == 1: + netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=False) + else: + netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=True, comm=self.comm, + info=MPI.Info()) + self.dataset = netcdf + + return netcdf + + def close(self): + """ + Close the NetCDF with netcdf4-python. + """ + if (hasattr(self, "serial_nc")) and (self.serial_nc is not None): + if self.master: + self.serial_nc.close() + self.serial_nc = None + if (hasattr(self, "dataset")) and (self.dataset is not None): + self.dataset.close() + self.dataset = None + + return None + + @staticmethod + def __get_dates_from_months(time): + """ + Calculate the number of days since the first date for each element + of the "time" variable and store them in a new list. + This is useful when the units are "months since", + which cannot be transformed to dates using "num2date". + + Parameters + ---------- + time : Variable + Original netCDF time variable with "months since" units. + + Returns + ------- + new_time_deltas : List + Days since the base date for each time step. + """ + + start_date_str = time.units.split("since")[1].lstrip() + start_date = datetime(int(start_date_str[0:4]), int(start_date_str[5:7]), int(start_date_str[8:10])) + + new_time_deltas = [] + + for month_delta in time[:]: + # Transform current_date into number of days since base date + current_date = start_date + relativedelta(months=month_delta) + + # Calculate number of days between base date and the other dates + n_days = int((current_date - start_date).days) + + # Store in list + new_time_deltas.append(n_days) + + return new_time_deltas + + def __parse_time(self, time): + """ + Parse the time variable to be CF compliant. + + Parameters + ---------- + time : Variable + Original netCDF time variable. + + Returns + ------- + time_data : array + Time values. + units : str + CF compliant time units. + calendar : str + Time calendar. + """ + + units = self.__parse_time_unit(time.units) + + if not hasattr(time, "calendar"): + calendar = "standard" + else: + calendar = time.calendar + + if "months since" in time.units: + units = "days since " + time.units.split("since")[1].lstrip() + time = self.__get_dates_from_months(time) + + time_data = time[:] + + if len(time_data) == 1 and isnan(time_data[0]): + time_data[0] = 0 + + return time_data, units, calendar + + @staticmethod + def __parse_time_unit(t_units): + """ + Parse the time units to be CF compliant. + + Parameters + ---------- + t_units : str + Original time units. + + Returns + ------- + t_units : str + CF compliant time units. 
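+ + Notes + ----- + Illustrative behaviour (assumed input layout): units containing "h @" (an hour-offset stamp of the form "...h @YYYYMMDD HHMMSS...") are rewritten as "hours since YYYY-MM-DD HH:MM:SS UTC"; any other units are returned unchanged. 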
+ """ + + if "h @" in t_units: + t_units = "hours since {0}-{1}-{2} {3}:{4}:{5} UTC".format( + t_units[4:8], t_units[8:10], t_units[10:12], t_units[13:15], t_units[15:17], t_units[17:-4]) + + return t_units + + @staticmethod + def __get_time_resolution_from_units(units): + """ + Parses the time units to get the time resolution + + Parameters + ---------- + units : str + Time variable units + + Returns + ------- + str + Time variable resolution + """ + if "day" in units or "days" in units: + resolution = "days" + elif "hour" in units or "hours" in units: + resolution = "hours" + elif "minute" in units or "minutes" in units: + resolution = "minutes" + elif "second" in units or "seconds" in units: + resolution = "seconds" + else: + # Default resolution is "hours" + resolution = "hours" + return resolution + + def __get_time(self): + """ + Get the NetCDF file time values. + + Returns + ------- + time : List[datetime] + List of times (datetime) of the NetCDF data. + """ + if self.master: + nc_var = self.dataset.variables["time"] + time_data, units, calendar = self.__parse_time(nc_var) + # Extracting time resolution depending on the units + self._time_resolution = self.__get_time_resolution_from_units(units) + # Checking if it is a climatology dataset + if hasattr(nc_var, "climatology"): + self._climatology = True + self._climatology_var_name = nc_var.climatology + time = num2date(time_data, units, calendar=calendar) + time = [datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) for dt in time] + else: + time = None + self.free_vars("time") + + return time + + def __get_time_bnds(self, create_nes=False): + """ + Get the NetCDF time bounds values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + time_bnds : List + A List of time bounds (datetime) of the NetCDF data. + """ + + if not create_nes: + if self.master: + if "time_bnds" in self.dataset.variables.keys() or self._climatology: + time = self.dataset.variables["time"] + if self._climatology: + nc_var = self.dataset.variables[self._climatology_var_name] + else: + nc_var = self.dataset.variables["time_bnds"] + time_bnds = num2date(nc_var[:], self.__parse_time_unit(time.units), + calendar=time.calendar).tolist() + # Iterate over each inner list + for inner_list in time_bnds: + # Create a new list to store datetime objects + new_inner_list = [] + # Iterate over datetime objects within each inner list + for dt in inner_list: + # Access year, month, day, hour, and minute attributes of datetime objects + new_dt = datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute) + # Append the new datetime object to the new inner list + new_inner_list.append(new_dt) + # Replace the old inner list with the new one + time_bnds[time_bnds.index(inner_list)] = new_inner_list + else: + time_bnds = None + else: + time_bnds = None + else: + time_bnds = None + + self.free_vars("time_bnds") + + return time_bnds + + def __get_coordinates_bnds(self, create_nes=False): + """ + Get the NetCDF coordinates bounds values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + lat_bnds : dict + Latitude bounds of the NetCDF data. + lon_bnds : dict + Longitude bounds of the NetCDF data. 
+ """ + + if not create_nes: + if self.master: + if "lat_bnds" in self.dataset.variables.keys(): + lat_bnds = {"data": self._unmask_array(self.dataset.variables["lat_bnds"][:])} + else: + lat_bnds = None + + if "lon_bnds" in self.dataset.variables.keys(): + lon_bnds = {"data": self._unmask_array(self.dataset.variables["lon_bnds"][:])} + else: + lon_bnds = None + else: + lat_bnds = None + lon_bnds = None + else: + lat_bnds = None + lon_bnds = None + + self.free_vars(["lat_bnds", "lon_bnds"]) + + return lat_bnds, lon_bnds + + def __get_cell_measures(self, create_nes=False): + """ + Get the NetCDF cell measures values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or from an existing file (False). + + Returns + ------- + dict + Dictionary of cell measures of the NetCDF data. + """ + + c_measures = {} + if self.master: + if not create_nes: + if "cell_area" in self.dataset.variables.keys(): + c_measures["cell_area"] = {} + c_measures["cell_area"]["data"] = self._unmask_array(self.dataset.variables["cell_area"][:]) + c_measures = self.comm.bcast(c_measures, root=0) + + self.free_vars(["cell_area"]) + + return c_measures + + def _get_coordinate_dimension(self, possible_names): + """ + Read the coordinate dimension data. + + This will read the complete data of the coordinate. + + Parameters + ---------- + possible_names : str or List + A List (or single string) of the possible names of the coordinate (e.g. ["lat", "latitude"]). + + Returns + ------- + nc_var : dict + Dictionary with the "data" key holding the coordinate variable values, and the attributes as other keys. + """ + + if isinstance(possible_names, str): + possible_names = [possible_names] + + try: + dimension_name = set(possible_names).intersection(set(self.variables.keys())).pop() + if self.master: + nc_var = self.variables[dimension_name].copy() + nc_var["data"] = self.dataset.variables[dimension_name][:] + if "units" in nc_var: + if nc_var["units"] in ["unitless", "-"]: + nc_var["units"] = "" + else: + nc_var = None + self.free_vars(dimension_name) + except KeyError: + if self.master: + nc_var = {"data": array([0]), + "units": ""} + else: + nc_var = None + + return nc_var + + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key holding the coordinate variable values, and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["Z", "Y", "X"]. + bounds : bool + Indicates if the coordinate has bounds. + + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
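+ + Notes + ----- + Illustrative example (hypothetical limits): a rank with read_axis_limits y in [2, 4) and x in [0, 5) receives a (2, 5) portion of a 2D coordinate, or the [2:4] slice of a 1D "Y" coordinate. 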
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "Y": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + elif coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + elif coordinate_axis == "Z": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + return values + + def _get_cell_measures_values(self, cell_measures_info): + """ + Get the cell measures data of the current portion. + + Parameters + ---------- + cell_measures_info : dict, list + Dictionary with the "data" key holding the cell measures variable values, and the attributes as other keys. + + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. + """ + + if cell_measures_info is None: + return None + + cell_measures_values = {} + + for cell_measures_var in cell_measures_info.keys(): + + values = deepcopy(cell_measures_info[cell_measures_var]) + coordinate_len = len(values["data"].shape) + + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + cell_measures_values[cell_measures_var] = values + + return cell_measures_values + + def _get_lazy_variables(self): + """ + Get all the variables' information. + + Returns + ------- + variables : dict + Dictionary with the variable name as key and another dictionary as value. + The value dictionary will have the "data" key with None as value and all the variable attributes as the + other keys. + e.g. 
+ {"var_name_1": {"data": None, "attr_1": value_1_1, "attr_2": value_1_2, ...}, + "var_name_2": {"data": None, "attr_1": value_2_1, "attr_2": value_2_2, ...}, + ...} + """ + + if self.master: + variables = {} + # Initialise data + for var_name, var_info in self.dataset.variables.items(): + variables[var_name] = {} + variables[var_name]["data"] = None + variables[var_name]["dimensions"] = var_info.dimensions + variables[var_name]["dtype"] = var_info.dtype + if variables[var_name]["dtype"] in [str, object]: + if self.strlen is None: + self.set_strlen() + variables[var_name]["dtype"] = str + + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ["missing_value", "_FillValue", "add_offset", "scale_factor"]: + value = getattr(var_info, attrname) + if str(value) in ["unitless", "-"]: + value = "" + variables[var_name][attrname] = value + else: + variables = None + variables = self.comm.bcast(variables, root=0) + + return variables + + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 4 dimensions + if len(var_dims) < 2: + data = nc_var[:] + elif len(var_dims) == 2: + data = nc_var[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 3: + if "strlen" in var_dims: + data = nc_var[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + :] + data_aux = empty(shape=(data.shape[0], data.shape[1]), dtype=object) + for lat_n in range(data.shape[0]): + for lon_n in range(data.shape[1]): + data_aux[lat_n, lon_n] = "".join( + data[lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) + data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) + else: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 4: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"], + self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 5: + if "strlen" in var_dims: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["z_min"]:self.read_axis_limits["z_max"], + self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + :] + data_aux = empty(shape=(data.shape[0], data.shape[1], data.shape[2], data.shape[3]), dtype=object) + for time_n in range(data.shape[0]): + for lev_n in range(data.shape[1]): + for lat_n in range(data.shape[2]): + for lon_n in range(data.shape[3]): + data_aux[time_n, lev_n, lat_n, lon_n] = "".join( + data[time_n, lev_n, lat_n, lon_n].tobytes().decode("ascii").replace("\x00", "")) + data = data_aux + else: + raise NotImplementedError("Error with {0}. 
Only netCDF variables with 4 dimensions or fewer can be read".format( + var_name)) + else: + raise NotImplementedError("Error with {0}. Only netCDF variables with 4 dimensions or fewer can be read".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + def load(self, var_list=None): + """ + Load the selected variables. + + This function fills the variable "data" key with the corresponding values. + + Parameters + ---------- + var_list : List, str, None + List (or single string) of the variables to be loaded. + """ + + if (self.__ini_path is None) and (self.dataset is None): + raise RuntimeError("Only data from existing files can be loaded.") + + if self.dataset is None: + self.__open_netcdf4() + close = True + else: + close = False + + if isinstance(var_list, str): + var_list = [var_list] + elif var_list is None: + var_list = list(self.variables.keys()) + + for i, var_name in enumerate(var_list): + if self.info: + print("Rank {0:03d}: Loading {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(var_list))) + if self.variables[var_name]["data"] is None: + self.variables[var_name]["data"] = self._read_variable(var_name) + # Data type changes when joining characters in read_variable (S1 to S+strlen) + if "strlen" in self.variables[var_name]["dimensions"]: + if self.strlen is None: + self.set_strlen() + self.variables[var_name]["dtype"] = str + self.variables[var_name]["dimensions"] = tuple([x for x in self.variables[var_name]["dimensions"] + if x != "strlen"]) + else: + if self.master: + print("Data for {0} was previously loaded. Skipping variable.".format(var_name)) + if self.info: + print("Rank {0:03d}: Loaded {1} var ({2})".format( + self.rank, var_name, self.variables[var_name]["data"].shape)) + + if close: + self.close() + + return None + + @staticmethod + def _unmask_array(data): + """ + Convert missing values to nan. This is done because the missing value is sometimes lost during calculations. + + Parameters + ---------- + data : array + Masked array to unmask. + + Returns + ------- + array + Unmasked array. + """ + + if isinstance(data, ma.MaskedArray): + try: + data = data.filled(nan) + except TypeError: + msg = "Data missing values cannot be converted to nan." + warn(msg) + sys.stderr.flush() + + return data + + def to_dtype(self, data_type="float32"): + """ Cast variable data to the selected data type. + + Parameters + ---------- + data_type : str or Type + Data type. Default: "float32". + """ + + for var_name, var_info in self.variables.items(): + if isinstance(var_info["data"], ndarray): + self.variables[var_name]["data"] = self.variables[var_name]["data"].astype(data_type) + self.variables[var_name]["dtype"] = data_type + + return None + + def concatenate(self, aux_nessy): + """ + Concatenate different variables into the same NES object. + + Parameters + ---------- + aux_nessy : Nes, str + Nes object or str with the path to the NetCDF file that contains the variables to add. + + Returns + ------- + list + A List of the variable names added. 
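+ + Example + ------- + A minimal sketch (hypothetical paths and variable names): + + >>> nessy = open_netcdf("base_inventory.nc") + >>> nessy.concatenate("extra_pollutants.nc") + ['no2', 'o3'] 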
+ """ + + if isinstance(aux_nessy, str): + aux_nessy = self.new(path=aux_nessy, comm=self.comm, parallel_method=self.parallel_method, + avoid_first_hours=self.hours_start, avoid_last_hours=self.hours_end, + first_level=self.first_level, last_level=self.last_level) + new = True + else: + new = False + for var_name, var_info in aux_nessy.variables.items(): + if var_info["data"] is None: + aux_nessy.read_axis_limits = self.read_axis_limits + aux_nessy.load(var_name) + + new_vars_added = [] + for new_var_name, new_var_data in aux_nessy.variables.items(): + if new_var_name not in self.variables.keys(): + self.variables[new_var_name] = deepcopy(new_var_data) + new_vars_added.append(new_var_name) + + if new: + del aux_nessy + + return new_vars_added + + def __get_global_attributes(self, create_nes=False): + """ + Read the netcdf global attributes. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or from an existing file (False). + + Returns + ------- + gl_attrs : dict + Dictionary with the netCDF global attributes. + """ + + gl_attrs = {} + + if not create_nes: + for attrname in self.dataset.ncattrs(): + gl_attrs[attrname] = getattr(self.dataset, attrname) + + return gl_attrs + + # ================================================================================================================== + # Writing + # ================================================================================================================== + + def _get_write_axis_limits(self): + """ + Calculate the 4D writing axis limits depending on whether they have to be balanced or not. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to write. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. + """ + + if self.balanced: + return self._get_write_axis_limits_balanced() + else: + return self._get_write_axis_limits_unbalanced() + + def _get_write_axis_limits_unbalanced(self): + """ + Calculate the 4D writing axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to write. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. + """ + + axis_limits = {"x_min": None, "x_max": None, + "y_min": None, "y_max": None, + "z_min": None, "z_max": None, + "t_min": None, "t_max": None} + my_shape = self.get_full_shape() + if self.parallel_method == "Y": + y_len = my_shape[0] + axis_limits["y_min"] = (y_len // self.size) * self.rank + if self.rank + 1 < self.size: + axis_limits["y_max"] = (y_len // self.size) * (self.rank + 1) + elif self.parallel_method == "X": + x_len = my_shape[-1] + axis_limits["x_min"] = (x_len // self.size) * self.rank + if self.rank + 1 < self.size: + axis_limits["x_max"] = (x_len // self.size) * (self.rank + 1) + elif self.parallel_method == "T": + t_len = len(self.get_full_times()) + axis_limits["t_min"] = ((t_len // self.size) * self.rank) + if self.rank + 1 < self.size: + axis_limits["t_max"] = (t_len // self.size) * (self.rank + 1) + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + return axis_limits + + def _get_write_axis_limits_balanced(self): + """ + Calculate the 4D writing balanced axis limits. + + Returns + ------- + dict + Dictionary with the 4D limits of the rank data to write. + t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. 
+ """ + my_shape = self.get_full_shape() + fid_dist = {} + if self.parallel_method == "Y": + len_to_split = my_shape[0] + min_axis = "y_min" + max_axis = "y_max" + elif self.parallel_method == "X": + len_to_split = my_shape[-1] + min_axis = "x_min" + max_axis = "x_max" + elif self.parallel_method == "T": + len_to_split = len(self.get_full_times()) + min_axis = "t_min" + max_axis = "t_max" + else: + raise NotImplementedError("Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + + procs_len = len_to_split // self.size + procs_rows_extended = len_to_split - (procs_len * self.size) + + rows_sum = 0 + for proc in range(self.size): + fid_dist[proc] = {"x_min": 0, "x_max": None, + "y_min": 0, "y_max": None, + "z_min": 0, "z_max": None, + "t_min": 0, "t_max": None} + if proc < procs_rows_extended: + aux_rows = procs_len + 1 + else: + aux_rows = procs_len + + len_to_split -= aux_rows + if len_to_split < 0: + rows = len_to_split + aux_rows + else: + rows = aux_rows + + fid_dist[proc][min_axis] = rows_sum + fid_dist[proc][max_axis] = rows_sum + rows + + # Last element + if len_to_split == 0: + fid_dist[proc][max_axis] = None + + rows_sum += rows + + axis_limits = fid_dist[self.rank] + + return axis_limits + + def _create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. + """ + + # Create time dimension + netcdf.createDimension("time", None) + + # Create time_nv (number of vertices) dimension + full_time_bnds = self.get_full_time_bnds() + if full_time_bnds is not None: + netcdf.createDimension("time_nv", 2) + + # Create lev, lon and lat dimensions + netcdf.createDimension("lev", len(self.lev["data"])) + + # Create string length dimension + if self.strlen is not None: + netcdf.createDimension("strlen", self.strlen) + + return None + + def _create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. + """ + + self._create_dimension_variables_64(netcdf) + + return None + + def _create_dimension_variables_32(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. 
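+ + Notes + ----- + This variant casts the coordinate variables to float32 to reduce file size; _create_dimension_variables_64 writes them with the dtype of the full arrays instead, and is the variant dispatched by _create_dimension_variables by default. 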
+ """ + + # TIMES + full_time = self.get_full_times() + full_time_bnds = self.get_full_time_bnds() + time_var = netcdf.createVariable("time", float32, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "{0} since {1}".format(self._time_resolution, full_time[0].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if full_time_bnds is not None: + if self._climatology: + time_var.climatology = self._climatology_var_name + else: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(full_time[:], time_var.units, time_var.calendar) + + # TIME BOUNDS + if full_time_bnds is not None: + if self._climatology: + time_bnds_var = netcdf.createVariable(self._climatology_var_name, float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + else: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(full_time_bnds, time_var.units, calendar="standard") + + # LEVELS + full_lev = self.get_full_levels() + lev = netcdf.createVariable("lev", float32, ("lev",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if "units" in full_lev.keys(): + lev.units = full_lev["units"] + else: + lev.units = "" + if "positive" in full_lev.keys(): + lev.positive = full_lev["positive"] + + if self.size > 1: + lev.set_collective(True) + lev[:] = array(full_lev["data"], dtype=float32) + + # LATITUDES + full_lat = self.get_full_latitudes() + full_lat_bnds = self.get_full_latitudes_boundaries() + lat = netcdf.createVariable("lat", float32, self._lat_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if full_lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = array(full_lat["data"], dtype=float32) + + # LATITUDES BOUNDS + if full_lat_bnds is not None: + lat_bnds_var = netcdf.createVariable("lat_bnds", float32, + self._lat_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lat_bnds_var.set_collective(True) + lat_bnds_var[:] = array(full_lat_bnds["data"], dtype=float32) + + # LONGITUDES + full_lon = self.get_full_longitudes() + full_lon_bnds = self.get_full_longitudes_boundaries() + lon = netcdf.createVariable("lon", float32, self._lon_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if full_lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = array(full_lon["data"], dtype=float32) + + # LONGITUDES BOUNDS + if full_lon_bnds is not None: + lon_bnds_var = netcdf.createVariable("lon_bnds", float32, + self._lon_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lon_bnds_var.set_collective(True) + lon_bnds_var[:] = array(full_lon_bnds["data"], dtype=float32) + + return None + + def _create_dimension_variables_64(self, netcdf): + """ + Create the "time", "time_bnds", "lev", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + # TIMES + full_time = self.get_full_times() + full_time_bnds = self.get_full_time_bnds() + time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "{0} since {1}".format(self._time_resolution, full_time[0].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if full_time_bnds is not None: + if self._climatology: + time_var.climatology = self._climatology_var_name + else: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(full_time[:], time_var.units, time_var.calendar) + + # TIME BOUNDS + if full_time_bnds is not None: + if self._climatology: + time_bnds_var = netcdf.createVariable(self._climatology_var_name, float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + else: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), + zlib=self.zip_lvl, complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(full_time_bnds, time_var.units, calendar="standard") + + # LEVELS + full_lev = self.get_full_levels() + lev = netcdf.createVariable("lev", full_lev["data"].dtype, ("lev",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if "units" in full_lev.keys(): + lev.units = full_lev["units"] + else: + lev.units = "" + if "positive" in full_lev.keys(): + lev.positive = full_lev["positive"] + + if self.size > 1: + lev.set_collective(True) + lev[:] = full_lev["data"] + + # LATITUDES + full_lat = self.get_full_latitudes() + full_lat_bnds = self.get_full_latitudes_boundaries() + lat = netcdf.createVariable("lat", full_lat["data"].dtype, self._lat_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if full_lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = full_lat["data"] + + # LATITUDES BOUNDS + if full_lat_bnds is not None: + lat_bnds_var = netcdf.createVariable("lat_bnds", full_lat_bnds["data"].dtype, + self._lat_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lat_bnds_var.set_collective(True) + lat_bnds_var[:] = full_lat_bnds["data"] + + # LONGITUDES + full_lon = self.get_full_longitudes() + full_lon_bnds = self.get_full_longitudes_boundaries() + lon = netcdf.createVariable("lon", full_lon["data"].dtype, self._lon_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if full_lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = full_lon["data"] + + # LONGITUDES BOUNDS + if full_lon_bnds is not None: + lon_bnds_var = netcdf.createVariable("lon_bnds", full_lon_bnds["data"].dtype, + self._lon_dim + ("spatial_nv",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + lon_bnds_var.set_collective(True) + lon_bnds_var[:] = full_lon_bnds["data"] + + return None + + def _create_cell_measures(self, netcdf): + + # CELL AREA + if "cell_area" in self.cell_measures.keys(): + cell_area = netcdf.createVariable("cell_area", self.cell_measures["cell_area"]["data"].dtype, self._var_dim, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + if self.size > 1: + cell_area.set_collective(True) + 
cell_area[self.read_axis_limits["y_min"]:self.read_axis_limits["y_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] = \ + self.cell_measures["cell_area"]["data"] + + cell_area.long_name = "area of grid cell" + cell_area.standard_name = "cell_area" + cell_area.units = "m2" + + for var_name in self.variables.keys(): + self.variables[var_name]["cell_measures"] = "area: cell_area" + + if self.info: + print("Rank {0:03d}: Cell measures done".format(self.rank)) + return None + + def _str2char(self, data): + + if self.strlen is None: + msg = "String data could not be converted into chars while writing." + msg += " Please, set the maximum string length (set_strlen) before writing." + raise RuntimeError(msg) + + # Get final shape by adding strlen at the end + data_new_shape = data.shape + (self.strlen, ) + + # nD (2D, 3D, 4D) data as 1D string array + data = data.flatten() + + # Split strings into chars (S1) + data_aux = stringtochar(array([v.encode("ascii", "ignore") for v in data]).astype("S" + str(self.strlen))) + data_aux = data_aux.reshape(data_new_shape) + + return data_aux + + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + for i, (var_name, var_dict) in enumerate(self.variables.items()): + if isinstance(var_dict["data"], int) and var_dict["data"] == 0: + var_dims = ("time", "lev",) + self._var_dim + var_dtype = float32 + else: + # Get dimensions + if (var_dict["data"] is None) or (len(var_dict["data"].shape) == 4): + var_dims = ("time", "lev",) + self._var_dim + else: + var_dims = self._var_dim + + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception as e: # TODO: Detect exception + print(e) + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. 
Define the dtype explicitly via the 'dtype' dictionary key") + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.balanced: + raise NotImplementedError("Balanced datasets cannot be written in chunking mode.") + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl, + chunksizes=chunk_size) + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if att_value is not None: + if self.info: + print("Rank {0:03d}: Filling {1}".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if isinstance(att_value, int) and att_value == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(att_value.shape) == 5: + if "strlen" in var_dims: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :] = att_value + else: + raise NotImplementedError("It is not possible to write 5D variables.") + + elif len(att_value.shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + + elif len(att_value.shape) == 3: + if "strlen" in var_dims: + var[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :] = att_value + else: + raise NotImplementedError("It is not possible to write 3D variables.") + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format( + 
self.rank, var_name, i + 1, len(self.variables))) + + return None + + def append_time_step_data(self, i_time, out_format="DEFAULT"): + """ + Fill the netCDF data for the indicated index time. + + Parameters + ---------- + i_time : int + index of the time step to write + out_format : str + Indicates the output format type to change the units (if needed) + """ + if self.serial_nc is not None: + try: + data = self._gather_data(self.variables) + except KeyError: + # Key Error means string data + data = self.__gather_data_py_object(self.variables) + if self.master: + self.serial_nc.variables = data + self.serial_nc.append_time_step_data(i_time, out_format=out_format) + self.comm.Barrier() + else: + if out_format == "MONARCH": + self.variables = to_monarch_units(self) + elif out_format == "CMAQ": + self.variables = to_cmaq_units(self) + elif out_format == "WRF_CHEM": + self.variables = to_wrf_chem_units(self) + for i, (var_name, var_dict) in enumerate(self.variables.items()): + for att_name, att_value in var_dict.items(): + if att_name == "data": + + if att_value is not None: + if self.info: + print("Rank {0:03d}: Filling {1}".format(self.rank, var_name)) + var = self.dataset.variables[var_name] + if isinstance(att_value, int) and att_value == 0: + var[i_time, + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + elif len(att_value.shape) == 4: + var[i_time, + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + + elif len(att_value.shape) == 3: + raise NotImplementedError("It is not possible to write 3D variables.") + else: + raise NotImplementedError("SHAPE APPEND ERROR: {0}".format(att_value.shape)) + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + else: + raise ValueError("Cannot append None Data for {0}".format(var_name)) + else: + # Metadata already writen + pass + + return None + + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + + Must be implemented on inner classes + + Returns + ---------- + centre_lat : dict + Dictionary with data of centre latitudes in 1D + centre_lon : dict + Dictionary with data of centre longitudes in 1D + """ + + return None + + def _create_metadata(self, netcdf): + """ + Must be implemented on inner class. + """ + + return None + + @staticmethod + def _set_var_crs(var): + """ + Must be implemented on inner class. + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + return None + + def __to_netcdf_py(self, path, chunking=False, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + path : str + Path to the output netCDF file. + chunking: bool + Indicates if you want to chunk the output netCDF. 
+        keep_open : bool
+            Indicates if you want to keep the NetCDF open to fill the data time step by time step.
+        """
+
+        # Open NetCDF
+        if self.info:
+            print("Rank {0:03d}: Creating {1}".format(self.rank, path))
+        if self.size > 1:
+            netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info())
+        else:
+            netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False)
+        if self.info:
+            print("Rank {0:03d}: NetCDF ready to write".format(self.rank))
+
+        # Create dimensions
+        self._create_dimensions(netcdf)
+
+        # Create dimension variables
+        self._create_dimension_variables(netcdf)
+        if self.info:
+            print("Rank {0:03d}: Dimensions done".format(self.rank))
+
+        # Create cell measures
+        self._create_cell_measures(netcdf)
+
+        # Create variables
+        self._create_variables(netcdf, chunking=chunking)
+
+        # Create metadata
+        self._create_metadata(netcdf)
+
+        # Set global attributes and close NetCDF
+        if self.global_attrs is not None:
+            for att_name, att_value in self.global_attrs.items():
+                netcdf.setncattr(att_name, att_value)
+        netcdf.setncattr("Conventions", "CF-1.7")
+
+        if keep_open:
+            self.dataset = netcdf
+        else:
+            netcdf.close()
+
+        return None
+
+    def __to_netcdf_cams_ra(self, path):
+        return to_netcdf_cams_ra(self, path)
+
+    def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES",
+                  keep_open=False):
+        """
+        Write the netCDF output file.
+
+        Parameters
+        ----------
+        path : str
+            Path to the output netCDF file.
+        compression_level : int
+            Level of compression (0 to 9). Default: 0 (no compression).
+        serial : bool
+            Indicates if you want to write in serial or not. Default: False.
+        info : bool
+            Indicates if you want to print the information of each writing step to stdout. Default: False.
+        chunking : bool
+            Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False.
+        nc_type : str
+            Type of NetCDF to write: "NES" (or "DEFAULT"), "CAMS_RA", "MONARCH", "CMAQ" or "WRF_CHEM".
+        keep_open : bool
+            Indicates if you want to keep the NetCDF open to fill the data time step by time step.
+        """
+        old_info = self.info
+        self.info = info
+        self.serial_nc = None
+        self.zip_lvl = compression_level
+
+        # Serial write: gather all the data on rank 0 and write from there
+        if serial and self.size > 1:
+            try:
+                data = self._gather_data(self.variables)
+            except KeyError:
+                # KeyError means string data
+                data = self.__gather_data_py_object(self.variables)
+            try:
+                c_measures = self._gather_data(self.cell_measures)
+            except KeyError:
+                c_measures = self.__gather_data_py_object(self.cell_measures)
+            if self.master:
+                new_nc = self.copy(copy_vars=False)
+                new_nc.set_communicator(MPI.COMM_SELF)
+                new_nc.variables = data
+                new_nc.cell_measures = c_measures
+                if nc_type in ["NES", "DEFAULT"]:
+                    new_nc.__to_netcdf_py(path, keep_open=keep_open)
+                elif nc_type == "CAMS_RA":
+                    new_nc.__to_netcdf_cams_ra(path)
+                elif nc_type == "MONARCH":
+                    to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open)
+                elif nc_type == "CMAQ":
+                    to_netcdf_cmaq(new_nc, path, keep_open=keep_open)
+                elif nc_type == "WRF_CHEM":
+                    to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open)
+                else:
+                    msg = f"Unknown NetCDF type '{nc_type}'. "
" + msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" + raise ValueError(msg) + self.serial_nc = new_nc + else: + self.serial_nc = True + else: + if nc_type in ["NES", "DEFAULT"]: + self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open) + elif nc_type == "CAMS_RA": + self.__to_netcdf_cams_ra(path) + elif nc_type == "MONARCH": + to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open) + elif nc_type == "CMAQ": + to_netcdf_cmaq(self, path, keep_open=keep_open) + elif nc_type == "WRF_CHEM": + to_netcdf_wrf_chem(self, path, keep_open=keep_open) + else: + msg = f"Unknown NetCDF type '{nc_type}''. " + msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" + raise ValueError(msg) + + self.info = old_info + + return None + + def __to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False): + """ + Private method to write output file with grib2 format. + + Parameters + ---------- + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + from eccodes import codes_grib_new_from_file + from eccodes import codes_keys_iterator_new + from eccodes import codes_keys_iterator_next + from eccodes import codes_keys_iterator_get_name + from eccodes import codes_get_string + from eccodes import codes_keys_iterator_delete + from eccodes import codes_clone + from eccodes import codes_set + from eccodes import codes_set_values + from eccodes import codes_write + from eccodes import codes_release + + fout = open(path, "wb") + + # read template + fin = open(grib_template_path, "rb") + + gid = codes_grib_new_from_file(fin) + if gid is None: + sys.exit(1) + + iterid = codes_keys_iterator_new(gid, "ls") + while codes_keys_iterator_next(iterid): + keyname = codes_keys_iterator_get_name(iterid) + keyval = codes_get_string(gid, keyname) + if info: + print("%s = %s" % (keyname, keyval)) + + codes_keys_iterator_delete(iterid) + for var_name, var_info in self.variables.items(): + for i_time, time in enumerate(self.time): + for i_lev, lev in enumerate(self.lev["data"]): + clone_id = codes_clone(gid) + + # Adding grib2 keys to file + for key, value in grib_keys.items(): + if value not in ["", "None", None, nan]: + try: + codes_set(clone_id, key, value) + except Exception as e: + print(f"Something went wrong while writing the Grib key '{key}': {value}") + raise e + + # Time dependent keys + if "dataTime" in grib_keys.keys() and grib_keys["dataTime"] in ["", "None", None, nan]: + codes_set(clone_id, "dataTime", int(i_time * 100)) + if "stepRange" in grib_keys.keys() and grib_keys["stepRange"] in ["", "None", None, nan]: + n_secs = (time - self.get_full_times()[0]).total_seconds() + codes_set(clone_id, "stepRange", int(n_secs // 3600)) + if "forecastTime" in grib_keys.keys() and grib_keys["forecastTime"] in ["", "None", None, nan]: + n_secs = (time - self.get_full_times()[0]).total_seconds() + codes_set(clone_id, "forecastTime", int(n_secs)) + + # Level dependent keys + if "typeOfFirstFixedSurface" in grib_keys.keys() and \ + grib_keys["typeOfFirstFixedSurface"] in ["", "None", None, nan]: + if float(lev) == 0: + codes_set(clone_id, "typeOfFirstFixedSurface", 1) + # grib_keys["typeOfFirstFixedSurface"] = 1 + else: + codes_set(clone_id, "typeOfFirstFixedSurface", 103) + # grib_keys["typeOfFirstFixedSurface"] = 103 + if "level" in grib_keys.keys() and grib_keys["level"] in ["", "None", None, 
nan]: + codes_set(clone_id, "level", float(lev)) + + newval = var_info["data"][i_time, i_lev, :, :] + if lat_flip: + newval = flipud(newval) + + # TODO Check default NaN Value + newval[isnan(newval)] = 0. + + codes_set_values(clone_id, array(newval.ravel(), dtype="float64")) + codes_write(clone_id, fout) + del newval + codes_release(gid) + fout.close() + fin.close() + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=True, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + lat_flip : bool + Indicates if the latitude values (and data) has to be flipped + info : bool + Indicates if you want to print extra information during the process. + """ + + # if serial: + if self.parallel_method in ["X", "Y"] and self.size > 1: + try: + data = self._gather_data(self.variables) + except KeyError: + data = self.__gather_data_py_object(self.variables) + try: + c_measures = self._gather_data(self.cell_measures) + except KeyError: + c_measures = self.__gather_data_py_object(self.cell_measures) + if self.master: + new_nc = self.copy(copy_vars=False) + new_nc.set_communicator(MPI.COMM_SELF) + new_nc.variables = data + new_nc.cell_measures = c_measures + new_nc.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) + else: + self.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) + + return None + + def create_shapefile(self): + """ + Create spatial GeoDataFrame (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_shape = (self.lat_bnds["data"].shape[0], self.lon_bnds["data"].shape[0], 4) + lon_bnds_aux = empty(aux_shape) + lon_bnds_aux[:, :, 0] = self.lon_bnds["data"][newaxis, :, 0] + lon_bnds_aux[:, :, 1] = self.lon_bnds["data"][newaxis, :, 1] + lon_bnds_aux[:, :, 2] = self.lon_bnds["data"][newaxis, :, 1] + lon_bnds_aux[:, :, 3] = self.lon_bnds["data"][newaxis, :, 0] + + lon_bnds = lon_bnds_aux + del lon_bnds_aux + + lat_bnds_aux = empty(aux_shape) + lat_bnds_aux[:, :, 0] = self.lat_bnds["data"][:, newaxis, 0] + lat_bnds_aux[:, :, 1] = self.lat_bnds["data"][:, newaxis, 0] + lat_bnds_aux[:, :, 2] = self.lat_bnds["data"][:, newaxis, 1] + lat_bnds_aux[:, :, 3] = self.lat_bnds["data"][:, newaxis, 1] + + lat_bnds = lat_bnds_aux + del lat_bnds_aux + + aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2])) + aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2])) + + # Create dataframe cointaining all polygons + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + fids = self.get_fids() + gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + def write_shapefile(self, path): + """ + Save spatial GeoDataFrame (shapefile). + + Parameters + ---------- + path : str + Path to the output file. 
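+
+        Example
+        -------
+        A minimal usage sketch (``grid`` stands for any Nes object; the shapefile must have been
+        created beforehand, see ``create_shapefile``):
+
+        >>> grid.create_shapefile()
+        >>> grid.write_shapefile("grid_cells.shp")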
+ """ + + if self.shapefile is None: + raise ValueError("Shapefile was not created.") + + if self.size == 1: + # In serial, avoid gather + self.shapefile.to_file(path) + else: + # In parallel + data = self.comm.gather(self.shapefile, root=0) + if self.master: + data = concat(data) + data.to_file(path) + + return None + + def to_shapefile(self, path, time=None, lev=None, var_list=None, info=True): + """ + Create shapefile from NES data. + + 1. Create grid shapefile. + 2. Add variables to shapefile (as independent function). + 3. Write shapefile. + + Parameters + ---------- + path : str + Path to the output file. + time : datetime + Time stamp to select. + lev : int + Vertical level to select. + var_list : List, str, None + List (or single string) of the variables to be loaded and saved in the shapefile. + info: bool + Flag to allow/suppress warnings when the 'time' or 'lev' parameters are None. Default is True. + """ + + # If list is not defined, get all variables + if var_list is None: + var_list = list(self.variables.keys()) + else: + if isinstance(var_list, str): + var_list = [var_list] + + # Add warning for unloaded variables + unloaded_vars = [] + for var_name in var_list: + if self.variables[var_name]["data"] is None: + unloaded_vars.append(var_name) + if len(unloaded_vars) > 0: + raise ValueError("The variables {0} need to be loaded/created before using to_shapefile.".format( + unloaded_vars)) + + # Select first vertical level (if needed) + if lev is None: + if info: + msg = "No vertical level has been specified. The first one will be selected." + warn(msg) + sys.stderr.flush() + idx_lev = 0 + else: + if lev not in self.lev["data"]: + raise ValueError("Level {} is not available. Choose from {}".format(lev, self.lev["data"])) + idx_lev = lev + + # Select first time (if needed) + if time is None: + if info: + msg = "No time has been specified. The first one will be selected." + warn(msg) + sys.stderr.flush() + idx_time = 0 + else: + if time not in self.time: + raise ValueError("Time {} is not available. Choose from {}".format(time, self.time)) + idx_time = self.time.index(time) + + # Create shapefile + self.create_shapefile() + + # Load variables from original file and get data for selected time / level + self.add_variables_to_shapefile(var_list, idx_lev, idx_time) + + # Write shapefile + self.write_shapefile(path) + + return None + + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : List or str + Variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. + idx_time : int + Index of time for which the data will be saved in the shapefile. + """ + + for var_name in var_list: + self.shapefile[var_name] = self.variables[var_name]["data"][idx_time, idx_lev, :].ravel() + + return None + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, len(self.lat["data"])): + for lon_ind in range(0, len(self.lon["data"])): + centroids.append(Point(self.lon["data"][lon_ind], + self.lat["data"][lat_ind])) + + # Create dataframe containing all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf + + def __gather_data_py_object(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_list: dict + Variables dictionary with all the data from all the ranks. + """ + + data_list = deepcopy(data_to_gather) + for var_name in data_list.keys(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + add_dimension = False # to Add a dimension + if self.parallel_method == "Y": + if shp_len == 2: + # if is a 2D concatenate over first axis + axis = 0 + elif shp_len == 3: + # if is a 3D concatenate over second axis + axis = 1 + else: + # if is a 4D concatenate over third axis + axis = 2 + elif self.parallel_method == "X": + if shp_len == 2: + # if is a 2D concatenate over second axis + axis = 1 + elif shp_len == 3: + # if is a 3D concatenate over third axis + axis = 2 + else: + # if is a 4D concatenate over forth axis + axis = 3 + elif self.parallel_method == "T": + if shp_len == 2: + # if is a 2D add dimension + add_dimension = True + axis = None # Not used + elif shp_len == 3: + # if is a 3D concatenate over first axis + axis = 0 + else: + # if is a 4D concatenate over second axis + axis = 0 + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + if add_dimension: + data_list[var_name]["data"] = stack(data_aux) + else: + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. 
+ """ + + data_list = deepcopy(data_to_gather) + for var_name in data_list.keys(): + if self.info and self.master: + print("Gathering {0}".format(var_name)) + if data_list[var_name]["data"] is None: + data_list[var_name]["data"] = None + elif isinstance(data_list[var_name]["data"], int) and data_list[var_name]["data"] == 0: + data_list[var_name]["data"] = 0 + else: + shp_len = len(data_list[var_name]["data"].shape) + # Collect local array sizes using the gather communication pattern + rank_shapes = array(self.comm.gather(data_list[var_name]["data"].shape, root=0)) + sendbuf = data_list[var_name]["data"].flatten() + sendcounts = array(self.comm.gather(len(sendbuf), root=0)) + if self.master: + recvbuf = empty(sum(sendcounts), dtype=type(sendbuf.max())) + else: + recvbuf = None + self.comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendcounts), root=0) + if self.master: + recvbuf = split(recvbuf, cumsum(sendcounts)) + # TODO ask + # I don"t understand why it is giving one more split + if len(recvbuf) > len(sendcounts): + recvbuf = recvbuf[:-1] + for i, shape in enumerate(rank_shapes): + recvbuf[i] = recvbuf[i].reshape(shape) + add_dimension = False # to Add a dimension + if self.parallel_method == "Y": + if shp_len == 2: + # if is a 2D concatenate over first axis + axis = 0 + elif shp_len == 3: + # if is a 3D concatenate over second axis + axis = 1 + else: + # if is a 4D concatenate over third axis + axis = 2 + elif self.parallel_method == "X": + if shp_len == 2: + # if is a 2D concatenate over second axis + axis = 1 + elif shp_len == 3: + # if is a 3D concatenate over third axis + axis = 2 + else: + # if is a 4D concatenate over forth axis + axis = 3 + elif self.parallel_method == "T": + if shp_len == 2: + # if is a 2D add dimension + add_dimension = True + axis = None # Not used + elif shp_len == 3: + # if is a 3D concatenate over first axis + axis = 0 + else: + # if is a 4D concatenate over second axis + axis = 0 + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "Y", "T"])) + if add_dimension: + data_list[var_name]["data"] = stack(recvbuf) + else: + data_list[var_name]["data"] = concatenate(recvbuf, axis=axis) + + return data_list + + # ================================================================================================================== + # Extra Methods + # ================================================================================================================== + @staticmethod + def lon_lat_to_cartesian_ecef(lon, lat): + """ + # Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, + # Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. + # ECEF coordinates represent positions (in meters) as X, Y, Z coordinates, approximating the earth surface + # as an ellipsoid of revolution. + # This conversion is for the subsequent calculation of Euclidean distances of the model grid cell centres + # from each observational station. + # Defining the distance between two points on the earth's surface as simply the Euclidean distance + # between the two lat/lon pairs could lead to inaccurate results depending on the distance + # between two points (i.e. 1 deg. of longitude varies with latitude). + + Parameters + ---------- + lon : array + Longitude values. + lat : array + Latitude values. 
+ """ + + lla = Proj(proj="latlong", ellps="WGS84", datum="WGS84") + ecef = Proj(proj="geocent", ellps="WGS84", datum="WGS84") + # x, y, z = pyproj.transform(lla, ecef, lon, lat, zeros(lon.shape), radians=False) + # Deprecated: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1 + transformer = Transformer.from_proj(lla, ecef) + x, y, z = transformer.transform(lon, lat, zeros(lon.shape), radians=False) + return column_stack([x, y, z]) + + def add_4d_vertical_info(self, info_to_add): + """ + To add the vertical information from other source. + + Parameters + ---------- + info_to_add : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. + """ + + return vertical_interpolation.add_4d_vertical_info(self, info_to_add) + + def interpolate_vertical(self, new_levels, new_src_vertical=None, kind="linear", extrapolate=None, info=None, + overwrite=False): + """ + Vertical interpolation function. + + Parameters + ---------- + self : Nes + Source Nes object. + new_levels : List + A List of new vertical levels. + new_src_vertical : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. + kind : str + Vertical methods type. + extrapolate : bool or tuple or None or number or NaN + If bool: + - If True, both extrapolation options are set to "extrapolate". + - If False, extrapolation options are set to ("bottom", "top"). + If tuple: + - The first element represents the extrapolation option for the lower bound. + - The second element represents the extrapolation option for the upper bound. + - If any element is bool: + - If True, it represents "extrapolate". + - If False: + - If it"s the first element, it represents "bottom". + - If it"s the second element, it represents "top". + - If any element is None, it is replaced with numpy.nan. + - Other numeric values are kept as they are. + - If any element is NaN, it is kept as NaN. + If None: + - Both extrapolation options are set to (NaN, NaN). + If number: + - Both extrapolation options are set to the provided number. + If NaN: + - Both extrapolation options are set to NaN. + info: None, bool + Indicates if you want to print extra information. + overwrite: bool + Indicates if you want to compute the vertical interpolation in the same object or not. + """ + + return vertical_interpolation.interpolate_vertical( + self, new_levels, new_src_vertical=new_src_vertical, kind=kind, extrapolate_options=extrapolate, info=info, + overwrite=overwrite) + + def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind="NearestNeighbour", n_neighbours=4, + info=False, to_providentia=False, only_create_wm=False, wm=None, flux=False): + """ + Horizontal methods from the current grid to another one. + + Parameters + ---------- + dst_grid : nes.Nes + Final projection Nes object. + weight_matrix_path : str, None + Path to the weight matrix to read/create. + kind : str + Kind of horizontal methods. choices = ["NearestNeighbour", "Conservative"]. + n_neighbours: int + Used if kind == NearestNeighbour. Number of nearest neighbours to interpolate. Default: 4. + info: bool + Indicates if you want to print extra info during the methods process. + to_providentia : bool + Indicates if we want the interpolated grid in Providentia format. + only_create_wm : bool + Indicates if you want to only create the Weight Matrix. + wm : Nes + Weight matrix Nes File. 
+ flux : bool + Indicates if you want to calculate the weight matrix for flux variables. + """ + + return horizontal_interpolation.interpolate_horizontal( + self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, + to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux) + + def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): + """ + Compute overlay intersection of two GeoPandasDataFrames. + + Parameters + ---------- + ext_shp : GeoPandasDataFrame or str + File or path from where the data will be obtained on the intersection. + method : str + Overlay method. Accepted values: ["nearest", "intersection", "centroid"]. + var_list : List or None + Variables that will be included in the resulting shapefile. + info : bool + Indicates if you want to print the process info. + apply_bbox : bool + Indicates if you want to reduce the shapefile to a bbox. + """ + + return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info, + apply_bbox=apply_bbox) + + def calculate_grid_area(self, overwrite=True): + """ + Get coordinate bounds and call function to calculate the area of each cell of a grid. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + overwrite : bool + Indicates if we want to overwrite the grid area. + """ + + if ("cell_area" not in self.cell_measures.keys()) or overwrite: + grid_area = cell_measures.calculate_grid_area(self) + grid_area = grid_area.reshape([self.lat["data"].shape[0], self.lon["data"].shape[-1]]) + self.cell_measures["cell_area"] = {"data": grid_area} + else: + grid_area = self.cell_measures["cell_area"]["data"] + + return grid_area + + @staticmethod + def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142, + earth_radius_major_axis=6378137.0): + """ + Get coordinate bounds and call function to calculate the area of each cell of a set of geometries. + + Parameters + ---------- + geometry_list : List + A List with polygon geometries. + earth_radius_minor_axis : float + Radius of the minor axis of the Earth. + earth_radius_major_axis : float + Radius of the major axis of the Earth. + """ + + return cell_measures.calculate_geometry_area(geometry_list, earth_radius_minor_axis=earth_radius_minor_axis, + earth_radius_major_axis=earth_radius_major_axis) + + @staticmethod + def get_earth_radius(ellps): + """ + Get minor and major axis of Earth. + + Parameters + ---------- + ellps : str + Spatial reference system. + """ + + # WGS84 with radius defined in Cartopy source code + earth_radius_dict = {"WGS84": [6356752.3142, 6378137.0]} + + return earth_radius_dict[ellps] + + def create_providentia_exp_centre_coordinates(self): + """ + Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. + + Returns + ---------- + model_centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + model_centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). + """ + + raise NotImplementedError("create_providentia_exp_centre_coordinates function is not implemented by default") + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. 
+ grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + raise NotImplementedError("create_providentia_exp_grid_edge_coordinates function is not implemented by default") diff --git a/build/lib/nes/nc_projections/latlon_nes.py b/build/lib/nes/nc_projections/latlon_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..35d68c8ee3e75b9b3483f953a882c021c2cb368f --- /dev/null +++ b/build/lib/nes/nc_projections/latlon_nes.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python + +from numpy import float64, linspace, meshgrid, mean, diff, append, flip, repeat, concatenate, vstack +from pyproj import Proj +from .default_nes import Nes + + +class LatLonNes(Nes): + """ + + Attributes + ---------- + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("lat", "lon") for a regular latitude-longitude projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("lat", ) for a regular latitude-longitude projection. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("lon", ) for a regular latitude-longitude projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the LatLonNes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, balanced=balanced, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("lat", "lon") + self._lat_dim = ("lat",) + self._lon_dim = ("lon",) + + self.free_vars("crs") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. 
+ info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + @staticmethod + def _get_pyproj_projection(): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. + """ + + projection = Proj(proj="latlong", ellps="WGS84",) + + return projection + + # noinspection DuplicatedCode + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + + Returns + ------- + Dict[str, Any] + A dictionary containing projection data with the following keys: + - "grid_mapping_name" : str + Type of grid mapping (e.g., "latitude_longitude"). + - "semi_major_axis" : float + Semi-major axis of the Earth's ellipsoid. + - "inverse_flattening" : int + Inverse flattening parameter. + - "inc_lat" : float + Increment in latitude. + - "inc_lon" : float + Increment in longitude. + - "lat_orig" : float + Origin latitude of the grid. + - "lon_orig" : float + Origin longitude of the grid. + - "n_lat" : int + Number of grid points along latitude. + - "n_lon" : int + Number of grid points along longitude. + + Notes + ----- + Depending on the `create_nes` flag and input `kwargs`, the method constructs + or retrieves projection data. If `create_nes` is True, the method initializes + projection details based on provided arguments such as increments (`inc_lat`, `inc_lon`), + and if additional keyword arguments (`lat_orig`, `lon_orig`, `n_lat`, `n_lon`) are not provided, + defaults for the global domain are used. If `create_nes` is False, the method checks for + an existing "crs" variable in `self.variables` and retrieves its data, freeing the "crs" variable + afterward to optimize memory usage. 
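+
+        Example
+        -------
+        A sketch of the dictionary returned for a 1-degree global grid (``create_nes=True`` and
+        only ``inc_lat`` / ``inc_lon`` given; the semi-major axis assumes the default WGS84 Earth
+        radius)::
+
+            {"grid_mapping_name": "latitude_longitude", "semi_major_axis": 6378137.0,
+             "inverse_flattening": 0, "inc_lat": 1, "inc_lon": 1,
+             "lat_orig": -90, "lon_orig": -180, "n_lat": 180, "n_lon": 360}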
+ + """ + if create_nes: + projection_data = {"grid_mapping_name": "latitude_longitude", + "semi_major_axis": self.earth_radius[1], + "inverse_flattening": 0, + "inc_lat": kwargs["inc_lat"], + "inc_lon": kwargs["inc_lon"], + } + # Global domain + if len(kwargs) == 2: + projection_data["lat_orig"] = -90 + projection_data["lon_orig"] = -180 + projection_data["n_lat"] = int(180 // float64(projection_data["inc_lat"])) + projection_data["n_lon"] = int(360 // float64(projection_data["inc_lon"])) + # Other domains + else: + projection_data["lat_orig"] = kwargs["lat_orig"] + projection_data["lon_orig"] = kwargs["lon_orig"] + projection_data["n_lat"] = kwargs["n_lat"] + projection_data["n_lon"] = kwargs["n_lon"] + else: + if "crs" in self.variables.keys(): + projection_data = self.variables["crs"] + self.free_vars("crs") + else: + projection_data = {"grid_mapping_name": "latitude_longitude", + "semi_major_axis": self.earth_radius[1], + "inverse_flattening": 0, + } + + if "dtype" in projection_data.keys(): + del projection_data["dtype"] + + if "data" in projection_data.keys(): + del projection_data["data"] + + if "dimensions" in projection_data.keys(): + del projection_data["dimensions"] + + return projection_data + + def _create_dimensions(self, netcdf): + """ + Create "spatial_nv" dimensions and the super dimensions "lev", "time", "time_nv", "lon" and "lat". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(LatLonNes, self)._create_dimensions(netcdf) + + netcdf.createDimension("lon", len(self.get_full_longitudes()["data"])) + netcdf.createDimension("lat", len(self.get_full_latitudes()["data"])) + + # Create spatial_nv (number of vertices) dimension + if (self.lat_bnds is not None) and (self.lon_bnds is not None): + netcdf.createDimension("spatial_nv", 2) + + return None + + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + + Returns + ---------- + centre_lat : dict + Dictionary with data of centre latitudes in 1D + centre_lon : dict + Dictionary with data of centre longitudes in 1D + """ + + # Get grid resolution + inc_lat = float64(self.projection_data["inc_lat"]) + inc_lon = float64(self.projection_data["inc_lon"]) + + # Get coordinates origen + lat_orig = float64(self.projection_data["lat_orig"]) + lon_orig = float64(self.projection_data["lon_orig"]) + + # Get number of coordinates + n_lat = int(self.projection_data["n_lat"]) + n_lon = int(self.projection_data["n_lon"]) + + # Calculate centre latitudes + lat_c_orig = lat_orig + (inc_lat / 2) + centre_lat = linspace(lat_c_orig, lat_c_orig + (inc_lat * (n_lat - 1)), n_lat, dtype=float64) + + # Calculate centre longitudes + lon_c_orig = lon_orig + (inc_lon / 2) + centre_lon = linspace(lon_c_orig, lon_c_orig + (inc_lon * (n_lon - 1)), n_lon, dtype=float64) + + return {"data": centre_lat}, {"data": centre_lon} + + def create_providentia_exp_centre_coordinates(self): + """ + Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. + + Returns + ---------- + model_centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + model_centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). 
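+
+        Example
+        -------
+        A minimal sketch (``grid`` is an illustrative LatLonNes object; the shapes assume a
+        1-degree global grid):
+
+        >>> model_centre_lat, model_centre_lon = grid.create_providentia_exp_centre_coordinates()
+        >>> model_centre_lat["data"].shape  # (n_lat, n_lon)
+        (180, 360)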
+ """ + + model_centre_lon_data, model_centre_lat_data = meshgrid(self.lon["data"], self.lat["data"]) + + # Calculate centre latitudes + model_centre_lat = {"data": model_centre_lat_data} + + # Calculate centre longitudes + model_centre_lon = {"data": model_centre_lon_data} + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + + # Get grid resolution + inc_lon = abs(mean(diff(self.lon["data"]))) + inc_lat = abs(mean(diff(self.lat["data"]))) + + # Get bounds + lat_bounds = self._create_single_spatial_bounds(self.lat["data"], inc_lat) + lon_bounds = self._create_single_spatial_bounds(self.lon["data"], inc_lon) + + # Get latitudes for grid edge + left_edge_lat = append(lat_bounds.flatten()[::2], lat_bounds.flatten()[-1]) + right_edge_lat = flip(left_edge_lat, 0) + top_edge_lat = repeat(lat_bounds[-1][-1], len(self.lon["data"]) - 1) + bottom_edge_lat = repeat(lat_bounds[0][0], len(self.lon["data"])) + lat_grid_edge = concatenate((left_edge_lat, top_edge_lat, right_edge_lat, bottom_edge_lat)) + + # Get longitudes for grid edge + left_edge_lon = repeat(lon_bounds[0][0], len(self.lat["data"]) + 1) + top_edge_lon = lon_bounds.flatten()[1:-1:2] + right_edge_lon = repeat(lon_bounds[-1][-1], len(self.lat["data"]) + 1) + bottom_edge_lon = flip(lon_bounds.flatten()[:-1:2], 0) + lon_grid_edge = concatenate((left_edge_lon, top_edge_lon, right_edge_lon, bottom_edge_lon)) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((lon_grid_edge, lat_grid_edge)).T + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "crs". + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + var.grid_mapping = "crs" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the rotated latitude longitude grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset. + """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("crs", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.semi_major_axis = self.projection_data["semi_major_axis"] + mapping.inverse_flattening = self.projection_data["inverse_flattening"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if the latitudes have to be flipped + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. 
+ """ + + return super(LatLonNes, self).to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) diff --git a/build/lib/nes/nc_projections/lcc_nes.py b/build/lib/nes/nc_projections/lcc_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..f9eda6e108d204c897b97eabb36e9d03993770af --- /dev/null +++ b/build/lib/nes/nc_projections/lcc_nes.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python + +from numpy import float64, linspace, array, mean, diff, append, flip, repeat, concatenate, vstack +from geopandas import GeoDataFrame +from pandas import Index +from pyproj import Proj +from copy import deepcopy +from typing import Dict, Any +from shapely.geometry import Polygon, Point +from .default_nes import Nes + + +class LCCNes(Nes): + """ + + Attributes + ---------- + _full_y : dict + Y coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + _full_x : dict + X coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + y : dict + Y coordinates dictionary with the portion of "data" corresponding to the rank values. + x : dict + X coordinates dictionary with the portion of "data" corresponding to the rank values. + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("y", "x", ) for an LCC projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("y", "x", ) for an LCC projection. + _lon_dim : tuple + ATuple with the name of the dimensions of the Longitude values. + ("y", "x") for an LCC projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the LCCNes class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. 
+ """ + self._full_y = None + self._full_x = None + + super(LCCNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, balanced=balanced, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + else: + # Complete dimensions + self._full_y = self._get_coordinate_dimension("y") + self._full_x = self._get_coordinate_dimension("x") + + # Dimensions screening + self.y = self._get_coordinate_values(self.get_full_y(), "Y") + self.x = self._get_coordinate_values(self.get_full_x(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("y", "x") + self._lat_dim = ("y", "x") + self._lon_dim = ("y", "x") + + self.free_vars("crs") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def get_full_y(self) -> Dict[str, Any]: + """ + Retrieve the complete Y information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_y) + + return data + + def get_full_x(self) -> Dict[str, Any]: + """ + Retrieve the complete X information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_x) + return data + + def set_full_y(self, data: Dict[str, Any]) -> None: + """ + Set the complete Y information. 
+ + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + if self.master: + self._full_y = data + return None + + def set_full_x(self, data: Dict[str, Any]) -> None: + """ + Set the complete rotated longitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + if self.master: + self._full_x = data + return None + + # noinspection DuplicatedCode + def _filter_coordinates_selection(self): + """ + Use the selection limits to filter y, x, time, lev, lat, lon, lon_bnds and lat_bnds. + """ + + idx = self._get_idx_intervals() + + self.y = self._get_coordinate_values(self.get_full_y(), "Y") + self.x = self._get_coordinate_values(self.get_full_x(), "X") + + self.set_full_y({'data': self.y["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) + self.set_full_x({'data': self.x["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) + + super(LCCNes, self)._filter_coordinates_selection() + + return None + + def _get_pyproj_projection(self): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. + """ + + projection = Proj(proj="lcc", + ellps="WGS84", + R=self.earth_radius[0], + lat_1=float64(self.projection_data["standard_parallel"][0]), + lat_2=float64(self.projection_data["standard_parallel"][1]), + lon_0=float64(self.projection_data["longitude_of_central_meridian"]), + lat_0=float64(self.projection_data["latitude_of_projection_origin"]), + to_meter=1, + x_0=0, + y_0=0, + a=self.earth_radius[1], + k_0=1.0, + ) + + return projection + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. """ + if create_nes: + projection_data = {"grid_mapping_name": "lambert_conformal_conic", + "standard_parallel": [kwargs["lat_1"], kwargs["lat_2"]], + "longitude_of_central_meridian": kwargs["lon_0"], + "latitude_of_projection_origin": kwargs["lat_0"], + "x_0": kwargs["x_0"], "y_0": kwargs["y_0"], + "inc_x": kwargs["inc_x"], "inc_y": kwargs["inc_y"], + "nx": kwargs["nx"], "ny": kwargs["ny"], + } + else: + if "Lambert_Conformal" in self.variables.keys(): + projection_data = self.variables["Lambert_Conformal"] + self.free_vars("Lambert_Conformal") + elif "Lambert_conformal" in self.variables.keys(): + projection_data = self.variables["Lambert_conformal"] + self.free_vars("Lambert_conformal") + else: + # We will never have this condition since the LCC grid will never be correctly detected + # since the function __is_lcc in load_nes only detects LCC grids when there is Lambert_conformal + msg = "There is no variable called Lambert_Conformal, projection has not been defined." 
+ raise RuntimeError(msg) + + if "dtype" in projection_data.keys(): + del projection_data["dtype"] + + if "data" in projection_data.keys(): + del projection_data["data"] + + if "dimensions" in projection_data.keys(): + del projection_data["dimensions"] + + if isinstance(projection_data["standard_parallel"], str): + projection_data["standard_parallel"] = [projection_data["standard_parallel"].split(", ")[0], + projection_data["standard_parallel"].split(", ")[1]] + + return projection_data + + # noinspection DuplicatedCode + def _create_dimensions(self, netcdf): + """ + Create "y", "x" and "spatial_nv" dimensions and the super dimensions "lev", "time", "time_nv", "lon" and "lat" + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(LCCNes, self)._create_dimensions(netcdf) + + # Create y and x dimensions + netcdf.createDimension("y", len(self.get_full_y()["data"])) + netcdf.createDimension("x", len(self.get_full_x()["data"])) + + # Create spatial_nv (number of vertices) dimension + if (self.lat_bnds is not None) and (self.lon_bnds is not None): + netcdf.createDimension("spatial_nv", 4) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "y" and "x" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(LCCNes, self)._create_dimension_variables(netcdf) + + # LCC Y COORDINATES + full_y = self.get_full_y() + y = netcdf.createVariable("y", full_y["data"].dtype, ("y",)) + y.long_name = "y coordinate of projection" + if "units" in full_y.keys(): + y.units = full_y["units"] + else: + y.units = "m" + y.standard_name = "projection_y_coordinate" + if self.size > 1: + y.set_collective(True) + y[:] = full_y["data"] + + # LCC X COORDINATES + full_x = self.get_full_x() + x = netcdf.createVariable("x", full_x["data"].dtype, ("x",)) + x.long_name = "x coordinate of projection" + if "units" in full_x.keys(): + x.units = full_x["units"] + else: + x.units = "m" + x.standard_name = "projection_x_coordinate" + if self.size > 1: + x.set_collective(True) + x[:] = full_x["data"] + + return None + + # noinspection DuplicatedCode + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + if self.master: + # Get projection details on x + x_0 = float64(self.projection_data["x_0"]) + inc_x = float64(self.projection_data["inc_x"]) + nx = int(self.projection_data["nx"]) + + # Get projection details on y + y_0 = float64(self.projection_data["y_0"]) + inc_y = float64(self.projection_data["inc_y"]) + ny = int(self.projection_data["ny"]) + + # Create a regular grid in metres (1D) + self._full_x = {"data": linspace(x_0 + (inc_x / 2), x_0 + (inc_x / 2) + (inc_x * (nx - 1)), nx, + dtype=float64)} + self._full_y = {"data": linspace(y_0 + (inc_y / 2), y_0 + (inc_y / 2) + (inc_y * (ny - 1)), ny, + dtype=float64)} + + # Create a regular grid in metres (1D to 2D) + x = array([self._full_x["data"]] * len(self._full_y["data"])) + y = array([self._full_y["data"]] * len(self._full_x["data"])).T + + # Calculate centre latitudes and longitudes (UTM to LCC) + centre_lon, centre_lat = self.projection(x, y, inverse=True) + + return {"data": centre_lat}, {"data": centre_lon} + else: + return None, None + + def create_providentia_exp_centre_coordinates(self): + """ + Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. 
+ + Returns + ---------- + model_centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + model_centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). + """ + + # Get centre latitudes + model_centre_lat = self.lat + + # Get centre longitudes + model_centre_lon = self.lon + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + # Get grid resolution + inc_x = abs(mean(diff(self.x["data"]))) + inc_y = abs(mean(diff(self.y["data"]))) + + # Get bounds for rotated coordinates + y_bnds = self._create_single_spatial_bounds(self.y["data"], inc_y) + x_bnds = self._create_single_spatial_bounds(self.x["data"], inc_x) + + # Get rotated latitudes for grid edge + left_edge_y = append(y_bnds.flatten()[::2], y_bnds.flatten()[-1]) + right_edge_y = flip(left_edge_y, 0) + top_edge_y = repeat(y_bnds[-1][-1], len(self.x["data"]) - 1) + bottom_edge_y = repeat(y_bnds[0][0], len(self.x["data"])) + y_grid_edge = concatenate((left_edge_y, top_edge_y, right_edge_y, bottom_edge_y)) + + # Get rotated longitudes for grid edge + left_edge_x = repeat(x_bnds[0][0], len(self.y["data"]) + 1) + top_edge_x = x_bnds.flatten()[1:-1:2] + right_edge_x = repeat(x_bnds[-1][-1], len(self.y["data"]) + 1) + bottom_edge_x = flip(x_bnds.flatten()[:-1:2], 0) + x_grid_edge = concatenate((left_edge_x, top_edge_x, right_edge_x, bottom_edge_x)) + + # Get edges for regular coordinates + grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + # noinspection DuplicatedCode + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + + # Calculate LCC coordinates bounds + full_x = self.get_full_x() + full_y = self.get_full_y() + inc_x = abs(mean(diff(full_x["data"]))) + x_bnds = self._create_single_spatial_bounds(array([full_x["data"]] * len(full_y["data"])), + inc_x, spatial_nv=4) + + inc_y = abs(mean(diff(full_y["data"]))) + y_bnds = self._create_single_spatial_bounds(array([full_y["data"]] * len(full_x["data"])).T, + inc_y, spatial_nv=4, inverse=True) + + # Transform LCC bounds to regular bounds + lon_bnds, lat_bnds = self.projection(x_bnds, y_bnds, inverse=True) + + # Obtain regular coordinates bounds + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + return None + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "Lambert_Conformal". 
+ + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + var.grid_mapping = "Lambert_Conformal" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the lambert conformal grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset + """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("Lambert_Conformal", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.standard_parallel = self.projection_data["standard_parallel"] + mapping.longitude_of_central_meridian = self.projection_data["longitude_of_central_meridian"] + mapping.latitude_of_projection_origin = self.projection_data["latitude_of_projection_origin"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if the latitudes need to be flipped Up-Down or Down-Up. Default False. + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written in a Lambert Conformal Conic projection.") + + # noinspection DuplicatedCode + def create_shapefile(self): + """ + Create spatial GeoDataFrame (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lat = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + self.lat_bnds["data"].shape[2])) + aux_b_lon = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + self.lon_bnds["data"].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lon.shape[0]): + geometry.append(Polygon([(aux_b_lon[i, 0], aux_b_lat[i, 0]), + (aux_b_lon[i, 1], aux_b_lat[i, 1]), + (aux_b_lon[i, 2], aux_b_lat[i, 2]), + (aux_b_lon[i, 3], aux_b_lat[i, 3]), + (aux_b_lon[i, 0], aux_b_lat[i, 0])])) + + # Create dataframe containing all polygons + fids = self.get_fids() + gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + # noinspection DuplicatedCode + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
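+
+        Example
+        -------
+        A sketch of inspecting the centroids of a loaded grid (the object name is illustrative):
+
+        >>> centroids_gdf = nes.get_centroids_from_coordinates()
+        >>> centroids_gdf.geometry.iloc[0]  # shapely Point with the (lon, lat) of the first cell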
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon["data"].shape[0]): + for lon_ind in range(0, self.lon["data"].shape[1]): + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + self.lat["data"][lat_ind, lon_ind])) + + # Create dataframe containing all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf diff --git a/build/lib/nes/nc_projections/mercator_nes.py b/build/lib/nes/nc_projections/mercator_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..520f9bb4bfa9eea3da36fad13971d96e36bd7125 --- /dev/null +++ b/build/lib/nes/nc_projections/mercator_nes.py @@ -0,0 +1,610 @@ +#!/usr/bin/env python + +from numpy import float64, linspace, array, mean, diff, append, flip, repeat, concatenate, vstack +from geopandas import GeoDataFrame +from pandas import Index +from pyproj import Proj +from copy import deepcopy +from typing import Dict, Any +from shapely.geometry import Polygon, Point +from nes.nc_projections.default_nes import Nes + + +class MercatorNes(Nes): + """ + + Attributes + ---------- + _full_y : dict + Y coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + _full_x : dict + X coordinates dictionary with the complete "data" key for all the values and the rest of the attributes. + y : dict + Y coordinates dictionary with the portion of "data" corresponding to the rank values. + x : dict + X coordinates dictionary with the portion of "data" corresponding to the rank values. + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("y", "x") for a Mercator projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("y", "x") for a Mercator projection. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("y", "x") for a Mercator projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the MercatorNes class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + times : list, None + List of times to substitute the current ones while creation. 
+ + """ + self._full_y = None + self._full_x = None + + super(MercatorNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, balanced=balanced, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + else: + # Complete dimensions + self._full_y = self._get_coordinate_dimension("y") + self._full_x = self._get_coordinate_dimension("x") + + # Dimensions screening + self.y = self._get_coordinate_values(self.get_full_y(), "Y") + self.x = self._get_coordinate_values(self.get_full_x(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("y", "x") + self._lat_dim = ("y", "x") + self._lon_dim = ("y", "x") + + self.free_vars("crs") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def get_full_y(self) -> Dict[str, Any]: + """ + Retrieve the complete Y information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_y) + + return data + + def get_full_x(self) -> Dict[str, Any]: + """ + Retrieve the complete X information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_x) + return data + + def set_full_y(self, data: Dict[str, Any]) -> None: + """ + Set the complete Y information. 
+ + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + if self.master: + self._full_y = data + return None + + def set_full_x(self, data: Dict[str, Any]) -> None: + """ + Set the complete rotated longitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + if self.master: + self._full_x = data + return None + + # noinspection DuplicatedCode + def _filter_coordinates_selection(self): + """ + Use the selection limits to filter y, x, time, lev, lat, lon, lon_bnds and lat_bnds. + """ + + idx = self._get_idx_intervals() + + self.y = self._get_coordinate_values(self.get_full_y(), "Y") + self.x = self._get_coordinate_values(self.get_full_x(), "X") + + self.set_full_y({'data': self.y["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) + self.set_full_x({'data': self.x["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) + + super(MercatorNes, self)._filter_coordinates_selection() + + return None + + def _get_pyproj_projection(self): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. + """ + + projection = Proj(proj="merc", + a=self.earth_radius[1], + b=self.earth_radius[0], + lat_ts=float64(self.projection_data["standard_parallel"]), + lon_0=float64(self.projection_data["longitude_of_projection_origin"]),) + + return projection + + # noinspection DuplicatedCode + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + if create_nes: + projection_data = {"grid_mapping_name": "mercator", + "standard_parallel": kwargs["lat_ts"], + "longitude_of_projection_origin": kwargs["lon_0"], + "x_0": kwargs["x_0"], "y_0": kwargs["y_0"], + "inc_x": kwargs["inc_x"], "inc_y": kwargs["inc_y"], + "nx": kwargs["nx"], "ny": kwargs["ny"], + } + else: + if "mercator" in self.variables.keys(): + projection_data = self.variables["mercator"] + self.free_vars("mercator") + + else: + msg = "There is no variable called mercator, projection has not been defined." + raise RuntimeError(msg) + + if "dtype" in projection_data.keys(): + del projection_data["dtype"] + + if "data" in projection_data.keys(): + del projection_data["data"] + + if "dimensions" in projection_data.keys(): + del projection_data["dimensions"] + + return projection_data + + # noinspection DuplicatedCode + def _create_dimensions(self, netcdf): + """ + Create "y", "x" and "spatial_nv" dimensions and the super dimensions "lev", "time", "time_nv", "lon" and "lat" + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
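+
+        Notes
+        -----
+        After this call the file is expected to contain the super dimensions ("lev", "time", "time_nv",
+        "lon", "lat") plus "y", "x" and, when cell boundaries are present, "spatial_nv" (4 vertices per cell).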
+ """ + + super(MercatorNes, self)._create_dimensions(netcdf) + + # Create y and x dimensions + netcdf.createDimension("y", len(self.get_full_y()["data"])) + netcdf.createDimension("x", len(self.get_full_x()["data"])) + + # Create spatial_nv (number of vertices) dimension + if (self.lat_bnds is not None) and (self.lon_bnds is not None): + netcdf.createDimension("spatial_nv", 4) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "y" and "x" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(MercatorNes, self)._create_dimension_variables(netcdf) + + # MERCATOR Y COORDINATES + full_y = self.get_full_y() + y = netcdf.createVariable("y", full_y["data"].dtype, ("y",)) + y.long_name = "y coordinate of projection" + if "units" in full_y.keys(): + y.units = full_y["units"] + else: + y.units = "m" + y.standard_name = "projection_y_coordinate" + if self.size > 1: + y.set_collective(True) + y[:] = full_y["data"] + + # MERCATOR X COORDINATES + full_x = self.get_full_x() + x = netcdf.createVariable("x", full_x["data"].dtype, ("x",)) + x.long_name = "x coordinate of projection" + if "units" in full_x.keys(): + x.units = full_x["units"] + else: + x.units = "m" + x.standard_name = "projection_x_coordinate" + if self.size > 1: + x.set_collective(True) + x[:] = full_x["data"] + + return None + + # noinspection DuplicatedCode + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + """ + if self.master: + # Get projection details on x + x_0 = float64(self.projection_data["x_0"]) + inc_x = float64(self.projection_data["inc_x"]) + nx = int(self.projection_data["nx"]) + + # Get projection details on y + y_0 = float64(self.projection_data["y_0"]) + inc_y = float64(self.projection_data["inc_y"]) + ny = int(self.projection_data["ny"]) + + # Create a regular grid in metres (1D) + self._full_x = {"data": linspace(x_0 + (inc_x / 2), x_0 + (inc_x / 2) + (inc_x * (nx - 1)), nx, + dtype=float64)} + self._full_y = {"data": linspace(y_0 + (inc_y / 2), y_0 + (inc_y / 2) + (inc_y * (ny - 1)), ny, + dtype=float64)} + + # Create a regular grid in metres (1D to 2D) + x = array([self._full_x["data"]] * len(self._full_y["data"])) + y = array([self._full_y["data"]] * len(self._full_x["data"])).T + + # Calculate centre latitudes and longitudes (UTM to Mercator) + centre_lon, centre_lat = self.projection(x, y, inverse=True) + + return {"data": centre_lat}, {"data": centre_lon} + else: + return None, None + + def create_providentia_exp_centre_coordinates(self): + """ + Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. + + Returns + ---------- + model_centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + model_centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). + """ + + # Get centre latitudes + model_centre_lat = self.lat + + # Get centre longitudes + model_centre_lon = self.lon + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. 
+ """ + + # Get grid resolution + inc_x = abs(mean(diff(self.x["data"]))) + inc_y = abs(mean(diff(self.y["data"]))) + + # Get bounds for rotated coordinates + y_bounds = self._create_single_spatial_bounds(self.y["data"], inc_y) + x_bounds = self._create_single_spatial_bounds(self.x["data"], inc_x) + + # Get rotated latitudes for grid edge + left_edge_y = append(y_bounds.flatten()[::2], y_bounds.flatten()[-1]) + right_edge_y = flip(left_edge_y, 0) + top_edge_y = repeat(y_bounds[-1][-1], len(self.x["data"]) - 1) + bottom_edge_y = repeat(y_bounds[0][0], len(self.x["data"])) + y_grid_edge = concatenate((left_edge_y, top_edge_y, right_edge_y, bottom_edge_y)) + + # Get rotated longitudes for grid edge + left_edge_x = repeat(x_bounds[0][0], len(self.y["data"]) + 1) + top_edge_x = x_bounds.flatten()[1:-1:2] + right_edge_x = repeat(x_bounds[-1][-1], len(self.y["data"]) + 1) + bottom_edge_x = flip(x_bounds.flatten()[:-1:2], 0) + x_grid_edge = concatenate((left_edge_x, top_edge_x, right_edge_x, bottom_edge_x)) + + # Get edges for regular coordinates + grid_edge_lon_data, grid_edge_lat_data = self.projection(x_grid_edge, y_grid_edge, inverse=True) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + # noinspection DuplicatedCode + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + + # Calculate Mercator coordinates bounds + full_x = self.get_full_x() + full_y = self.get_full_y() + inc_x = abs(mean(diff(full_x["data"]))) + x_bnds = self._create_single_spatial_bounds(array([full_x["data"]] * len(full_y["data"])), + inc_x, spatial_nv=4) + + inc_y = abs(mean(diff(full_y["data"]))) + y_bnds = self._create_single_spatial_bounds(array([full_y["data"]] * len(full_x["data"])).T, + inc_y, spatial_nv=4, inverse=True) + + # Transform Mercator bounds to regular bounds + lon_bnds, lat_bnds = self.projection(x_bnds, y_bnds, inverse=True) + + # Obtain regular coordinates bounds + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + return None + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "mercator". + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + + var.grid_mapping = "mercator" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the Mercator grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset. 
+ """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("mercator", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.standard_parallel = self.projection_data["standard_parallel"] + mapping.longitude_of_projection_origin = self.projection_data["longitude_of_projection_origin"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if you want to flip latitudes Up-Down + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written in a Mercator projection.") + + # noinspection DuplicatedCode + def create_shapefile(self): + """ + Create spatial GeoDataFrame (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lat = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + self.lat_bnds["data"].shape[2])) + aux_b_lon = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + self.lon_bnds["data"].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lon.shape[0]): + geometry.append(Polygon([(aux_b_lon[i, 0], aux_b_lat[i, 0]), + (aux_b_lon[i, 1], aux_b_lat[i, 1]), + (aux_b_lon[i, 2], aux_b_lat[i, 2]), + (aux_b_lon[i, 3], aux_b_lat[i, 3]), + (aux_b_lon[i, 0], aux_b_lat[i, 0])])) + + # Create dataframe containing all polygons + fids = self.get_fids() + gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + # noinspection DuplicatedCode + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon["data"].shape[0]): + for lon_ind in range(0, self.lon["data"].shape[1]): + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + self.lat["data"][lat_ind, lon_ind])) + + # Create dataframe containing all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf diff --git a/build/lib/nes/nc_projections/points_nes.py b/build/lib/nes/nc_projections/points_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..29022b50874c571580e25cc2239a1ecdaa623352 --- /dev/null +++ b/build/lib/nes/nc_projections/points_nes.py @@ -0,0 +1,755 @@ +#!/usr/bin/env python + +import sys +from warnings import warn +from numpy import float64, arange, array, ndarray, generic, issubdtype, character, concatenate +from pandas import Index +from geopandas import GeoDataFrame, points_from_xy +from pyproj import Proj +from copy import deepcopy +from netCDF4 import date2num +from .default_nes import Nes + + +class PointsNes(Nes): + """ + + Attributes + ---------- + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("lat", "lon", ) for a points grid. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. + ("lat", ) for a points grid. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("lon", ) for a points grid. + _station : tuple + A Tuple with the name of the dimensions of the station values. + ("station", ) for a points grid. + """ + + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the PointsNes class. + + Parameters + ---------- + comm: MPI.Comm + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset or None + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + accepted values: ["X", "T"]. + strlen: int + Maximum length of strings in NetCDF. Default: 75. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. 
+ """ + + super(PointsNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, balanced=balanced, **kwargs) + + if create_nes: + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "X") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + + # Complete dimensions + self._station = {"data": arange(len(self.get_full_longitudes()["data"]))} + + # Dimensions screening + self.station = self._get_coordinate_values(self._station, "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("station",) + self._lat_dim = ("station",) + self._lon_dim = ("station",) + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + create_nes=False, balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + accepted values: ["X", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + @staticmethod + def _get_pyproj_projection(): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. + """ + + projection = Proj(proj="latlong", ellps="WGS84",) + + return projection + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + + return None + + def _create_dimensions(self, netcdf): + """ + Create "time", "time_nv", "station" and "strlen" dimensions. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
+ """ + + # Create time dimension + netcdf.createDimension("time", None) + + # Create time_nv (number of vertices) dimension + if self.time_bnds is not None: + netcdf.createDimension("time_nv", 2) + + # Create station dimension + # The number of longitudes is equal to the number of stations + netcdf.createDimension("station", len(self.get_full_longitudes()["data"])) + + # Create string length dimension + if self.strlen is not None: + netcdf.createDimension("strlen", self.strlen) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "station", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + # TIMES + time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "hours since {0}".format( + self.get_full_times()[self._get_time_id(self.hours_start, first=True)].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if self.time_bnds is not None: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): + self._get_time_id(self.hours_end, first=False)], + time_var.units, time_var.calendar) + + # TIME BOUNDS + if self.time_bnds is not None: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), zlib=self.zip_lvl, + complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(self.get_full_time_bnds(), time_var.units, calendar="standard") + + # STATIONS + stations = netcdf.createVariable("station", float64, ("station",), zlib=self.zip_lvl > 0, + complevel=self.zip_lvl) + stations.units = "" + stations.axis = "X" + stations.long_name = "" + stations.standard_name = "station" + if self.size > 1: + stations.set_collective(True) + stations[:] = self._station["data"] + + # LATITUDES + lat = netcdf.createVariable("lat", float64, self._lat_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if self.lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = self.get_full_latitudes()["data"] + + # LONGITUDES + lon = netcdf.createVariable("lon", float64, self._lon_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if self.lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = self.get_full_longitudes()["data"] + + return None + + # noinspection DuplicatedCode + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + return values + + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 1 or 2 dimensions + if len(var_dims) < 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 2: + if "strlen" in var_dims: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], :] + data = array(["".join(i.tobytes().decode("ascii").replace("\x00", "")) for i in data], dtype=object) + else: + data = nc_var[self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + else: + raise NotImplementedError("Error with {0}. Only can be read netCDF with 2 dimensions or less".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + # noinspection DuplicatedCode + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open Dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + if self.variables is not None: + for i, (var_name, var_dict) in enumerate(self.variables.items()): + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception: # TODO: Detect exception + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") + + # Get dimensions when reading datasets + if "dimensions" in var_dict.keys(): + var_dims = var_dict["dimensions"] + # Get dimensions when creating new datasets + else: + if len(var_dict["data"].shape) == 1: + # For data that depends only on station (e.g. station_code) + var_dims = self._var_dim + else: + # For data that is dependent on time and station (e.g. 
PM10) + var_dims = ("time",) + self._var_dim + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.balanced: + raise NotImplementedError("A balanced data cannot be chunked.") + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl, + chunksizes=chunk_size) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if len(att_value.shape) == 1: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 2: + if "strlen" in var_dims: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + except ValueError: + raise ValueError( + "Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]: + self.write_axis_limits["x_max"]].shape, + att_value.shape)) + else: + try: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + out_shape, att_value.shape)) + except ValueError: + out_shape = var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape + raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( + out_shape, att_value.shape)) + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + return None + + # noinspection DuplicatedCode + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. + """ + data_list = deepcopy(data_to_gather) + for var_name, var_info in data_list.items(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + if self.parallel_method == "X": + # concatenate over station + if shp_len == 1: + # dimensions = (station) + axis = 0 + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = 0 + else: + # dimensions = (time, station) + axis = 1 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + elif self.parallel_method == "T": + # concatenate over time + if shp_len == 1: + # dimensions = (station) + axis = None + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = None + else: + # dimensions = (time, station) + axis = 0 + else: + msg = "The points NetCDF must only have surface values (without levels)." + raise NotImplementedError(msg) + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "T"])) + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from points. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + # Calculate centre latitudes + centre_lat = kwargs["lat"] + + # Calculate centre longitudes + centre_lon = kwargs["lon"] + + return {"data": centre_lat}, {"data": centre_lon} + + def _create_metadata(self, netcdf): + """ + Create metadata variables + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + return None + + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. 
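+        Points datasets have no cell geometry, so this method always raises NotImplementedError.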
+ """ + + raise NotImplementedError("Spatial bounds cannot be created for points datasets.") + + def to_providentia(self, model_centre_lon, model_centre_lat, grid_edge_lon, grid_edge_lat): + """ + Transform a PointsNes into a PointsNesProvidentia object + + Returns + ---------- + points_nes_providentia : nes.Nes + Points Nes Providentia Object + """ + + from .points_nes_providentia import PointsNesProvidentia + + points_nes_providentia = PointsNesProvidentia(comm=self.comm, + info=self.info, + balanced=self.balanced, + parallel_method=self.parallel_method, + avoid_first_hours=self.hours_start, + avoid_last_hours=self.hours_end, + first_level=self.first_level, + last_level=self.last_level, + create_nes=True, + times=self.time, + model_centre_lon=model_centre_lon, + model_centre_lat=model_centre_lat, + grid_edge_lon=grid_edge_lon, + grid_edge_lat=grid_edge_lat, + lat=self.lat["data"], + lon=self.lon["data"] + ) + + # Convert dimensions (time, lev, lat, lon) to (station, time) for interpolated variables and reshape data + variables = {} + interpolated_variables = deepcopy(self.variables) + for var_name, var_info in interpolated_variables.items(): + variables[var_name] = {} + # ("time", "lev", "lat", "lon") or ("time", "lat", "lon") to ("station", "time") + if len(var_info["dimensions"]) != len(var_info["data"].shape): + variables[var_name]["data"] = var_info["data"].T + variables[var_name]["dimensions"] = ("station", "time") + else: + variables[var_name]["data"] = var_info["data"] + variables[var_name]["dimensions"] = var_info["dimensions"] + + # Set variables + points_nes_providentia.variables = variables + + return points_nes_providentia + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if you want to flip the latitude direction. + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written with point data.") + + def create_shapefile(self): + """ + Create spatial GeoDataFrame (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + # Create dataframe containing all points + gdf = self.get_centroids_from_coordinates() + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. + """ + + # Get centroids from coordinates + centroids = points_from_xy(self.lon["data"], self.lat["data"]) + + # Create dataframe containing all points + fids = arange(len(self.get_full_longitudes()["data"])) + fids = fids[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf + + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : list, str + List (or single string) of the variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. 
+ idx_time : int + Index of time for which the data will be saved in the shapefile. + """ + + if idx_lev != 0: + msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev) + raise ValueError(msg) + + for var_name in var_list: + # station as dimension + if len(self.variables[var_name]["dimensions"]) == 1: + self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel() + # station and time as dimensions + else: + self.shapefile[var_name] = self.variables[var_name]["data"][idx_time, :].ravel() + + return None + + @staticmethod + def _get_axis_index_(axis): + if axis == "T": + value = 0 + elif axis == "X": + value = 1 + else: + raise ValueError("Unknown axis: {0}".format(axis)) + return value + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + var.coordinates = "lat lon" + + return None diff --git a/build/lib/nes/nc_projections/points_nes_ghost.py b/build/lib/nes/nc_projections/points_nes_ghost.py new file mode 100644 index 0000000000000000000000000000000000000000..0df1c75e64cc6da2dc0b2d139a63ce8d2aba68b7 --- /dev/null +++ b/build/lib/nes/nc_projections/points_nes_ghost.py @@ -0,0 +1,818 @@ +#!/usr/bin/env python + +import sys +from warnings import warn +from numpy import float64, empty, ndarray, generic, array, issubdtype, character, concatenate, int64 +from netCDF4 import date2num +from copy import deepcopy +from .points_nes import PointsNes + + +class PointsNesGHOST(PointsNes): + """ + + Attributes + ---------- + _qa : dict + Quality flags (GHOST checks) dictionary with the complete "data" key for all the values and the rest of the + attributes. + _flag : dict + Data flags (given by data provider) dictionary with the complete "data" key for all the values and the rest of + the attributes. + _qa : dict + Quality flags (GHOST checks) dictionary with the portion of "data" corresponding to the rank values. + _flag : dict + Data flags (given by data provider) dictionary with the portion of "data" corresponding to the rank values. + """ + + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the PointsNesGHOST class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. 
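+
+        Example
+        -------
+        A sketch of opening a GHOST observations file (the path is hypothetical; these objects are normally
+        obtained through open_netcdf):
+
+        >>> nes = PointsNesGHOST(path="sconco3_201906.nc", parallel_method="X")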
+ """ + + super(PointsNesGHOST, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, balanced=balanced, **kwargs) + + # Complete dimensions + self._flag = self._get_coordinate_dimension(["flag"]) + self._qa = self._get_coordinate_dimension(["qa"]) + + # Dimensions screening + self.flag = self._get_coordinate_values(self._flag, "X") + self.qa = self._get_coordinate_values(self._qa, "X") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the PointsNesGHOST class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def _create_dimensions(self, netcdf): + """ + Create "N_flag_codes" and "N_qa_codes" dimensions and the super dimensions + "time", "time_nv", "station", and "strlen". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(PointsNesGHOST, self)._create_dimensions(netcdf) + + # Create N_flag_codes and N_qa_codes dimensions + netcdf.createDimension("N_flag_codes", self._flag["data"].shape[2]) + netcdf.createDimension("N_qa_codes", self._qa["data"].shape[2]) + + return None + + # noinspection DuplicatedCode + def _create_dimension_variables(self, netcdf): + """ + Create the "time", "time_bnds", "station", "lat", "lat_bnds", "lon" and "lon_bnds" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
+ """ + + # TIMES + time_var = netcdf.createVariable("time", float64, ("time",), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + time_var.units = "hours since {0}".format( + self.get_full_times()[self._get_time_id(self.hours_start, first=True)].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "standard" + time_var.long_name = "time" + if self.time_bnds is not None: + time_var.bounds = "time_bnds" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): + self._get_time_id(self.hours_end, first=False)], + time_var.units, time_var.calendar) + + # TIME BOUNDS + if self.time_bnds is not None: + time_bnds_var = netcdf.createVariable("time_bnds", float64, ("time", "time_nv",), zlib=self.zip_lvl, + complevel=self.zip_lvl) + if self.size > 1: + time_bnds_var.set_collective(True) + time_bnds_var[:] = date2num(self.get_full_time_bnds(), time_var.units, calendar="standard") + + # STATIONS + stations = netcdf.createVariable("station", float64, ("station",), zlib=self.zip_lvl > 0, + complevel=self.zip_lvl) + stations.units = "" + stations.axis = "X" + stations.long_name = "" + stations.standard_name = "station" + if self.size > 1: + stations.set_collective(True) + stations[:] = self._station["data"] + + # LATITUDES + lat = netcdf.createVariable("latitude", float64, self._lat_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lat.units = "degrees_north" + lat.axis = "Y" + lat.long_name = "latitude coordinate" + lat.standard_name = "latitude" + if self.lat_bnds is not None: + lat.bounds = "lat_bnds" + if self.size > 1: + lat.set_collective(True) + lat[:] = self.get_full_latitudes()["data"] + + # LONGITUDES + lon = netcdf.createVariable("longitude", float64, self._lon_dim, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + lon.units = "degrees_east" + lon.axis = "X" + lon.long_name = "longitude coordinate" + lon.standard_name = "longitude" + if self.lon_bnds is not None: + lon.bounds = "lon_bnds" + if self.size > 1: + lon.set_collective(True) + lon[:] = self.get_full_longitudes()["data"] + + def erase_flags(self): + + first_time_idx = self._get_time_id(self.hours_start, first=True) + last_time_idx = self._get_time_id(self.hours_end, first=False) + t_len = last_time_idx - first_time_idx + + self._qa["data"] = empty((len(self.get_full_longitudes()["data"]), t_len, 0)) + self._flag["data"] = empty((len(self.get_full_longitudes()["data"]), t_len, 0)) + + return None + + # noinspection DuplicatedCode + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif coordinate_len == 3: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], :] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + + return values + + # noinspection DuplicatedCode + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 1 or 2 dimensions + if len(var_dims) < 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif len(var_dims) == 3: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + :] + else: + raise NotImplementedError("Error with {0}. Only can be read netCDF with 3 dimensions or less".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + # noinspection DuplicatedCode + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open Dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + if self.variables is not None: + for i, (var_name, var_dict) in enumerate(self.variables.items()): + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception: + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") + + # Get dimensions when reading datasets + if "dimensions" in var_dict.keys(): + var_dims = var_dict["dimensions"] + # Get dimensions when creating new datasets + else: + if len(var_dict["data"].shape) == 1: + # For data that depends only on station (e.g. station_code) + var_dims = self._var_dim + else: + # For data that is dependent on time and station (e.g. 
PM10) + var_dims = self._var_dim + ("time",) + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, + complevel=self.zip_lvl, chunksizes=chunk_size) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if len(att_value.shape) == 1: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 2: + if "strlen" in var_dims: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + else: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, + att_value.shape)) + except ValueError: + out_shape = var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape + raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( + out_shape, att_value.shape)) + elif len(att_value.shape) == 3: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + return None + + # noinspection DuplicatedCode + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. + """ + + data_list = deepcopy(data_to_gather) + for var_name, var_info in data_list.items(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + # concatenate over station + if self.parallel_method == "X": + if shp_len == 1: + # dimensions = (station) + axis = 0 + elif shp_len == 2: + # dimensions = (station, strlen) or + # dimensions = (station, time) + axis = 0 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + elif self.parallel_method == "T": + # concatenate over time + if shp_len == 1: + # dimensions = (station) + axis = None + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = None + else: + # dimensions = (station, time) + axis = 1 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "T"])) + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def _create_metadata(self, netcdf): + """ + Create metadata variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
+ """ + + # N FLAG CODES + flag = netcdf.createVariable("flag", int64, ("station", "time", "N_flag_codes",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + flag.units = "" + flag.axis = "" + flag.long_name = "" + flag.standard_name = "flag" + if self.size > 1: + flag.set_collective(True) + flag[:] = self._flag["data"] + + # N QA CODES + qa = netcdf.createVariable("qa", int64, ("station", "time", "N_qa_codes",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + qa.units = "" + qa.axis = "" + qa.long_name = "" + qa.standard_name = "N_qa_codes" + if self.size > 1: + qa.set_collective(True) + qa[:] = self._qa["data"] + + return None + + def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES", + keep_open=False): + """ + Write the netCDF output file. + + Parameters + ---------- + keep_open : bool + nc_type : str + path : str + Path to the output netCDF file. + compression_level : int + Level of compression (0 to 9) Default: 0 (no compression). + serial : bool + Indicates if you want to write in serial or not. Default: False. + info : bool + Indicates if you want to print the information of each writing step by stdout Default: False. + chunking : bool + Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False. + """ + + if (not serial) and (self.size > 1): + msg = "WARNING!!! " + msg += "GHOST datasets cannot be written in parallel yet. " + msg += "Changing to serial mode." + warn(msg) + sys.stderr.flush() + + super(PointsNesGHOST, self).to_netcdf(path, compression_level=compression_level, + serial=True, info=info, chunking=chunking) + + return None + + def to_points(self): + """ + Transform a PointsNesGHOST into a PointsNes object + + Returns + ---------- + points_nes : nes.Nes + Points Nes Object (without GHOST metadata variables) + """ + + points_nes = PointsNes(comm=self.comm, + info=self.info, + balanced=self.balanced, + parallel_method=self.parallel_method, + avoid_first_hours=self.hours_start, + avoid_last_hours=self.hours_end, + first_level=self.first_level, + last_level=self.last_level, + create_nes=True, + lat=self.lat["data"], + lon=self.lon["data"], + times=self.time + ) + + # The version attribute in GHOST files prior to 1.3.3 is called data_version, after it is version + if "version" in self.global_attrs: + ghost_version = self.global_attrs["version"] + elif "data_version" in self.global_attrs: + ghost_version = self.global_attrs["data_version"] + else: + ghost_version = "0.0.0" + metadata_variables = self.get_standard_metadata(ghost_version) + self.free_vars(metadata_variables) + self.free_vars("station") + points_nes.variables = deepcopy(self.variables) + + return points_nes + + @staticmethod + def get_standard_metadata(ghost_version): + """ + Get all possible GHOST variables for each version. + + Parameters + ---------- + ghost_version : str + Version of GHOST file. 
+ + Returns + ---------- + metadata_variables[GHOST_version] : list + A List of metadata variables for a certain GHOST version + """ + + # This metadata variables are + metadata_variables = {"1.4": ["GHOST_version", "station_reference", "station_timezone", "latitude", "longitude", + "altitude", "sampling_height", "measurement_altitude", "ellipsoid", + "horizontal_datum", "vertical_datum", "projection", "distance_to_building", + "distance_to_kerb", "distance_to_junction", "distance_to_source", "street_width", + "street_type", "daytime_traffic_speed", "daily_passing_vehicles", "data_level", + "climatology", "station_name", "city", "country", + "administrative_country_division_1", "administrative_country_division_2", + "population", "representative_radius", "network", "associated_networks", + "area_classification", "station_classification", "main_emission_source", + "land_use", "terrain", "measurement_scale", + "ESDAC_Iwahashi_landform_classification", + "ESDAC_modal_Iwahashi_landform_classification_5km", + "ESDAC_modal_Iwahashi_landform_classification_25km", + "ESDAC_Meybeck_landform_classification", + "ESDAC_modal_Meybeck_landform_classification_5km", + "ESDAC_modal_Meybeck_landform_classification_25km", + "GHSL_settlement_model_classification", + "GHSL_modal_settlement_model_classification_5km", + "GHSL_modal_settlement_model_classification_25km", + "Joly-Peuch_classification_code", "Koppen-Geiger_classification", + "Koppen-Geiger_modal_classification_5km", + "Koppen-Geiger_modal_classification_25km", + "MODIS_MCD12C1_v6_IGBP_land_use", "MODIS_MCD12C1_v6_modal_IGBP_land_use_5km", + "MODIS_MCD12C1_v6_modal_IGBP_land_use_25km", "MODIS_MCD12C1_v6_UMD_land_use", + "MODIS_MCD12C1_v6_modal_UMD_land_use_5km", + "MODIS_MCD12C1_v6_modal_UMD_land_use_25km", "MODIS_MCD12C1_v6_LAI", + "MODIS_MCD12C1_v6_modal_LAI_5km", "MODIS_MCD12C1_v6_modal_LAI_25km", + "WMO_region", "WWF_TEOW_terrestrial_ecoregion", "WWF_TEOW_biogeographical_realm", + "WWF_TEOW_biome", "UMBC_anthrome_classification", + "UMBC_modal_anthrome_classification_5km", + "UMBC_modal_anthrome_classification_25km", + "EDGAR_v4.3.2_annual_average_BC_emissions", + "EDGAR_v4.3.2_annual_average_CO_emissions", + "EDGAR_v4.3.2_annual_average_NH3_emissions", + "EDGAR_v4.3.2_annual_average_NMVOC_emissions", + "EDGAR_v4.3.2_annual_average_NOx_emissions", + "EDGAR_v4.3.2_annual_average_OC_emissions", + "EDGAR_v4.3.2_annual_average_PM10_emissions", + "EDGAR_v4.3.2_annual_average_biogenic_PM2.5_emissions", + "EDGAR_v4.3.2_annual_average_fossilfuel_PM2.5_emissions", + "EDGAR_v4.3.2_annual_average_SO2_emissions", "ASTER_v3_altitude", + "ETOPO1_altitude", "ETOPO1_max_altitude_difference_5km", + "GHSL_built_up_area_density", "GHSL_average_built_up_area_density_5km", + "GHSL_average_built_up_area_density_25km", "GHSL_max_built_up_area_density_5km", + "GHSL_max_built_up_area_density_25km", "GHSL_population_density", + "GHSL_average_population_density_5km", "GHSL_average_population_density_25km", + "GHSL_max_population_density_5km", "GHSL_max_population_density_25km", + "GPW_population_density", "GPW_average_population_density_5km", + "GPW_average_population_density_25km", "GPW_max_population_density_5km", + "GPW_max_population_density_25km", + "NOAA-DMSP-OLS_v4_nighttime_stable_lights", + "NOAA-DMSP-OLS_v4_average_nighttime_stable_lights_5km", + "NOAA-DMSP-OLS_v4_average_nighttime_stable_lights_25km", + "NOAA-DMSP-OLS_v4_max_nighttime_stable_lights_5km", + "NOAA-DMSP-OLS_v4_max_nighttime_stable_lights_25km", + "OMI_level3_column_annual_average_NO2", + 
"OMI_level3_column_cloud_screened_annual_average_NO2", + "OMI_level3_tropospheric_column_annual_average_NO2", + "OMI_level3_tropospheric_column_cloud_screened_annual_average_NO2", + "GSFC_coastline_proximity", "primary_sampling_type", + "primary_sampling_instrument_name", + "primary_sampling_instrument_documented_flow_rate", + "primary_sampling_instrument_reported_flow_rate", + "primary_sampling_process_details", "primary_sampling_instrument_manual_name", + "primary_sampling_further_details", "sample_preparation_types", + "sample_preparation_techniques", "sample_preparation_process_details", + "sample_preparation_further_details", "measurement_methodology", + "measuring_instrument_name", "measuring_instrument_sampling_type", + "measuring_instrument_documented_flow_rate", + "measuring_instrument_reported_flow_rate", "measuring_instrument_process_details", + "measuring_instrument_process_details", "measuring_instrument_manual_name", + "measuring_instrument_further_details", "measuring_instrument_reported_units", + "measuring_instrument_reported_lower_limit_of_detection", + "measuring_instrument_documented_lower_limit_of_detection", + "measuring_instrument_reported_upper_limit_of_detection", + "measuring_instrument_documented_upper_limit_of_detection", + "measuring_instrument_reported_uncertainty", + "measuring_instrument_documented_uncertainty", + "measuring_instrument_reported_accuracy", + "measuring_instrument_documented_accuracy", + "measuring_instrument_reported_precision", + "measuring_instrument_documented_precision", + "measuring_instrument_reported_zero_drift", + "measuring_instrument_documented_zero_drift", + "measuring_instrument_reported_span_drift", + "measuring_instrument_documented_span_drift", + "measuring_instrument_reported_zonal_drift", + "measuring_instrument_documented_zonal_drift", + "measuring_instrument_reported_measurement_resolution", + "measuring_instrument_documented_measurement_resolution", + "measuring_instrument_reported_absorption_cross_section", + "measuring_instrument_documented_absorption_cross_section", + "measuring_instrument_inlet_information", + "measuring_instrument_calibration_scale", + "network_provided_volume_standard_temperature", + "network_provided_volume_standard_pressure", "retrieval_algorithm", + "principal_investigator_name", "principal_investigator_institution", + "principal_investigator_email_address", "contact_name", + "contact_institution", "contact_email_address", "meta_update_stamp", + "data_download_stamp", "data_revision_stamp", "network_sampling_details", + "network_uncertainty_details", "network_maintenance_details", + "network_qa_details", "network_miscellaneous_details", "data_licence", + "process_warnings", "temporal_resolution", + "reported_lower_limit_of_detection_per_measurement", + "reported_upper_limit_of_detection_per_measurement", + "reported_uncertainty_per_measurement", "derived_uncertainty_per_measurement", + "day_night_code", "weekday_weekend_code", "season_code", + "hourly_native_representativity_percent", "hourly_native_max_gap_percent", + "daily_native_representativity_percent", "daily_representativity_percent", + "daily_native_max_gap_percent", "daily_max_gap_percent", + "monthly_native_representativity_percent", "monthly_representativity_percent", + "monthly_native_max_gap_percent", "monthly_max_gap_percent", + "annual_native_representativity_percent", "annual_native_max_gap_percent", + "all_representativity_percent", "all_max_gap_percent"], + } + + return metadata_variables[ghost_version] + + # noinspection 
DuplicatedCode + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : list, str + List (or single string) of the variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. + idx_time : int + Index of time for which the data will be saved in the shapefile. + """ + + if idx_lev != 0: + msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev) + raise ValueError(msg) + + for var_name in var_list: + # station as dimension + if len(self.variables[var_name]["dimensions"]) == 1: + self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel() + # station and time as dimensions + else: + self.shapefile[var_name] = self.variables[var_name]["data"][:, idx_time].ravel() + + return None + + @staticmethod + def _get_axis_index_(axis): + if axis == "T": + value = 1 + elif axis == "X": + value = 0 + else: + raise ValueError("Unknown axis: {0}".format(axis)) + return value + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + return None diff --git a/build/lib/nes/nc_projections/points_nes_providentia.py b/build/lib/nes/nc_projections/points_nes_providentia.py new file mode 100644 index 0000000000000000000000000000000000000000..ad3fc56282e7a6f406d4ed857d59e870fd91fb05 --- /dev/null +++ b/build/lib/nes/nc_projections/points_nes_providentia.py @@ -0,0 +1,650 @@ +#!/usr/bin/env python + +import sys +from warnings import warn +from copy import deepcopy +from numpy import ndarray, generic, array, issubdtype, character, concatenate +from .points_nes import PointsNes + + +class PointsNesProvidentia(PointsNes): + """ + + Attributes + ---------- + _model_centre_lon : dict + Model centre longitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + _model_centre_lat : dict + Model centre latitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + _grid_edge_lon : dict + Grid edge longitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + _grid_edge_lat : dict + Grid edge latitudes dictionary with the complete "data" key for all the values and the rest of the + attributes. + model_centre_lon : dict + Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. + model_centre_lat : dict + Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lon : dict + Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lat : dict + Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, + grid_edge_lat=None, + **kwargs): + """ + Initialize the PointsNesProvidentia class + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. 
+ parallel_method : str + Indicates the parallelization method that you want. Default: "X". + Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + model_centre_lon : dict + Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. + model_centre_lat : dict + Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lon : dict + Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lat : dict + Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. + """ + + super(PointsNesProvidentia, self).__init__(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, + create_nes=create_nes, times=times, balanced=balanced, **kwargs) + + if create_nes: + # Complete dimensions + self._model_centre_lon = model_centre_lon + self._model_centre_lat = model_centre_lat + self._grid_edge_lon = grid_edge_lon + self._grid_edge_lat = grid_edge_lat + else: + # Complete dimensions + self._model_centre_lon = self._get_coordinate_dimension(["model_centre_longitude"]) + self._model_centre_lat = self._get_coordinate_dimension(["model_centre_latitude"]) + self._grid_edge_lon = self._get_coordinate_dimension(["grid_edge_longitude"]) + self._grid_edge_lat = self._get_coordinate_dimension(["grid_edge_latitude"]) + + # Dimensions screening + self.model_centre_lon = self._get_coordinate_values(self._model_centre_lon, "") + self.model_centre_lat = self._get_coordinate_values(self._model_centre_lat, "") + self.grid_edge_lon = self._get_coordinate_values(self._grid_edge_lon, "") + self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, "") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="X", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + create_nes=False, balanced=False, times=None, + model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, + **kwargs): + """ + Initialize the PointsNesProvidentia class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "X". + Accepted values: ["X"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use + last_level : int, None + Index of the last level to use. None if it is the last. 
+ balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + model_centre_lon : dict + Model centre longitudes dictionary with the portion of "data" corresponding to the rank values. + model_centre_lat : dict + Model centre latitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lon : dict + Grid edge longitudes dictionary with the portion of "data" corresponding to the rank values. + grid_edge_lat : dict + Grid edge latitudes dictionary with the portion of "data" corresponding to the rank values. + """ + + new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, + model_centre_lon=model_centre_lon, model_centre_lat=model_centre_lat, + grid_edge_lon=grid_edge_lon, grid_edge_lat=grid_edge_lat, **kwargs) + + return new + + def _create_dimensions(self, netcdf): + """ + Create "grid_edge", "model_latitude" and "model_longitude" dimensions and the super dimensions + "time", "time_nv", "station", and "strlen". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(PointsNesProvidentia, self)._create_dimensions(netcdf) + + # Create grid_edge, model_latitude and model_longitude dimensions + netcdf.createDimension("grid_edge", len(self._grid_edge_lon["data"])) + netcdf.createDimension("model_latitude", self._model_centre_lon["data"].shape[0]) + netcdf.createDimension("model_longitude", self._model_centre_lon["data"].shape[1]) + + return None + + def _create_dimension_variables(self, netcdf): + """ + Create the "model_centre_lon", model_centre_lat", "grid_edge_lon" and "grid_edge_lat" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. 
+ """ + + super(PointsNesProvidentia, self)._create_dimension_variables(netcdf) + + # MODEL CENTRE LONGITUDES + model_centre_lon = netcdf.createVariable("model_centre_longitude", "f8", + ("model_latitude", "model_longitude",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + model_centre_lon.units = "degrees_east" + model_centre_lon.axis = "X" + model_centre_lon.long_name = "model centre longitude" + model_centre_lon.standard_name = "model centre longitude" + if self.size > 1: + model_centre_lon.set_collective(True) + msg = "2D meshed grid centre longitudes with " + msg += "{} longitudes in {} bands of latitude".format(self._model_centre_lon["data"].shape[1], + self._model_centre_lat["data"].shape[0]) + model_centre_lon.description = msg + model_centre_lon[:] = self._model_centre_lon["data"] + + # MODEL CENTRE LATITUDES + model_centre_lat = netcdf.createVariable("model_centre_latitude", "f8", + ("model_latitude", "model_longitude",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + model_centre_lat.units = "degrees_north" + model_centre_lat.axis = "Y" + model_centre_lat.long_name = "model centre latitude" + model_centre_lat.standard_name = "model centre latitude" + if self.size > 1: + model_centre_lat.set_collective(True) + msg = "2D meshed grid centre longitudes with " + msg += "{} longitudes in {} bands of latitude".format(self._model_centre_lon["data"].shape[1], + self._model_centre_lat["data"].shape[0]) + model_centre_lat[:] = self._model_centre_lat["data"] + + # GRID EDGE DOMAIN LONGITUDES + grid_edge_lon = netcdf.createVariable("grid_edge_longitude", "f8", "grid_edge") + grid_edge_lon.units = "degrees_east" + grid_edge_lon.axis = "X" + grid_edge_lon.long_name = "grid edge longitude" + grid_edge_lon.standard_name = "grid edge longitude" + if self.size > 1: + grid_edge_lon.set_collective(True) + msg = "Longitude coordinate along edge of grid domain " + msg += "(going clockwise around grid boundary from bottom-left corner)." + grid_edge_lon.description = msg + grid_edge_lon[:] = self._grid_edge_lon["data"] + + # GRID EDGE DOMAIN LATITUDES + grid_edge_lat = netcdf.createVariable("grid_edge_latitude", "f8", "grid_edge") + grid_edge_lat.units = "degrees_north" + grid_edge_lat.axis = "Y" + grid_edge_lat.long_name = "grid edge latitude" + grid_edge_lat.standard_name = "grid edge latitude" + if self.size > 1: + grid_edge_lat.set_collective(True) + msg = "Latitude coordinate along edge of grid domain " + msg += "(going clockwise around grid boundary from bottom-left corner)." + grid_edge_lat.description = msg + grid_edge_lat[:] = self._grid_edge_lat["data"] + + self.free_vars(["model_centre_longitude", "model_centre_latitude", "grid_edge_longitude", "grid_edge_latitude"]) + + # noinspection DuplicatedCode + def _get_coordinate_values(self, coordinate_info, coordinate_axis, bounds=False): + """ + Get the coordinate data of the current portion. + + Parameters + ---------- + coordinate_info : dict, list + Dictionary with the "data" key with the coordinate variable values. and the attributes as other keys. + coordinate_axis : str + Name of the coordinate to extract. Accepted values: ["X"]. + bounds : bool + Boolean variable to know if there are coordinate bounds. + Returns + ------- + values : dict + Dictionary with the portion of data corresponding to the rank. 
+ """ + + if coordinate_info is None: + return None + + if not isinstance(coordinate_info, dict): + values = {"data": deepcopy(coordinate_info)} + else: + values = deepcopy(coordinate_info) + + coordinate_len = len(values["data"].shape) + if bounds: + coordinate_len -= 1 + + if coordinate_axis == "X": + if coordinate_len == 1: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif coordinate_len == 2: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif coordinate_len == 3: + values["data"] = values["data"][self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], :] + else: + raise NotImplementedError("The coordinate has wrong dimensions: {dim}".format( + dim=values["data"].shape)) + elif coordinate_axis == "": + # pass for "model_centre_lon", "model_centre_lat", "grid_edge_lon" and "grid_edge_lat" + pass + + return values + + # noinspection DuplicatedCode + def _read_variable(self, var_name): + """ + Read the corresponding variable data according to the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + data: array + Portion of the variable data corresponding to the rank. + """ + nc_var = self.dataset.variables[var_name] + var_dims = nc_var.dimensions + + # Read data in 1, 2 or 3 dimensions + if len(var_dims) < 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"]] + elif len(var_dims) == 2: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"]] + elif len(var_dims) == 3: + data = nc_var[self.read_axis_limits["x_min"]:self.read_axis_limits["x_max"], + self.read_axis_limits["t_min"]:self.read_axis_limits["t_max"], + :] + else: + raise NotImplementedError("Error with {0}. Only can be read netCDF with 3 dimensions or less".format( + var_name)) + + # Unmask array + data = self._unmask_array(data) + + return data + + # noinspection DuplicatedCode + def _create_variables(self, netcdf, chunking=False): + """ + Create the netCDF file variables. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python open Dataset. + chunking : bool + Indicates if you want to chunk the output netCDF. + """ + + if self.variables is not None: + for i, (var_name, var_dict) in enumerate(self.variables.items()): + # Get data type + if "dtype" in var_dict.keys(): + var_dtype = var_dict["dtype"] + if (var_dict["data"] is not None) and (var_dtype != var_dict["data"].dtype): + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, + var_dict["data"].dtype) + warn(msg) + sys.stderr.flush() + try: + var_dict["data"] = var_dict["data"].astype(var_dtype) + except Exception: # TODO: Detect exception + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict["data"].dtype + if var_dtype is object: + raise TypeError("Data dtype is object. Define dtype explicitly as dictionary key 'dtype'") + + # Get dimensions when reading datasets + if "dimensions" in var_dict.keys(): + var_dims = var_dict["dimensions"] + # Get dimensions when creating new datasets + else: + if len(var_dict["data"].shape) == 1: + # For data that depends only on station (e.g. 
station_code) + var_dims = self._var_dim + else: + # For data that is dependent on time and station (e.g. PM10) + var_dims = self._var_dim + ("time",) + + if var_dict["data"] is not None: + + # Ensure data is of type numpy array (to create NES) + if not isinstance(var_dict["data"], (ndarray, generic)): + try: + var_dict["data"] = array(var_dict["data"]) + except AttributeError: + raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) + + # Convert list of strings to chars for parallelization + if issubdtype(var_dtype, character): + var_dict["data_aux"] = self._str2char(var_dict["data"]) + var_dims += ("strlen",) + var_dtype = "S1" + + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + if not chunking: + var = netcdf.createVariable(var_name, var_dtype, var_dims, + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + else: + if self.master: + chunk_size = var_dict["data"].shape + else: + chunk_size = None + chunk_size = self.comm.bcast(chunk_size, root=0) + var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, + complevel=self.zip_lvl, chunksizes=chunk_size) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + for att_name, att_value in var_dict.items(): + if att_name == "data": + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + if "data_aux" in var_dict.keys(): + att_value = var_dict["data_aux"] + if len(att_value.shape) == 1: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 2: + if "strlen" in var_dims: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], :].shape, + att_value.shape)) + else: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. 
out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"]].shape, + att_value.shape)) + elif len(att_value.shape) == 3: + try: + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + except ValueError: + raise ValueError("Axis limits cannot be accessed. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + :].shape, + att_value.shape)) + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: + var.setncattr(att_name, att_value) + + if "data_aux" in var_dict.keys(): + del var_dict["data_aux"] + + self._set_var_crs(var) + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + + return None + + # noinspection DuplicatedCode + def _gather_data(self, data_to_gather): + """ + Gather all the variable data into the MPI rank 0 to perform a serial write. + + Returns + ------- + data_to_gather: dict + Variables to gather. + """ + + data_list = deepcopy(data_to_gather) + for var_name, var_info in data_list.items(): + try: + # noinspection PyArgumentList + data_aux = self.comm.gather(data_list[var_name]["data"], root=0) + if self.rank == 0: + shp_len = len(data_list[var_name]["data"].shape) + # concatenate over station + if self.parallel_method == "X": + if shp_len == 1: + # dimensions = (station) + axis = 0 + elif shp_len == 2: + # dimensions = (station, strlen) or + # dimensions = (station, time) + axis = 0 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + elif self.parallel_method == "T": + # concatenate over time + if shp_len == 1: + # dimensions = (station) + axis = None + elif shp_len == 2: + if "strlen" in var_info["dimensions"]: + # dimensions = (station, strlen) + axis = None + else: + # dimensions = (station, time) + axis = 1 + else: + msg = "The points NetCDF must have " + msg += "surface values (without levels)." + raise NotImplementedError(msg) + else: + raise NotImplementedError( + "Parallel method '{meth}' is not implemented. Use one of these: {accept}".format( + meth=self.parallel_method, accept=["X", "T"])) + data_list[var_name]["data"] = concatenate(data_aux, axis=axis) + except Exception as e: + msg = f"**ERROR** an error has occurred while gathering the '{var_name}' variable.\n" + print(msg) + sys.stderr.write(msg) + print(e) + sys.stderr.write(str(e)) + # print(e, file=sys.stderr) + sys.stderr.flush() + self.comm.Abort(1) + + return data_list + + def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, nc_type="NES", + keep_open=False): + """ + Write the netCDF output file. + + Parameters + ---------- + path : str + Path to the output netCDF file. + compression_level : int + Level of compression (0 to 9) Default: 0 (no compression). 
+ serial : bool + Indicates if you want to write in serial or not. Default: False. + info : bool + Indicates if you want to print the information of each writing step by stdout Default: False. + chunking : bool + Indicates if you want a chunked netCDF output. Only available with non-serial writes. Default: False. + nc_type : str + Type to NetCDf to write. "CAMS_RA" or "NES" + keep_open : bool + Indicates if you want to keep open the NetCDH to fill the data by time-step + """ + + if (not serial) and (self.size > 1): + msg = "WARNING!!! " + msg += "Providentia datasets cannot be written in parallel yet. " + msg += "Changing to serial mode." + warn(msg) + sys.stderr.flush() + + super(PointsNesProvidentia, self).to_netcdf(path, compression_level=compression_level, + serial=True, info=info, chunking=chunking) + + return None + + # noinspection DuplicatedCode + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): + """ + Add variables data to shapefile. + + var_list : list, str + List (or single string) of the variables to be loaded and saved in the shapefile. + idx_lev : int + Index of vertical level for which the data will be saved in the shapefile. + idx_time : int + Index of time for which the data will be saved in the shapefile. + """ + + if idx_lev != 0: + msg = "Error: Points dataset has no level (Level: {0}).".format(idx_lev) + raise ValueError(msg) + + for var_name in var_list: + # station as dimension + if len(self.variables[var_name]["dimensions"]) == 1: + self.shapefile[var_name] = self.variables[var_name]["data"][:].ravel() + # station and time as dimensions + else: + self.shapefile[var_name] = self.variables[var_name]["data"][:, idx_time].ravel() + + return None + + @staticmethod + def _get_axis_index_(axis): + if axis == "T": + value = 1 + elif axis == "X": + value = 0 + else: + raise ValueError("Unknown axis: {0}".format(axis)) + return value + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping + + Parameters + ---------- + var : Variable + netCDF4-python variable object. + """ + return None diff --git a/build/lib/nes/nc_projections/rotated_nes.py b/build/lib/nes/nc_projections/rotated_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c3794e82503721d16a0c32a57864bcc23faa67 --- /dev/null +++ b/build/lib/nes/nc_projections/rotated_nes.py @@ -0,0 +1,694 @@ +#!/usr/bin/env python + +from numpy import (float64, linspace, cos, sin, arcsin, arctan2, array, mean, diff, append, flip, repeat, concatenate, + vstack) +from math import pi +from geopandas import GeoDataFrame +from pandas import Index +from pyproj import Proj +from copy import deepcopy +from typing import Dict, Any +from shapely.geometry import Polygon, Point +from .default_nes import Nes + + +class RotatedNes(Nes): + """ + + Attributes + ---------- + _full_rlat : dict + Rotated latitudes dictionary with the complete "data" key for all the values and the rest of the attributes. + _full_rlon : dict + Rotated longitudes dictionary with the complete "data" key for all the values and the rest of the attributes. + rlat : dict + Rotated latitudes dictionary with the portion of "data" corresponding to the rank values. + rlon : dict + Rotated longitudes dictionary with the portion of "data" corresponding to the rank values. + _var_dim : tuple + A Tuple with the name of the Y and X dimensions for the variables. + ("rlat", "rlon") for a rotated projection. + _lat_dim : tuple + A Tuple with the name of the dimensions of the Latitude values. 
+ ("rlat", "rlon") for a rotated projection. + _lon_dim : tuple + A Tuple with the name of the dimensions of the Longitude values. + ("rlat", "rlon") for a rotated projection. + """ + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the RotatedNes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + self._full_rlat = None + self._full_rlon = None + + super(RotatedNes, self).__init__(comm=comm, path=path, + info=info, dataset=dataset, balanced=balanced, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + if create_nes: + # Complete dimensions + # self._full_rlat, self._full_rlon = self._create_rotated_coordinates() + # Dimensions screening + self.lat = self._get_coordinate_values(self.get_full_latitudes(), "Y") + self.lon = self._get_coordinate_values(self.get_full_longitudes(), "X") + else: + # Complete dimensions + self._full_rlat = self._get_coordinate_dimension("rlat") + self._full_rlon = self._get_coordinate_dimension("rlon") + + # Dimensions screening + self.rlat = self._get_coordinate_values(self.get_full_rlat(), "Y") + self.rlon = self._get_coordinate_values(self.get_full_rlon(), "X") + + # Set axis limits for parallel writing + self.write_axis_limits = self._get_write_axis_limits() + + self._var_dim = ("rlat", "rlon") + self._lat_dim = ("rlat", "rlon") + self._lon_dim = ("rlat", "rlon") + + @staticmethod + def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the Nes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default over Y axis + accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. 
+ last_level : int or None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : List[datetime] or None + List of times to substitute the current ones while creation. + """ + + new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, + avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, + create_nes=create_nes, balanced=balanced, times=times, **kwargs) + + return new + + def get_full_rlat(self) -> Dict[str, Any]: + """ + Retrieve the complete rotated latitude information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_rlat) + + return data + + def get_full_rlon(self) -> Dict[str, Any]: + """ + Retrieve the complete rotated longitude information. + + Returns + ------- + Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + data = self.comm.bcast(self._full_rlon) + return data + + def set_full_rlat(self, data: Dict[str, Any]) -> None: + """ + Set the complete rotated latitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete latitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of latitude values. + attr_name: attr_value, # Latitude attributes. + ... + } + """ + if self.master: + self._full_rlat = data + return None + + def set_full_rlon(self, data: Dict[str, Any]) -> None: + """ + Set the complete rotated longitude information. + + Parameters + ---------- + data : Dict[str, Any] + A dictionary containing the complete longitude data and its attributes. + The dictionary structure is: + { + "data": ndarray, # Array of longitude values. + attr_name: attr_value, # Longitude attributes. + ... + } + """ + if self.master: + self._full_rlon = data + return None + + # noinspection DuplicatedCode + def _filter_coordinates_selection(self): + """ + Use the selection limits to filter rlat, rlon, time, lev, lat, lon, lon_bnds and lat_bnds. + """ + + idx = self._get_idx_intervals() + + full_rlat = self.get_full_rlat() + full_rlon = self.get_full_rlon() + + self.rlat = self._get_coordinate_values(full_rlat, "Y") + self.rlon = self._get_coordinate_values(full_rlon, "X") + + if self.master: + self.set_full_rlat({'data': full_rlat["data"][idx["idx_y_min"]:idx["idx_y_max"]]}) + self.set_full_rlon({'data': full_rlon["data"][idx["idx_x_min"]:idx["idx_x_max"]]}) + + super(RotatedNes, self)._filter_coordinates_selection() + + return None + + def _get_pyproj_projection(self): + """ + Get projection data as in Pyproj library. + + Returns + ---------- + projection : pyproj.Proj + Grid projection. 
+ """ + + projection = Proj(proj="ob_tran", + o_proj="longlat", + ellps="WGS84", + R=self.earth_radius[0], + o_lat_p=float64(self.projection_data["grid_north_pole_latitude"]), + o_lon_p=float64(self.projection_data["grid_north_pole_longitude"]), + ) + + return projection + + # noinspection DuplicatedCode + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + if create_nes: + projection_data = {"grid_mapping_name": "rotated_latitude_longitude", + "grid_north_pole_latitude": 90 - kwargs["centre_lat"], + "grid_north_pole_longitude": -180 + kwargs["centre_lon"], + "inc_rlat": kwargs["inc_rlat"], + "inc_rlon": kwargs["inc_rlon"], + "south_boundary": kwargs["south_boundary"], + "west_boundary": kwargs["west_boundary"], + } + else: + if "rotated_pole" in self.variables.keys(): + projection_data = self.variables["rotated_pole"] + self.free_vars("rotated_pole") + else: + msg = "There is no variable called rotated_pole, projection has not been defined." + raise RuntimeError(msg) + + if "dtype" in projection_data.keys(): + del projection_data["dtype"] + + if "data" in projection_data.keys(): + del projection_data["data"] + + if "dimensions" in projection_data.keys(): + del projection_data["dimensions"] + + return projection_data + + def _create_dimensions(self, netcdf): + """ + Create "rlat", "rlon" and "spatial_nv" dimensions and the dimensions "lev", "time", "time_nv", "lon" and "lat". + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + + super(RotatedNes, self)._create_dimensions(netcdf) + + shape = self.get_full_shape() + # Create rlat and rlon dimensions + netcdf.createDimension("rlon", shape[1]) + netcdf.createDimension("rlat", shape[0]) + + # Create spatial_nv (number of vertices) dimension + if (self.lat_bnds is not None) and (self.lon_bnds is not None): + netcdf.createDimension("spatial_nv", 4) + pass + + return None + + def _create_dimension_variables(self, netcdf): + """ + Create the "rlat" and "rlon" variables. + + Parameters + ---------- + netcdf : Dataset + NetCDF object. + """ + super(RotatedNes, self)._create_dimension_variables(netcdf) + + # ROTATED LATITUDES + full_rlat = self.get_full_rlat() + rlat = netcdf.createVariable("rlat", full_rlat["data"].dtype, ("rlat",)) + rlat.long_name = "latitude in rotated pole grid" + if "units" in full_rlat.keys(): + rlat.units = full_rlat["units"] + else: + rlat.units = "degrees" + rlat.standard_name = "grid_latitude" + if self.size > 1: + rlat.set_collective(True) + rlat[:] = full_rlat["data"] + + # ROTATED LONGITUDES + full_rlon = self.get_full_rlon() + rlon = netcdf.createVariable("rlon", full_rlon["data"].dtype, ("rlon",)) + rlon.long_name = "longitude in rotated pole grid" + if "units" in full_rlon.keys(): + rlon.units = full_rlon["units"] + else: + rlon.units = "degrees" + rlon.standard_name = "grid_longitude" + if self.size > 1: + rlon.set_collective(True) + rlon[:] = full_rlon["data"] + + return None + + def _create_rotated_coordinates(self): + """ + Calculate rotated latitudes and longitudes from grid details. + + Returns + ---------- + _rlat : dict + Rotated latitudes dictionary with the "data" key for all the values and the rest of the attributes. 
+ _rlon : dict + Rotated longitudes dictionary with the "data" key for all the values and the rest of the attributes. + """ + # Get grid resolution + inc_rlon = float64(self.projection_data["inc_rlon"]) + inc_rlat = float64(self.projection_data["inc_rlat"]) + + # Get south and west boundaries + south_boundary = float64(self.projection_data["south_boundary"]) + west_boundary = float64(self.projection_data["west_boundary"]) + + # Calculate rotated latitudes + n_lat = int((abs(south_boundary) / inc_rlat) * 2 + 1) + rlat = linspace(south_boundary, south_boundary + (inc_rlat * (n_lat - 1)), n_lat, dtype=float64) + + # Calculate rotated longitudes + n_lon = int((abs(west_boundary) / inc_rlon) * 2 + 1) + rlon = linspace(west_boundary, west_boundary + (inc_rlon * (n_lon - 1)), n_lon, dtype=float64) + + return {"data": rlat}, {"data": rlon} + + def rotated2latlon(self, lon_deg, lat_deg, lon_min=-180): + """ + Calculate the unrotated coordinates using the rotated ones. + + Parameters + ---------- + lon_deg : array + Rotated longitude coordinate. + lat_deg : array + Rotated latitude coordinate. + lon_min : float + Minimum value for the longitudes: -180 (-180 to 180) or 0 (0 to 360). + + Returns + ---------- + almd : array + Unrotated longitudes. + aphd : array + Unrotated latitudes. + """ + + # Get centre coordinates + centre_lat = 90 - float64(self.projection_data["grid_north_pole_latitude"]) + centre_lon = float64(self.projection_data["grid_north_pole_longitude"]) + 180 + + # Convert to radians + degrees_to_radians = pi / 180. + tph0 = centre_lat * degrees_to_radians + tlm = lon_deg * degrees_to_radians + tph = lat_deg * degrees_to_radians + + tlm0d = -180 + centre_lon + ctph0 = cos(tph0) + stph0 = sin(tph0) + stlm = sin(tlm) + ctlm = cos(tlm) + stph = sin(tph) + ctph = cos(tph) + + # Calculate unrotated latitudes + sph = (ctph0 * stph) + (stph0 * ctph * ctlm) + sph[sph > 1.] = 1. + sph[sph < -1.] = -1. + aph = arcsin(sph) + aphd = aph / degrees_to_radians + + # Calculate rotated longitudes + anum = ctph * stlm + denom = (ctlm * ctph - stph0 * sph) / ctph0 + relm = arctan2(anum, denom) - pi + almd = relm / degrees_to_radians + tlm0d + almd[almd > (lon_min + 360)] -= 360 + almd[almd < lon_min] += 360 + + return almd, aphd + + def _create_centre_coordinates(self, **kwargs): + """ + Calculate centre latitudes and longitudes from grid details. + + Returns + ---------- + centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). + """ + if self.master: + # Complete dimensions + self._full_rlat, self._full_rlon = self._create_rotated_coordinates() + + # Calculate centre latitudes and longitudes (1D to 2D) + centre_lon, centre_lat = self.rotated2latlon( + array([self._full_rlon["data"]] * len(self._full_rlat["data"])), + array([self._full_rlat["data"]] * len(self._full_rlon["data"])).T) + + return {"data": centre_lat}, {"data": centre_lon} + else: + return None, None + + def create_providentia_exp_centre_coordinates(self): + """ + Calculate centre latitudes and longitudes from original coordinates and store as 2D arrays. + + Returns + ---------- + model_centre_lat : dict + Dictionary with data of centre coordinates for latitude in 2D (latitude, longitude). + model_centre_lon : dict + Dictionary with data of centre coordinates for longitude in 2D (latitude, longitude). 
+ """ + + # Get centre latitudes + model_centre_lat = self.lat + + # Get centre longitudes + model_centre_lon = self.lon + + return model_centre_lat, model_centre_lon + + # noinspection DuplicatedCode + def create_providentia_exp_grid_edge_coordinates(self): + """ + Calculate grid edge latitudes and longitudes and get model grid outline. + + Returns + ---------- + grid_edge_lat : dict + Dictionary with data of grid edge latitudes. + grid_edge_lon : dict + Dictionary with data of grid edge longitudes. + """ + + # Get grid resolution + inc_rlon = abs(mean(diff(self.rlon["data"]))) + inc_rlat = abs(mean(diff(self.rlat["data"]))) + + # Get bounds for rotated coordinates + rlat_bounds = self._create_single_spatial_bounds(self.rlat["data"], inc_rlat) + rlon_bounds = self._create_single_spatial_bounds(self.rlon["data"], inc_rlon) + + # Get rotated latitudes for grid edge + left_edge_rlat = append(rlat_bounds.flatten()[::2], rlat_bounds.flatten()[-1]) + right_edge_rlat = flip(left_edge_rlat, 0) + top_edge_rlat = repeat(rlat_bounds[-1][-1], len(self.rlon["data"]) - 1) + bottom_edge_rlat = repeat(rlat_bounds[0][0], len(self.rlon["data"])) + rlat_grid_edge = concatenate((left_edge_rlat, top_edge_rlat, right_edge_rlat, bottom_edge_rlat)) + + # Get rotated longitudes for grid edge + left_edge_rlon = repeat(rlon_bounds[0][0], len(self.rlat["data"]) + 1) + top_edge_rlon = rlon_bounds.flatten()[1:-1:2] + right_edge_rlon = repeat(rlon_bounds[-1][-1], len(self.rlat["data"]) + 1) + bottom_edge_rlon = flip(rlon_bounds.flatten()[:-1:2], 0) + rlon_grid_edge = concatenate((left_edge_rlon, top_edge_rlon, right_edge_rlon, bottom_edge_rlon)) + + # Get edges for regular coordinates + grid_edge_lon_data, grid_edge_lat_data = self.rotated2latlon(rlon_grid_edge, rlat_grid_edge) + + # Create grid outline by stacking the edges in both coordinates + model_grid_outline = vstack((grid_edge_lon_data, grid_edge_lat_data)).T + + grid_edge_lat = {"data": model_grid_outline[:, 1]} + grid_edge_lon = {"data": model_grid_outline[:, 0]} + + return grid_edge_lat, grid_edge_lon + + # noinspection DuplicatedCode + def create_spatial_bounds(self): + """ + Calculate longitude and latitude bounds and set them. + """ + + # Calculate rotated coordinates bounds + full_rlat = self.get_full_rlat() + full_rlon = self.get_full_rlon() + inc_rlat = abs(mean(diff(full_rlat["data"]))) + rlat_bnds = self._create_single_spatial_bounds(array([full_rlat["data"]] * len(full_rlon["data"])).T, + inc_rlat, spatial_nv=4, inverse=True) + + inc_rlon = abs(mean(diff(full_rlon["data"]))) + rlon_bnds = self._create_single_spatial_bounds(array([full_rlon["data"]] * len(full_rlat["data"])), + inc_rlon, spatial_nv=4) + + # Transform rotated bounds to regular bounds + lon_bnds, lat_bnds = self.rotated2latlon(rlon_bnds, rlat_bnds) + + # Obtain regular coordinates bounds + self.set_full_latitudes_boundaries({"data": deepcopy(lat_bnds)}) + self.lat_bnds = {"data": lat_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + self.set_full_longitudes_boundaries({"data": deepcopy(lon_bnds)}) + self.lon_bnds = {"data": lon_bnds[self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"], + :]} + + return None + + @staticmethod + def _set_var_crs(var): + """ + Set the grid_mapping to "rotated_pole". + + Parameters + ---------- + var : Variable + netCDF4-python variable object. 
+ """ + + var.grid_mapping = "rotated_pole" + var.coordinates = "lat lon" + + return None + + def _create_metadata(self, netcdf): + """ + Create the "crs" variable for the rotated latitude longitude grid_mapping. + + Parameters + ---------- + netcdf : Dataset + netcdf4-python Dataset. + """ + + if self.projection_data is not None: + mapping = netcdf.createVariable("rotated_pole", "i") + mapping.grid_mapping_name = self.projection_data["grid_mapping_name"] + mapping.grid_north_pole_latitude = self.projection_data["grid_north_pole_latitude"] + mapping.grid_north_pole_longitude = self.projection_data["grid_north_pole_longitude"] + + return None + + def to_grib2(self, path, grib_keys, grib_template_path, lat_flip=False, info=False): + """ + Write output file with grib2 format. + + Parameters + ---------- + lat_flip : bool + Indicates if you want to flip the latitude coordinates. + path : str + Path to the output file. + grib_keys : dict + Dictionary with the grib2 keys. + grib_template_path : str + Path to the grib2 file to use as template. + info : bool + Indicates if you want to print extra information during the process. + """ + + raise NotImplementedError("Grib2 format cannot be written in a Rotated pole projection.") + + # noinspection DuplicatedCode + def create_shapefile(self): + """ + Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. + """ + + if self.shapefile is None: + + if self.lat_bnds is None or self.lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds["data"].reshape((self.lat_bnds["data"].shape[0] * self.lat_bnds["data"].shape[1], + self.lat_bnds["data"].shape[2])) + aux_b_lons = self.lon_bnds["data"].reshape((self.lon_bnds["data"].shape[0] * self.lon_bnds["data"].shape[1], + self.lon_bnds["data"].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons + fids = self.get_fids() + gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + + return gdf + + # noinspection DuplicatedCode + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon["data"].shape[0]): + for lon_ind in range(0, self.lon["data"].shape[1]): + centroids.append(Point(self.lon["data"][lat_ind, lon_ind], + self.lat["data"][lat_ind, lon_ind])) + + # Create dataframe cointaining all points + fids = self.get_fids() + centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326") + + return centroids_gdf diff --git a/build/lib/nes/nc_projections/rotated_nested_nes.py b/build/lib/nes/nc_projections/rotated_nested_nes.py new file mode 100644 index 0000000000000000000000000000000000000000..4517701655ee09272de966e74bc03ef60c87514f --- /dev/null +++ b/build/lib/nes/nc_projections/rotated_nested_nes.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python + +from numpy import linspace, float64 +from netCDF4 import Dataset +from .rotated_nes import RotatedNes + + +class RotatedNestedNes(RotatedNes): + + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method="Y", + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + balanced=False, times=None, **kwargs): + """ + Initialize the RotatedNestedNes class. + + Parameters + ---------- + comm: MPI.COMM + MPI Communicator. + path: str + Path to the NetCDF to initialize the object. + info: bool + Indicates if you want to get reading/writing info. + dataset: Dataset or None + NetCDF4-python Dataset to initialize the class. + parallel_method : str + Indicates the parallelization method that you want. Default: "Y". + Accepted values: ["X", "Y", "T"]. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + first_level : int + Index of the first level to use. + last_level : int, None + Index of the last level to use. None if it is the last. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + balanced : bool + Indicates if you want a balanced parallelization or not. + Balanced dataset cannot be written in chunking mode. + times : list, None + List of times to substitute the current ones while creation. + """ + + super(RotatedNestedNes, self).__init__(comm=comm, path=path, + info=info, dataset=dataset, balanced=balanced, + parallel_method=parallel_method, + avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, + first_level=first_level, last_level=last_level, create_nes=create_nes, + times=times, **kwargs) + + @staticmethod + def _get_parent_attributes(projection_data): + """ + Get projection attributes from parent grid. + + Parameters + ---------- + projection_data : dict + Dictionary with the projection information. + + Returns + ------- + projection_data : dict + Dictionary with the projection information, including parameters from the parent grid. 
+ """ + + # Read variables from parent grid + netcdf = Dataset(projection_data["parent_grid_path"], mode="r") + rlat = netcdf.variables["rlat"][:] + rlon = netcdf.variables["rlon"][:] + rotated_pole = netcdf.variables["rotated_pole"] + + # j_parent_start starts at index 1, so we must subtract 1 + projection_data["inc_rlat"] = (rlat[1] - rlat[0]) / projection_data["parent_ratio"] + projection_data["1st_rlat"] = rlat[int(projection_data["j_parent_start"]) - 1] + + # i_parent_start starts at index 1, so we must subtract 1 + projection_data["inc_rlon"] = (rlon[1] - rlon[0]) / projection_data["parent_ratio"] + projection_data["1st_rlon"] = rlon[int(projection_data["i_parent_start"]) - 1] + + projection_data["grid_north_pole_longitude"] = rotated_pole.grid_north_pole_longitude + projection_data["grid_north_pole_latitude"] = rotated_pole.grid_north_pole_latitude + + netcdf.close() + + return projection_data + + def _get_projection_data(self, create_nes, **kwargs): + """ + Retrieves projection data based on grid details. + + Parameters + ---------- + create_nes : bool + Flag indicating whether to create new object (True) or use existing (False). + **kwargs : dict + Additional keyword arguments for specifying projection details. + """ + if create_nes: + projection_data = {"grid_mapping_name": "rotated_latitude_longitude", + "parent_grid_path": kwargs["parent_grid_path"], + "parent_ratio": kwargs["parent_ratio"], + "i_parent_start": kwargs["i_parent_start"], + "j_parent_start": kwargs["j_parent_start"], + "n_rlat": kwargs["n_rlat"], + "n_rlon": kwargs["n_rlon"] + } + projection_data = self._get_parent_attributes(projection_data) + else: + projection_data = super()._get_projection_data(create_nes, **kwargs) + + return projection_data + + def _create_rotated_coordinates(self): + """ + Calculate rotated latitudes and longitudes from grid details. + + Returns + ---------- + _rlat : dict + Rotated latitudes dictionary with the "data" key for all the values and the rest of the attributes. + _rlon : dict + Rotated longitudes dictionary with the "data" key for all the values and the rest of the attributes. 
+ """ + + # Get grid resolution + inc_rlon = self.projection_data["inc_rlon"] + inc_rlat = self.projection_data["inc_rlat"] + + # Get number of rotated coordinates + n_rlat = self.projection_data["n_rlat"] + n_rlon = self.projection_data["n_rlon"] + + # Get first coordinates + first_rlat = self.projection_data["1st_rlat"] + first_rlon = self.projection_data["1st_rlon"] + + # Calculate rotated latitudes + rlat = linspace(first_rlat, first_rlat + (inc_rlat * (n_rlat - 1)), n_rlat, dtype=float64) + + # Calculate rotated longitudes + rlon = linspace(first_rlon, first_rlon + (inc_rlon * (n_rlon - 1)), n_rlon, dtype=float64) + + return {"data": rlat}, {"data": rlon} + \ No newline at end of file diff --git a/build/lib/nes/nes_formats/__init__.py b/build/lib/nes/nes_formats/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..39aaf300587fc924d1c27241bc2f2fbdaf0dba9c --- /dev/null +++ b/build/lib/nes/nes_formats/__init__.py @@ -0,0 +1,9 @@ +from .cams_ra_format import to_netcdf_cams_ra +from .monarch_format import to_netcdf_monarch, to_monarch_units +from .cmaq_format import to_netcdf_cmaq, to_cmaq_units +from .wrf_chem_format import to_netcdf_wrf_chem, to_wrf_chem_units + +__all__ = [ + 'to_netcdf_cams_ra', 'to_netcdf_monarch', 'to_monarch_units', 'to_netcdf_cmaq', 'to_cmaq_units', + 'to_netcdf_wrf_chem', 'to_wrf_chem_units' +] diff --git a/build/lib/nes/nes_formats/cams_ra_format.py b/build/lib/nes/nes_formats/cams_ra_format.py new file mode 100644 index 0000000000000000000000000000000000000000..480becccd81cd67e1e2a15426347e439119e0f3c --- /dev/null +++ b/build/lib/nes/nes_formats/cams_ra_format.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +import sys +import nes +from numpy import float64, float32, int32, array +from warnings import warn +from netCDF4 import Dataset +from mpi4py import MPI +from copy import copy + + +# noinspection DuplicatedCode +def to_netcdf_cams_ra(self, path): + """ + Horizontal methods from one grid to another one. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + """ + + if not isinstance(self, nes.LatLonNes): + raise TypeError("CAMS Re-Analysis format must have Regular Lat-Lon projection") + if "" not in path: + raise ValueError(f"AMS Re-Analysis path must contain '' as pattern; current: '{path}'") + + orig_path = copy(path) + + for i_lev, level in enumerate(self.lev["data"]): + path = orig_path.replace("", "l{0}".format(i_lev)) + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + self.to_dtype(data_type=float32) + + # Create dimensions + create_dimensions(self, netcdf) + + # Create variables + create_variables(self, netcdf, i_lev) + + # Create dimension variables + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Close NetCDF + if self.global_attrs is not None: + for att_name, att_value in self.global_attrs.items(): + netcdf.setncattr(att_name, att_value) + + netcdf.close() + + return None + + +def create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. 
+ netcdf : Dataset + netcdf4-python open dataset. + """ + + # Create time dimension + netcdf.createDimension("time", None) + + # Create lat and lon dimensions + netcdf.createDimension("lat", len(self.get_full_latitudes()["data"])) + netcdf.createDimension("lon", len(self.get_full_longitudes()["data"])) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "time", "lat" and "lon" variables. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + # LATITUDES + lat = netcdf.createVariable("lat", float64, ("lat",)) + lat.standard_name = "latitude" + lat.long_name = "latitude" + lat.units = "degrees_north" + lat.axis = "Y" + + if self.size > 1: + lat.set_collective(True) + lat[:] = self.get_full_latitudes()["data"] + + # LONGITUDES + lon = netcdf.createVariable("lon", float64, ("lon",)) + lon.long_name = "longitude" + lon.standard_name = "longitude" + lon.units = "degrees_east" + lon.axis = "X" + if self.size > 1: + lon.set_collective(True) + lon[:] = self.get_full_longitudes()["data"] + + # TIMES + time_var = netcdf.createVariable("time", float64, ("time",)) + time_var.standard_name = "time" + time_var.units = "day as %Y%m%d.%f" + time_var.calendar = "proleptic_gregorian" + time_var.axis = "T" + if self.size > 1: + time_var.set_collective(True) + time_var[:] = __date2num(self.get_full_times()[self._get_time_id(self.hours_start, first=True): + self._get_time_id(self.hours_end, first=False)]) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf, i_lev): + """ + Create and write variables to a netCDF file. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + i_lev : int + The specific level index to write data for. + """ + + for i, (var_name, var_dict) in enumerate(self.variables.items()): + if var_dict["data"] is not None: + if self.info: + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(self.variables))) + try: + var = netcdf.createVariable(var_name, float32, ("time", "lat", "lon",), + zlib=True, complevel=7, least_significant_digit=3) + + if self.info: + print("Rank {0:03d}: Var {1} created ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + if self.size > 1: + var.set_collective(True) + if self.info: + print("Rank {0:03d}: Var {1} collective ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + + if self.info: + print("Rank {0:03d}: Filling {1}".format(self.rank, var_name)) + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_dict["data"][:, i_lev, :, :] + + if self.info: + print("Rank {0:03d}: Var {1} data ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) + var.long_name = var_dict["long_name"] + var.units = var_dict["units"] + var.number_of_significant_digits = int32(3) + + if self.info: + print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, + len(self.variables))) + except Exception as e: + print(f"**ERROR** an error has occurred while writing the '{var_name}' variable") + raise e + else: + msg = "WARNING!!! " + msg += "Variable {0} was not loaded. 
It will not be written.".format(var_name) + warn(msg) + sys.stderr.flush() + + return None + + +def __date2num(time_array): + """ + Convert an array of datetime objects to numerical values. + + Parameters + ---------- + time_array : List[datetime.datetime] + List of datetime objects to be converted. + + Returns + ------- + numpy.ndarray + Array of numerical time values, with each date represented as a float. + + Notes + ----- + Each datetime is encoded as YYYYMMDD plus the fractional hour of the day (HH / 24). + """ + + time_res = [] + for aux_time in time_array: + time_res.append(float(aux_time.strftime("%Y%m%d")) + (float(aux_time.strftime("%H")) / 24)) + time_res = array(time_res, dtype=float64) + + return time_res diff --git a/build/lib/nes/nes_formats/cmaq_format.py b/build/lib/nes/nes_formats/cmaq_format.py new file mode 100644 index 0000000000000000000000000000000000000000..30a5cea70be6de753dcee740783100a2f1db67ad --- /dev/null +++ b/build/lib/nes/nes_formats/cmaq_format.py @@ -0,0 +1,355 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, array, ndarray, empty, int32, float64 +from netCDF4 import Dataset +from mpi4py import MPI +from copy import deepcopy +from datetime import datetime + +GLOBAL_ATTRIBUTES_ORDER = [ + "IOAPI_VERSION", "EXEC_ID", "FTYPE", "CDATE", "CTIME", "WDATE", "WTIME", "SDATE", "STIME", "TSTEP", "NTHIK", + "NCOLS", "NROWS", "NLAYS", "NVARS", "GDTYP", "P_ALP", "P_BET", "P_GAM", "XCENT", "YCENT", "XORIG", "YORIG", + "XCELL", "YCELL", "VGTYP", "VGTOP", "VGLVLS", "GDNAM", "UPNAM", "FILEDESC", "HISTORY", "VAR-LIST"] + + +# noinspection DuplicatedCode +def to_netcdf_cmaq(self, path, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + keep_open : bool + Indicates if you want to keep open the NetCDF to fill the data by time-step. + """ + + self.to_dtype(float32) + + set_global_attributes(self) + change_variable_attributes(self) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + create_dimensions(self, netcdf) + + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create variables + create_variables(self, netcdf) + + for att_name in GLOBAL_ATTRIBUTES_ORDER: + netcdf.setncattr(att_name, self.global_attrs[att_name]) + + # Close NetCDF + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +def change_variable_attributes(self): + """ + Modify the emission variable attributes so that the output can be used as input for the CMAQ model. + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
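+ + Example + ------- + A sketch of the rewrite for one hypothetical variable: an emission with units "mol.s-1" ends up with ``units`` set to "mole/s" (padded to 16 characters), ``var_desc`` set to its former ``long_name`` (padded to 80 characters) and ``long_name`` set to the variable name itself (padded to 16 characters). 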
+ """ + + for var_name in self.variables.keys(): + + if self.variables[var_name]["units"] == "mol.s-1": + self.variables[var_name]["units"] = "{:<16}".format("mole/s") + self.variables[var_name]["var_desc"] = "{:<80}".format(self.variables[var_name]["long_name"]) + self.variables[var_name]["long_name"] = "{:<16}".format(var_name) + elif self.variables[var_name]["units"] == "g.s-1": + self.variables[var_name]["units"] = "{:<16}".format("g/s") + self.variables[var_name]["var_desc"] = "{:<80}".format(self.variables[var_name]["long_name"]) + self.variables[var_name]["long_name"] = "{:<16}".format(var_name) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.s-1' or 'g.s-1'") + + return None + + +def to_cmaq_units(self): + """ + Change the data values according to the CMAQ conventions + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + self.calculate_grid_area(overwrite=False) + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol.s-1": + # Kmol.m-2.s-1 to mol.s-1 + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 1000 * self.cell_measures["cell_area"]["data"], dtype=float32) + elif self.variables[var_name]["units"] == "g.s-1": + # Kg.m-2.s-1 to g.s-1 + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 1000 * self.cell_measures["cell_area"]["data"], dtype=float32) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.s-1' or 'g.s-1'") + self.variables[var_name]["dtype"] = float32 + + return self.variables + + +def create_tflag(self): + """ + Create the content of the CMAQ variable TFLAG. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + numpy.ndarray + Array with the content of TFLAG. + """ + + t_flag = empty((len(self.time), len(self.variables), 2)) + + for i_d, aux_date in enumerate(self.time): + y_d = int(aux_date.strftime("%Y%j")) + hms = int(aux_date.strftime("%H%M%S")) + for i_p in range(len(self.variables)): + t_flag[i_d, i_p, 0] = y_d + t_flag[i_d, i_p, 1] = hms + + return t_flag + + +def str_var_list(self): + """ + Transform the list of variable names to a string with the elements with 16 white spaces. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + str + List of variable names transformed on string. + """ + + str_var_list_aux = "" + for var in self.variables.keys(): + str_var_list_aux += "{:<16}".format(var) + + return str_var_list_aux + + +# noinspection DuplicatedCode +def set_global_attributes(self): + """ + Set the NetCDF global attributes. + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+ """ + + now = datetime.now() + if len(self.time) > 1: + tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 + else: + tstep = 1 * 10000 + + current_attributes = deepcopy(self.global_attrs) + del self.global_attrs + + self.global_attrs = {"IOAPI_VERSION": "None: made only with NetCDF libraries", + "EXEC_ID": "{:<80}".format("0.1alpha"), # Editable + "FTYPE": int32(1), # Editable + "CDATE": int32(now.strftime("%Y%j")), + "CTIME": int32(now.strftime("%H%M%S")), + "WDATE": int32(now.strftime("%Y%j")), + "WTIME": int32(now.strftime("%H%M%S")), + "SDATE": int32(self.time[0].strftime("%Y%j")), + "STIME": int32(self.time[0].strftime("%H%M%S")), + "TSTEP": int32(tstep), + "NTHIK": int32(1), # Editable + "NCOLS": None, # Projection dependent + "NROWS": None, # Projection dependent + "NLAYS": int32(len(self.lev["data"])), + "NVARS": None, # Projection dependent + "GDTYP": None, # Projection dependent + "P_ALP": None, # Projection dependent + "P_BET": None, # Projection dependent + "P_GAM": None, # Projection dependent + "XCENT": None, # Projection dependent + "YCENT": None, # Projection dependent + "XORIG": None, # Projection dependent + "YORIG": None, # Projection dependent + "XCELL": None, # Projection dependent + "YCELL": None, # Projection dependent + "VGTYP": int32(7), # Editable + "VGTOP": float32(5000.), # Editable + "VGLVLS": array([1., 0.], dtype=float32), # Editable + "GDNAM": "{:<16}".format(""), # Editable + "UPNAM": "{:<16}".format("HERMESv3"), + "FILEDESC": "", # Editable + "HISTORY": "", # Editable + "VAR-LIST": str_var_list(self)} + + # Editable attributes + for att_name, att_value in current_attributes.items(): + if att_name == "EXEC_ID": + self.global_attrs[att_name] = "{:<80}".format(att_value) # Editable + elif att_name == "FTYPE": + self.global_attrs[att_name] = int32(att_value) # Editable + elif att_name == "NTHIK": + self.global_attrs[att_name] = int32(att_value) # Editable + elif att_name == "VGTYP": + self.global_attrs[att_name] = int32(att_value) # Editable + elif att_name == "VGTOP": + self.global_attrs[att_name] = float32(att_value) # Editable + elif att_name == "VGLVLS": + self.global_attrs[att_name] = array(att_value.split(), dtype=float32) # Editable + elif att_name == "GDNAM": + self.global_attrs[att_name] = "{:<16}".format(att_value) # Editable + elif att_name == "FILEDESC": + self.global_attrs[att_name] = att_value # Editable + elif att_name == "HISTORY": + self.global_attrs[att_name] = att_value # Editable + + # Projection dependent attributes + if isinstance(self, nes.LCCNes): + self.global_attrs["NCOLS"] = int32(len(self._full_x["data"])) + self.global_attrs["NROWS"] = int32(len(self._full_y["data"])) + self.global_attrs["NVARS"] = int32(len(self.variables)) + self.global_attrs["GDTYP"] = int32(2) + + self.global_attrs["P_ALP"] = float64(self.projection_data["standard_parallel"][0]) + self.global_attrs["P_BET"] = float64(self.projection_data["standard_parallel"][1]) + self.global_attrs["P_GAM"] = float64(self.projection_data["longitude_of_central_meridian"]) + self.global_attrs["XCENT"] = float64(self.projection_data["longitude_of_central_meridian"]) + self.global_attrs["YCENT"] = float64(self.projection_data["latitude_of_projection_origin"]) + self.global_attrs["XORIG"] = float64( + self._full_x["data"][0]) - (float64(self._full_x["data"][1] - self._full_x["data"][0]) / 2) + self.global_attrs["YORIG"] = float64( + self._full_y["data"][0]) - (float64(self._full_y["data"][1] - self._full_y["data"][0]) / 2) + self.global_attrs["XCELL"] = 
float64(self._full_x["data"][1] - self._full_x["data"][0]) + self.global_attrs["YCELL"] = float64(self._full_y["data"][1] - self._full_y["data"][0]) + + return None + + +def create_dimensions(self, netcdf): + """ + Create the "TSTEP", "DATE-TIME", "LAY", "VAR", "ROW" and "COL" dimensions. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + netcdf.createDimension("TSTEP", len(self.get_full_times())) + netcdf.createDimension("DATE-TIME", 2) + netcdf.createDimension("LAY", len(self.get_full_levels()["data"])) + netcdf.createDimension("VAR", len(self.variables)) + if isinstance(self, nes.LCCNes): + netcdf.createDimension("COL", len(self._full_x["data"])) + netcdf.createDimension("ROW", len(self._full_y["data"])) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "TFLAG" variable. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + netcdf : Dataset + NetCDF object. + """ + + tflag = netcdf.createVariable("TFLAG", "i", ("TSTEP", "VAR", "DATE-TIME",)) + tflag.setncatts({"units": "{:<16}".format(""), "long_name": "{:<16}".format("TFLAG"), + "var_desc": "{:<80}".format("Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS")}) + tflag[:] = create_tflag(self) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf): + """ + Create the netCDF file variables. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + for var_name, var_info in self.variables.items(): + var = netcdf.createVariable(var_name, "f", ("TSTEP", "LAY", "ROW", "COL",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + var.units = var_info["units"] + var.long_name = str(var_info["long_name"]) + var.var_desc = str(var_info["var_desc"]) + if var_info["data"] is not None: + if self.info: + print("Rank {0:03d}: Filling {1}".format(self.rank, var_name)) + + if isinstance(var_info["data"], int) and var_info["data"] == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(var_info["data"].shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"] + + return None diff --git a/build/lib/nes/nes_formats/monarch_format.py b/build/lib/nes/nes_formats/monarch_format.py new file mode 100644 index 0000000000000000000000000000000000000000..0a50e75eea67563961ef390e139154892efa0d59 --- /dev/null +++ b/build/lib/nes/nes_formats/monarch_format.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, array, ndarray +from netCDF4 import Dataset +from mpi4py import MPI + + +# noinspection DuplicatedCode +def to_netcdf_monarch(self, path, chunking=False, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + chunking: bool + Indicates if you want to chunk the output netCDF. + keep_open : bool + Indicates if you want to keep open the NetCDF to fill the data by time-step. 
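+ + Example + ------- + A minimal usage sketch (``nessy`` is assumed to be an already-loaded Nes object and the output path is hypothetical): + + >>> to_netcdf_monarch(nessy, "/tmp/out_monarch.nc", chunking=False, keep_open=False) 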
+ """ + + self.to_dtype(float32) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + self._create_dimensions(netcdf) + + # Create dimension variables + if self.master: + self._full_lev["data"] = array(self._full_lev["data"], dtype=float32) + self._full_lat["data"] = array(self._full_lat["data"], dtype=float32) + self._full_lat_bnds["data"] = array(self._full_lat_bnds["data"], dtype=float32) + self._full_lon["data"] = array(self._full_lon["data"], dtype=float32) + self._full_lon_bnds["data"] = array(self._full_lon_bnds["data"], dtype=float32) + + if isinstance(self, nes.RotatedNes): + self._full_rlat["data"] = array(self._full_rlat["data"], dtype=float32) + self._full_rlon["data"] = array(self._full_rlon["data"], dtype=float32) + if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): + self._full_y["data"] = array(self._full_y["data"], dtype=float32) + self._full_x["data"] = array(self._full_x["data"], dtype=float32) + + self._create_dimension_variables(netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create cell measures + if "cell_area" in self.cell_measures.keys(): + self.cell_measures["cell_area"]["data"] = array(self.cell_measures["cell_area"]["data"], dtype=float32) + self._create_cell_measures(netcdf) + + # Create variables + self._create_variables(netcdf, chunking=chunking) + + # Create metadata + self._create_metadata(netcdf) + + # Close NetCDF + if self.global_attrs is not None: + for att_name, att_value in self.global_attrs.items(): + netcdf.setncattr(att_name, att_value) + netcdf.setncattr("Conventions", "CF-1.7") + + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +def to_monarch_units(self): + """ + Change the data values according to the MONARCH conventions. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol.s-1.m-2": + # Kmol to mol + self.variables[var_name]["data"] = array(self.variables[var_name]["data"] * 1000, dtype=float32) + elif self.variables[var_name]["units"] == "kg.s-1.m-2": + # No unit change needed + self.variables[var_name]["data"] = array(self.variables[var_name]["data"], dtype=float32) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. 
".format( + self.variables[var_name]["units"], var_name) + + "Should be 'mol.s-1.m-2' or 'kg.s-1.m-2'") + self.variables[var_name]["dtype"] = float32 + return self.variables diff --git a/build/lib/nes/nes_formats/wrf_chem_format.py b/build/lib/nes/nes_formats/wrf_chem_format.py new file mode 100644 index 0000000000000000000000000000000000000000..6a06af4600efba99eb5ede74efbbd8ecabab0608 --- /dev/null +++ b/build/lib/nes/nes_formats/wrf_chem_format.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python + +import nes +from numpy import float32, int32, ndarray, array, chararray +from netCDF4 import Dataset +from mpi4py import MPI +from copy import deepcopy + +GLOBAL_ATTRIBUTES_ORDER = [ + "TITLE", "START_DATE", "WEST-EAST_GRID_DIMENSION", "SOUTH-NORTH_GRID_DIMENSION", "BOTTOM-TOP_GRID_DIMENSION", "DX", + "DY", "GRIDTYPE", "DIFF_OPT", "KM_OPT", "DAMP_OPT", "DAMPCOEF", "KHDIF", "KVDIF", "MP_PHYSICS", "RA_LW_PHYSICS", + "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", + "SURFACE_INPUT_SOURCE", "SST_UPDATE", "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", + "SGFDDA_INTERVAL_M", "SGFDDA_END_H", "WEST-EAST_PATCH_START_UNSTAG", "WEST-EAST_PATCH_END_UNSTAG", + "WEST-EAST_PATCH_START_STAG", "WEST-EAST_PATCH_END_STAG", "SOUTH-NORTH_PATCH_START_UNSTAG", + "SOUTH-NORTH_PATCH_END_UNSTAG", "SOUTH-NORTH_PATCH_START_STAG", "SOUTH-NORTH_PATCH_END_STAG", + "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", "BOTTOM-TOP_PATCH_START_STAG", + "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", "PARENT_GRID_RATIO", "DT", + "CEN_LAT", "CEN_LON", "TRUELAT1", "TRUELAT2", "MOAD_CEN_LAT", "STAND_LON", "POLE_LAT", "POLE_LON", "GMT", "JULYR", + "JULDAY", "MAP_PROJ", "MMINLU", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] + + +# noinspection DuplicatedCode +def to_netcdf_wrf_chem(self, path, keep_open=False): + """ + Create the NetCDF using netcdf4-python methods. + + Parameters + ---------- + self : nes.Nes + Source projection Nes Object. + path : str + Path to the output netCDF file. + keep_open : bool + Indicates if you want to keep open the NetCDH to fill the data by time-step. + """ + + self.to_dtype(float32) + + set_global_attributes(self) + change_variable_attributes(self) + + # Open NetCDF + if self.info: + print("Rank {0:03d}: Creating {1}".format(self.rank, path)) + if self.size > 1: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=True, comm=self.comm, info=MPI.Info()) + else: + netcdf = Dataset(path, format="NETCDF4", mode="w", parallel=False) + if self.info: + print("Rank {0:03d}: NetCDF ready to write".format(self.rank)) + + # Create dimensions + create_dimensions(self, netcdf) + + create_dimension_variables(self, netcdf) + if self.info: + print("Rank {0:03d}: Dimensions done".format(self.rank)) + + # Create variables + create_variables(self, netcdf) + + for att_name in GLOBAL_ATTRIBUTES_ORDER: + netcdf.setncattr(att_name, self.global_attrs[att_name]) + + # Close NetCDF + if keep_open: + self.dataset = netcdf + else: + netcdf.close() + + return None + + +def change_variable_attributes(self): + """ + Modify the emission list to be consistent to use the output as input for WRF-CHEM model. + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+ """ + + for var_name in self.variables.keys(): + if self.variables[var_name]["units"] == "mol.h-1.km-2": + self.variables[var_name]["FieldType"] = int32(104) + self.variables[var_name]["MemoryOrder"] = "XYZ" + self.variables[var_name]["description"] = "EMISSIONS" + self.variables[var_name]["units"] = "mol km^-2 hr^-1" + self.variables[var_name]["stagger"] = "" + self.variables[var_name]["coordinates"] = "XLONG XLAT" + + elif self.variables[var_name]["units"] == "ug.s-1.m-2": + self.variables[var_name]["FieldType"] = int32(104) + self.variables[var_name]["MemoryOrder"] = "XYZ" + self.variables[var_name]["description"] = "EMISSIONS" + self.variables[var_name]["units"] = "ug/m3 m/s" + self.variables[var_name]["stagger"] = "" + self.variables[var_name]["coordinates"] = "XLONG XLAT" + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") + + if "long_name" in self.variables[var_name].keys(): + del self.variables[var_name]["long_name"] + + return None + + +def to_wrf_chem_units(self): + """ + Change the data values according to the WRF-CHEM conventions. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + dict + Variable in the MONARCH units. + """ + + self.calculate_grid_area(overwrite=False) + for var_name in self.variables.keys(): + if isinstance(self.variables[var_name]["data"], ndarray): + if self.variables[var_name]["units"] == "mol.h-1.km-2": + # 10**6 -> from m2 to km2 + # 10**3 -> from kmol to mol + # 3600 -> from s to h + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 10 ** 6 * 10 ** 3 * 3600, dtype=float32) + elif self.variables[var_name]["units"] == "ug.s-1.m-2": + # 10**9 -> from kg to ug + self.variables[var_name]["data"] = array( + self.variables[var_name]["data"] * 10 ** 9, dtype=float32) + + else: + raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format( + self.variables[var_name]["units"], var_name) + "Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") + self.variables[var_name]["dtype"] = float32 + + return self.variables + + +def create_times_var(self): + """ + Create the content of the WRF-CHEM variable times. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + + Returns + ------- + numpy.ndarray + Array with the content of TFLAG. + """ + + aux_times = chararray((len(self.time), 19), itemsize=1) + + for i_d, aux_date in enumerate(self.time): + aux_times[i_d] = list(aux_date.strftime("%Y-%m-%d_%H:%M:%S")) + + return aux_times + + +# noinspection DuplicatedCode +def set_global_attributes(self): + """ + Set the NetCDF global attributes + + Parameters + ---------- + self : nes.Nes + A Nes Object. 
+ """ + + # now = datetime.now() + # if len(self.time) > 1: + # tstep = ((self.time[1] - self.time[0]).seconds // 3600) * 10000 + # else: + # tstep = 1 * 10000 + + current_attributes = deepcopy(self.global_attrs) + del self.global_attrs + + self.global_attrs = {"TITLE": None, + "START_DATE": self.time[0].strftime("%Y-%m-%d_%H:%M:%S"), + "WEST-EAST_GRID_DIMENSION": None, # Projection dependent attributes + "SOUTH-NORTH_GRID_DIMENSION": None, # Projection dependent attributes + "BOTTOM-TOP_GRID_DIMENSION": int32(45), + "DX": None, # Projection dependent attributes + "DY": None, # Projection dependent attributes + "GRIDTYPE": "C", + "DIFF_OPT": int32(1), + "KM_OPT": int32(4), + "DAMP_OPT": int32(3), + "DAMPCOEF": float32(0.2), + "KHDIF": float32(0.), + "KVDIF": float32(0.), + "MP_PHYSICS": int32(6), + "RA_LW_PHYSICS": int32(4), + "RA_SW_PHYSICS": int32(4), + "SF_SFCLAY_PHYSICS": int32(2), + "SF_SURFACE_PHYSICS": int32(2), + "BL_PBL_PHYSICS": int32(8), + "CU_PHYSICS": int32(0), + "SF_LAKE_PHYSICS": int32(0), + "SURFACE_INPUT_SOURCE": None, # Projection dependent attributes + "SST_UPDATE": int32(0), + "GRID_FDDA": int32(0), + "GFDDA_INTERVAL_M": int32(0), + "GFDDA_END_H": int32(0), + "GRID_SFDDA": int32(0), + "SGFDDA_INTERVAL_M": int32(0), + "SGFDDA_END_H": int32(0), + "WEST-EAST_PATCH_START_UNSTAG": None, # Projection dependent attributes + "WEST-EAST_PATCH_END_UNSTAG": None, # Projection dependent attributes + "WEST-EAST_PATCH_START_STAG": None, # Projection dependent attributes + "WEST-EAST_PATCH_END_STAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_START_UNSTAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_END_UNSTAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_START_STAG": None, # Projection dependent attributes + "SOUTH-NORTH_PATCH_END_STAG": None, # Projection dependent attributes + "BOTTOM-TOP_PATCH_START_UNSTAG": None, + "BOTTOM-TOP_PATCH_END_UNSTAG": None, + "BOTTOM-TOP_PATCH_START_STAG": None, + "BOTTOM-TOP_PATCH_END_STAG": None, + "GRID_ID": int32(1), + "PARENT_ID": int32(0), + "I_PARENT_START": int32(1), + "J_PARENT_START": int32(1), + "PARENT_GRID_RATIO": int32(1), + "DT": float32(18.), + "CEN_LAT": None, # Projection dependent attributes + "CEN_LON": None, # Projection dependent attributes + "TRUELAT1": None, # Projection dependent attributes + "TRUELAT2": None, # Projection dependent attributes + "MOAD_CEN_LAT": None, # Projection dependent attributes + "STAND_LON": None, # Projection dependent attributes + "POLE_LAT": None, # Projection dependent attributes + "POLE_LON": None, # Projection dependent attributes + "GMT": float32(self.time[0].hour), + "JULYR": int32(self.time[0].year), + "JULDAY": int32(self.time[0].strftime("%j")), + "MAP_PROJ": None, # Projection dependent attributes + "MMINLU": "MODIFIED_IGBP_MODIS_NOAH", + "NUM_LAND_CAT": int32(41), + "ISWATER": int32(17), + "ISLAKE": int32(-1), + "ISICE": int32(15), + "ISURBAN": int32(13), + "ISOILWATER": int32(14), + "HISTORY": "", # Editable + } + + # Editable attributes + float_atts = ["DAMPCOEF", "KHDIF", "KVDIF", "CEN_LAT", "CEN_LON", "DT"] + int_atts = ["BOTTOM-TOP_GRID_DIMENSION", "DIFF_OPT", "KM_OPT", "DAMP_OPT", + "MP_PHYSICS", "RA_LW_PHYSICS", "RA_SW_PHYSICS", "SF_SFCLAY_PHYSICS", "SF_SURFACE_PHYSICS", + "BL_PBL_PHYSICS", "CU_PHYSICS", "SF_LAKE_PHYSICS", "SURFACE_INPUT_SOURCE", "SST_UPDATE", + "GRID_FDDA", "GFDDA_INTERVAL_M", "GFDDA_END_H", "GRID_SFDDA", "SGFDDA_INTERVAL_M", "SGFDDA_END_H", + "BOTTOM-TOP_PATCH_START_UNSTAG", "BOTTOM-TOP_PATCH_END_UNSTAG", 
"BOTTOM-TOP_PATCH_START_STAG", + "BOTTOM-TOP_PATCH_END_STAG", "GRID_ID", "PARENT_ID", "I_PARENT_START", "J_PARENT_START", + "PARENT_GRID_RATIO", "NUM_LAND_CAT", "ISWATER", "ISLAKE", "ISICE", "ISURBAN", "ISOILWATER"] + str_atts = ["GRIDTYPE", "MMINLU", "HISTORY"] + for att_name, att_value in current_attributes.items(): + if att_name in int_atts: + self.global_attrs[att_name] = int32(att_value) + elif att_name in float_atts: + self.global_attrs[att_name] = float32(att_value) + elif att_name in str_atts: + self.global_attrs[att_name] = str(att_value) + + # Projection dependent attributes + if isinstance(self, nes.LCCNes) or isinstance(self, nes.MercatorNes): + self.global_attrs["WEST-EAST_GRID_DIMENSION"] = int32(len(self._full_x["data"]) + 1) + self.global_attrs["SOUTH-NORTH_GRID_DIMENSION"] = int32(len(self._full_y["data"]) + 1) + self.global_attrs["DX"] = float32(self._full_x["data"][1] - self._full_x["data"][0]) + self.global_attrs["DY"] = float32(self._full_y["data"][1] - self._full_y["data"][0]) + self.global_attrs["SURFACE_INPUT_SOURCE"] = int32(1) + self.global_attrs["WEST-EAST_PATCH_START_UNSTAG"] = int32(1) + self.global_attrs["WEST-EAST_PATCH_END_UNSTAG"] = int32(len(self._full_x["data"])) + self.global_attrs["WEST-EAST_PATCH_START_STAG"] = int32(1) + self.global_attrs["WEST-EAST_PATCH_END_STAG"] = int32(len(self._full_x["data"]) + 1) + self.global_attrs["SOUTH-NORTH_PATCH_START_UNSTAG"] = int32(1) + self.global_attrs["SOUTH-NORTH_PATCH_END_UNSTAG"] = int32(len(self._full_y["data"])) + self.global_attrs["SOUTH-NORTH_PATCH_START_STAG"] = int32(1) + self.global_attrs["SOUTH-NORTH_PATCH_END_STAG"] = int32(len(self._full_y["data"]) + 1) + + self.global_attrs["POLE_LAT"] = float32(90) + self.global_attrs["POLE_LON"] = float32(0) + + if isinstance(self, nes.LCCNes): + self.global_attrs["MAP_PROJ"] = int32(1) + self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"][0]) + self.global_attrs["TRUELAT2"] = float32(self.projection_data["standard_parallel"][1]) + self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) + self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) + self.global_attrs["CEN_LAT"] = float32(self.projection_data["latitude_of_projection_origin"]) + self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_central_meridian"]) + elif isinstance(self, nes.MercatorNes): + self.global_attrs["MAP_PROJ"] = int32(3) + self.global_attrs["TRUELAT1"] = float32(self.projection_data["standard_parallel"]) + self.global_attrs["TRUELAT2"] = float32(0) + self.global_attrs["MOAD_CEN_LAT"] = float32(self.projection_data["standard_parallel"]) + self.global_attrs["STAND_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) + self.global_attrs["CEN_LAT"] = float32(self.projection_data["standard_parallel"]) + self.global_attrs["CEN_LON"] = float32(self.projection_data["longitude_of_projection_origin"]) + + return None + + +def create_dimensions(self, netcdf): + """ + Create "time", "time_bnds", "lev", "lon" and "lat" dimensions. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. 
+ """ + + netcdf.createDimension("Time", len(self.get_full_times())) + netcdf.createDimension("DateStrLen", 19) + netcdf.createDimension("emissions_zdim", len(self.get_full_levels()["data"])) + if isinstance(self, nes.LCCNes): + netcdf.createDimension("west_east", len(self._full_x["data"])) + netcdf.createDimension("south_north", len(self._full_y["data"])) + + return None + + +def create_dimension_variables(self, netcdf): + """ + Create the "y" and "x" variables. + + Parameters + ---------- + self : nes.Nes + A Nes Object. + netcdf : Dataset + NetCDF object. + """ + + times = netcdf.createVariable("Times", "S1", ("Time", "DateStrLen", )) + times[:] = create_times_var(self) + + return None + + +# noinspection DuplicatedCode +def create_variables(self, netcdf): + """ + Create the netCDF file variables. + + Parameters + ---------- + self : nes.Nes + Nes Object. + netcdf : Dataset + netcdf4-python open dataset. + """ + + for var_name, var_info in self.variables.items(): + var = netcdf.createVariable(var_name, "f", ("Time", "emissions_zdim", "south_north", "west_east",), + zlib=self.zip_lvl > 0, complevel=self.zip_lvl) + var.FieldType = var_info["FieldType"] + var.MemoryOrder = var_info["MemoryOrder"] + var.description = var_info["description"] + var.units = var_info["units"] + var.stagger = var_info["stagger"] + var.coordinates = var_info["coordinates"] + + if var_info["data"] is not None: + if self.info: + print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) + + if isinstance(var_info["data"], int) and var_info["data"] == 0: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 + + elif len(var_info["data"].shape) == 4: + var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], + self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], + self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], + self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = var_info["data"] + + return None diff --git a/build/lib/tests/1.1-test_read_write_projection.py b/build/lib/tests/1.1-test_read_write_projection.py new file mode 100644 index 0000000000000000000000000000000000000000..5788b3043cd4e6b21258915b97e002a168f7ef39 --- /dev/null +++ b/build/lib/tests/1.1-test_read_write_projection.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_1.1_read_write_projection_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'write'], + columns=['1.1.1.Regular', '1.1.2.Rotated', '1.1.3.Points', '1.1.4.Points_GHOST', + '1.1.5.LCC', '1.1.6.Mercator']) + +# ====================================================================================================================== +# ============================================= REGULAR ======================================================== +# ====================================================================================================================== + +test_name = '1.1.1.Regular' +if rank == 0: + print(test_name) +comm.Barrier() + +# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc +# Regular lat-lon grid from MONARCH +path 
= '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +variables = ['sconcno2'] +nessy.keep_vars(variables) +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= ROTATED ======================================================== +# ====================================================================================================================== + +test_name = '1.1.2.Rotated' +if rank == 0: + print(test_name) + +# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all/O3_all-000_2021080300.nc +# Rotated grid from MONARCH +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +variables = ['O3_all'] +nessy.keep_vars(variables) +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ============================================= LCC ============================================================ +# ====================================================================================================================== + +test_name = '1.1.5.LCC' +if rank == 0: + print(test_name) + +# Original path: /esarchive/exp/snes/a5g1/ip/daily_max/sconco3/sconco3_2022111500.nc +# LCC grid with a coverage over the Iberian Peninsula (4x4km) +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/sconco3_2022111500.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= MERCATOR ======================================================== +# ====================================================================================================================== + +test_name = '1.1.6.Mercator' +if rank == 0: 
+ print(test_name) + +# Original path: None (generated with NES) +# Mercator grid +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/mercator_grid.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + +# ====================================================================================================================== +# ============================================= POINTS ========================================================= +# ====================================================================================================================== + +test_name = '1.1.3.Points' +if rank == 0: + print(test_name) + +# Original path: /esarchive/obs/nilu/ebas/daily/pm10/pm10_201507.nc +# Points grid from EBAS network +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/pm10_201507.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= POINTS GHOST =================================================== +# ====================================================================================================================== + +test_name = '1.1.4.Points_GHOST' +if rank == 0: + print(test_name) + +path = '/gpfs/projects/bsc32/AC_cache/obs/ghost/EBAS/1.4/hourly/sconco3/sconco3_201906.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, comm=comm, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/1.2-test_create_projection.py b/build/lib/tests/1.2-test_create_projection.py new file mode 100644 index 0000000000000000000000000000000000000000..60c470ad0c1acd4d4cee5ce7ff3f9f37211f2fe6 --- /dev/null +++ b/build/lib/tests/1.2-test_create_projection.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python + +import sys +from mpi4py import MPI +import pandas as pd +import timeit +from nes import create_nes + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() 
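+# A typical invocation for this benchmark (hypothetical launcher and process count): +# mpirun -np 4 python 1.2-test_create_projection.py 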
+ +parallel_method = 'Y' + +result_path = "Times_test_1.2_create_projection_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['create', 'write'], + columns=['1.2.1.Regular', '1.2.2.Rotated', '1.2.3.LCC', '1.2.4.Mercator', '1.2.5.Global']) + +# ====================================================================================================================== +# ============================================= REGULAR ======================================================== +# ====================================================================================================================== + +test_name = '1.2.1.Regular' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.01 +inc_lon = 0.01 +n_lat = 100 +n_lon = 100 +nessy = create_nes(projection='regular', parallel_method=parallel_method, + lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon, n_lat=n_lat, n_lon=n_lon) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= ROTATED ======================================================== +# ====================================================================================================================== + +test_name = '1.2.2.Rotated' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(projection='rotated', parallel_method=parallel_method, + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= LCC ======================================================== +# ====================================================================================================================== + +test_name = '1.2.3.LCC' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(projection='lcc', parallel_method=parallel_method, + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + 
"_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================= MERCATOR ======================================================== +# ====================================================================================================================== + +test_name = '1.2.4.Mercator' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_ts = -1.5 +lon_0 = -18.0 +nx = 210 +ny = 236 +inc_x = 50000 +inc_y = 50000 +x_0 = -126017.5 +y_0 = -5407460.0 +nessy = create_nes(projection='mercator', parallel_method=parallel_method, + lat_ts=lat_ts, lon_0=lon_0, nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ============================================== GLOBAL ======================================================== +# ====================================================================================================================== + +test_name = '1.2.5.Global' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +inc_lat = 0.1 +inc_lon = 0.1 +nessy = create_nes(projection='global', parallel_method=parallel_method, inc_lat=inc_lat, inc_lon=inc_lon) + +comm.Barrier() +result.loc['create', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/1.3-test_selecting.py b/build/lib/tests/1.3-test_selecting.py new file mode 100644 index 0000000000000000000000000000000000000000..00bbb23493c5239a27b344e7a2363ed0e17d8a80 --- /dev/null +++ b/build/lib/tests/1.3-test_selecting.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf +from datetime import datetime + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' +serial_write = True + +result_path = "Times_test_1.3.Selecting_{0}_{1:03d}.csv".format(parallel_method, size) + +result = pd.DataFrame(index=['read', 'calcul', 'write'], + columns=['1.3.1.LatLon', '1.3.2.Level', '1.3.3.Time', '1.3.4.Time_min', '1.3.5.Time_max']) + +# NAMEE +src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc" +var_list = ['O3'] + +# ====================================================================================================================== +# ====================================== '1.3.1.LatLon' ===================================================== +# 
====================================================================================================================== +test_name = '1.3.1.Selecting_LatLon' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method, balanced=True) +nessy.keep_vars(var_list) +nessy.sel(lat_min=35, lat_max=45, lon_min=-9, lon_max=5) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== 1.3.2.Level ===================================================== +# ====================================================================================================================== +test_name = '1.3.2.Selecting_Level' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(lev_min=3, lev_max=5) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== 1.3.3.Time ===================================================== +# ====================================================================================================================== +test_name = '1.3.3.Selecting_Time' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(time_min=datetime(year=2022, month=11, day=16, hour=0), + time_max=datetime(year=2022, month=11, day=16, hour=0)) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== '1.3.4.Time_min' ===================================================== +# ====================================================================================================================== +test_name = '1.3.4.Selecting_Time_min' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(time_min=datetime(year=2022, month=11, day=16, 
hour=0)) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== '1.3.5.Time_max' ===================================================== +# ====================================================================================================================== +test_name = '1.3.5.Selecting_Time_max' + +if rank == 0: + print(test_name) + +st_time = timeit.default_timer() + +# Source data +nessy = open_netcdf(src_path, parallel_method=parallel_method) +nessy.keep_vars(var_list) +nessy.sel(time_max=datetime(year=2022, month=11, day=16, hour=0)) + +nessy.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=serial_write) + +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.1-test_spatial_join.py b/build/lib/tests/2.1-test_spatial_join.py new file mode 100644 index 0000000000000000000000000000000000000000..e24d443f614fe8fe24cb7c68155dc062c08bd38e --- /dev/null +++ b/build/lib/tests/2.1-test_spatial_join.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python + +import sys +from mpi4py import MPI +import pandas as pd +import timeit +from nes import open_netcdf, from_shapefile + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' +serial_write = False + +result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['2.1.1.Existing_file_centroid', '2.1.2.New_file_centroid', + '2.1.3.Existing_file_nearest', '2.1.4.New_file_nearest', + '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection']) + +# ===== PATH TO MASK ===== # +# Timezones +# shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' +# shapefile_var_list = ['tzid'] +# str_len = 32 +# Country ISO codes +shapefile_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp" +shapefile_var_list = ['ISO'] +str_len = 3 + +# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc +# Regular lat-lon grid from MONARCH +original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' +# CAMS_Global +# original_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" + +# ====================================================================================================================== +# =================================== CENTROID EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.1.Existing_file_centroid' +if rank == 0: + 
print(test_name) + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +st_time = timeit.default_timer() +nessy.spatial_join(shapefile_path, method='centroid', var_list=shapefile_var_list, info=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) +comm.Barrier() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +# REWRITE +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), serial=serial_write) + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== CENTROID FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.2.New_file_centroid' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== NEAREST EXISTING FILE =================================================== +# 
====================================================================================================================== + +test_name = '2.1.3.Existing_file_nearest' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +st_time = timeit.default_timer() +nessy.spatial_join(shapefile_path, method='nearest', var_list=shapefile_var_list, info=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== NEAREST FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.4.New_file_nearest' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# =================================== INTERSECTION EXISTING FILE =================================================== +# 
====================================================================================================================== + +test_name = '2.1.5.Existing_file_intersection' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +st_time = timeit.default_timer() +nessy.spatial_join(shapefile_path, method='intersection', var_list=shapefile_var_list, info=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.set_strlen(strlen=str_len) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== INTERSECTION FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.6.New_file_intersection' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='intersection', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.2-test_create_shapefile.py b/build/lib/tests/2.2-test_create_shapefile.py new file mode 100644 index 
0000000000000000000000000000000000000000..6d443a7040deb03a6503ecf6b35654ad8dde26f1 --- /dev/null +++ b/build/lib/tests/2.2-test_create_shapefile.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +import datetime +from nes import create_nes, open_netcdf + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_2.2_create_shapefile_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate'], + columns=['2.2.1.Existing', '2.2.2.New_Regular', + '2.2.3.New_Rotated', '2.2.4.New_LCC', '2.2.5.New_Mercator']) + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM EXISTING GRID ========================================== +# ====================================================================================================================== + +test_name = '2.2.1.Existing' +if rank == 0: + print(test_name) + +# Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc +# Regular lat-lon grid from MONARCH +path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(path=path, info=True, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# LOAD VARIABLES +nessy.load() + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.to_shapefile(path='regular_shp', + time=datetime.datetime(2019, 1, 1, 10, 0), + lev=0, var_list=['sconcno2']) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM EXISTING GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM NEW REGULAR GRID ======================================= +# ====================================================================================================================== + +test_name = '2.2.2.New_Regular' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.1 +inc_lon = 0.1 +n_lat = 50 +n_lon = 100 +nessy = create_nes(comm=None, info=False, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.create_shapefile() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM NEW REGULAR GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM NEW ROTATED GRID ======================================= +# ====================================================================================================================== + +test_name = 
'2.2.3.New_Rotated' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +centre_lat = 51 +centre_lon = 10 +west_boundary = -35 +south_boundary = -27 +inc_rlat = 0.2 +inc_rlon = 0.2 +nessy = create_nes(comm=None, info=False, projection='rotated', + centre_lat=centre_lat, centre_lon=centre_lon, + west_boundary=west_boundary, south_boundary=south_boundary, + inc_rlat=inc_rlat, inc_rlon=inc_rlon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.create_shapefile() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM NEW ROTATED GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM NEW LCC GRID =========================================== +# ====================================================================================================================== + +test_name = '2.2.4.New_LCC' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 100 +ny = 200 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(comm=None, info=False, projection='lcc', + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.create_shapefile() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM NEW LCC GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CREATE SHAPEFILE FROM NEW MERCATOR GRID ====================================== +# ====================================================================================================================== + +test_name = '2.2.5.New_Mercator' +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_ts = -1.5 +lon_0 = -18.0 +nx = 100 +ny = 50 +inc_x = 50000 +inc_y = 50000 +x_0 = -126017.5 +y_0 = -5407460.0 +nessy = create_nes(comm=None, info=False, projection='mercator', + lat_ts=lat_ts, lon_0=lon_0, nx=nx, ny=ny, + inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE SHAPEFILE +st_time = timeit.default_timer() +nessy.create_shapefile() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM NEW MERCATOR GRID - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.3-test_bounds.py b/build/lib/tests/2.3-test_bounds.py new file mode 100644 index 
0000000000000000000000000000000000000000..a2a9c1cea863791f286fde85bbf17ac1c0e1999f --- /dev/null +++ b/build/lib/tests/2.3-test_bounds.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf, create_nes + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_2.3_bounds_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['2.3.1.With_bounds', '2.3.2.Without_bounds', "2.3.3.Create_new", + "2.3.4.latlon_sel_create_bnds", "2.3.5.rotated_sel_create_bnds"]) + +# ====================================================================================================================== +# ===================================== FILE WITH EXISTING BOUNDS ==================================================== +# ====================================================================================================================== + +test_name = "2.3.1.With_bounds" +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() +# Original path: /esarchive/exp/snes/a5s1/regional/3hourly/od550du/od550du-000_2021070612.nc +# Rotated grid for dust regional +path_1 = '/gpfs/projects/bsc32/models/NES_tutorial_data/od550du-000_2021070612.nc' +nessy_1 = open_netcdf(path=path_1, parallel_method=parallel_method, info=True) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +st_time = timeit.default_timer() +print('FILE WITH EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_1.lat_bnds) +print('FILE WITH EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_1.lon_bnds) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +nessy_1.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_2 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_2.lat_bnds) +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_2.lon_bnds) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== FILE WITHOUT EXISTING BOUNDS =================================================== +# ====================================================================================================================== + +test_name = '2.3.2.Without_bounds' +if rank == 0: + print(test_name) + +# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all +# /O3_all-000_2021080300.nc Rotated grid from MONARCH +st_time = timeit.default_timer() +path_3 = "/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc" +nessy_3 = open_netcdf(path=path_3, parallel_method=parallel_method, info=True) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_3.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + 
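+# Illustrative note (cf. the shape checks in tests 2.3.4 and 2.3.5 below): bounds are exposed +# as dicts holding a 'data' array; a regular 1D axis typically gets (n, 2) vertex pairs, while +# a 2D grid such as this rotated one gets one vertex ring per cell, so the leading dimensions +# match the coordinate array, e.g.: +# assert nessy_3.lat_bnds['data'].shape[:2] == nessy_3.lat['data'].shape 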
+# EXPLORE BOUNDS +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_3.lat_bnds) +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_3.lon_bnds) + +# WRITE +st_time = timeit.default_timer() +nessy_3.to_netcdf('/tmp/bounds_file_2.nc', info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_4 = open_netcdf('/tmp/bounds_file_2.nc', info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_4.lat_bnds) +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_4.lon_bnds) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ==================================== CREATE NES REGULAR LAT-LON ==================================================== +# ====================================================================================================================== + +test_name = "2.3.3.Create_new" +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 +nessy_5 = create_nes(comm=None, parallel_method=parallel_method, info=True, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_5.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FROM NEW GRID - Rank', rank, '-', 'Lat bounds', nessy_5.lat_bnds) +print('FROM NEW GRID - Rank', rank, '-', 'Lon bounds', nessy_5.lon_bnds) + +# WRITE +st_time = timeit.default_timer() +nessy_5.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_6 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_6.lat_bnds) +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_6.lon_bnds) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ================================ REGULAR LAT-LON SEL THEN CREATE BOUNDS ============================================= +# ====================================================================================================================== + +test_name = "2.3.4.latlon_sel_create_bnds" +if rank == 0: + print(test_name) + +# USE SAME GRID SETTING AS 2.3.3 +# (restart the timer here; otherwise the 'read' entry would include the previous test's write) +st_time = timeit.default_timer() +nessy_7 = create_nes(comm=None, parallel_method=parallel_method, info=True, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SEL +nessy_7.sel(lat_min=50, lat_max=60, lon_min=10, lon_max=20) + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_7.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', 
test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FROM NEW GRID - Rank', rank, '-', 'Lat bounds', nessy_7.lat_bnds) +print('FROM NEW GRID - Rank', rank, '-', 'Lon bounds', nessy_7.lon_bnds) + +# Check lon_bnds +if nessy_7.lon_bnds['data'].shape != (52, 2): + raise Exception("Wrong lon_bnds.") + +# WRITE +st_time = timeit.default_timer() +nessy_7.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_8 = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_8.lat_bnds) +print('FROM NEW GRID AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_8.lon_bnds) + +# Check lon_bnds +if nessy_8.lon_bnds['data'].shape != (52, 2): + raise Exception("Wrong lon_bnds.") + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# ================================ ROTATED SEL THEN CREATE BOUNDS ============================================= +# ====================================================================================================================== + +test_name = "2.3.5.rotated_sel_create_bnds" +if rank == 0: + print(test_name) + +# USE FILE AS 2.3.2 + +# Original path: /gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all +# /O3_all-000_2021080300.nc Rotated grid from MONARCH +st_time = timeit.default_timer() +path_9 = "/gpfs/projects/bsc32/models/NES_tutorial_data/O3_all-000_2021080300.nc" +nessy_9 = open_netcdf(path=path_9, parallel_method=parallel_method, info=True) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SEL +nessy_9.sel(lat_min=50, lat_max=60, lon_min=10, lon_max=15) + +# CREATE BOUNDS +st_time = timeit.default_timer() +nessy_9.create_spatial_bounds() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE BOUNDS +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lat bounds', nessy_9.lat_bnds) +print('FILE WITHOUT EXISTING BOUNDS - Rank', rank, '-', 'Lon bounds', nessy_9.lon_bnds) + +# Check lon_bnds +if nessy_9.lon_bnds['data'].shape[0:2] != nessy_9.lon['data'].shape: + raise Exception("Wrong lon_bnds.") + +# WRITE +st_time = timeit.default_timer() +nessy_9.to_netcdf('/tmp/bounds_file_9.nc', info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy_10 = open_netcdf('/tmp/bounds_file_9.nc', info=True) + +# LOAD DATA AND EXPLORE BOUNDS +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lat bounds', nessy_10.lat_bnds) +print('FILE WITH EXISTING BOUNDS AFTER WRITE - Rank', rank, '-', 'Lon bounds', nessy_10.lon_bnds) + +# Check lon_bnds +if nessy_10.lon_bnds['data'].shape[0:2] != nessy_10.lon['data'].shape: + raise Exception("Wrong lon_bnds.") + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/2.4-test_cell_area.py b/build/lib/tests/2.4-test_cell_area.py new file mode 100644 index 0000000000000000000000000000000000000000..9db836ff9b388d061c06144c50958ea0fb7d7f81 --- /dev/null +++ 
b/build/lib/tests/2.4-test_cell_area.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python + +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import create_nes, open_netcdf, calculate_geometry_area + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_2.4_cell_area_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['2.4.1.New_file_grid_area', '2.4.2.New_file_geometry_area', + '2.4.3.Existing_file_grid_area', '2.4.4.Existing_file_geometry_area']) + +# ====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM NEW GRID =========================================== +# ====================================================================================================================== + +test_name = "2.4.1.New_file_grid_area" +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 20 +ny = 40 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(comm=None, info=False, projection='lcc', + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE AREA OF EACH CELL IN GRID +st_time = timeit.default_timer() +nessy.calculate_grid_area() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE GRID AREA +print('Rank {0:03d}: Calculate grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area'])) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) + +# EXPLORE GRID AREA +print('Rank {0:03d}: Write grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area'])) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +del nessy + +# ====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM GEOMETRIES ========================================= +# ====================================================================================================================== + +test_name = "2.4.2.New_file_geometry_area" +if rank == 0: + print(test_name) + +# CREATE GRID +st_time = timeit.default_timer() +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 20 +ny = 40 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +nessy = create_nes(comm=None, info=False, projection='lcc', + lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE AREA OF EACH CELL POLYGON +st_time = timeit.default_timer() +nessy.create_shapefile() +geometry_list = nessy.shapefile['geometry'].values +geometry_area = calculate_geometry_area(geometry_list) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE GEOMETRIES AREA +print('Rank 
{0:03d}: Calculate geometry cell area: {1}'.format(rank, geometry_area)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM EXISTING GRID ====================================== +# ====================================================================================================================== + +test_name = '2.4.3.Existing_file_grid_area' +if rank == 0: + print(test_name) + +# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc +# Rotated grid from MONARCH +original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE AREA OF EACH CELL IN GRID +st_time = timeit.default_timer() +nessy.calculate_grid_area() +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE GRID AREA +print('Rank {0:03d}: Calculate grid cell area: {1}'.format(rank, nessy.cell_measures['cell_area'])) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() +del nessy + +# ====================================================================================================================== +# ===================================== CALCULATE CELLS AREA FROM GEOMETRIES FROM EXISTING GRID ====================== +# ====================================================================================================================== + +test_name = '2.4.4.Existing_file_geometry_area' +if rank == 0: + print(test_name) + +# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc +# Rotated grid from MONARCH +original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc' + +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# CALCULATE AREA OF EACH CELL POLYGON +st_time = timeit.default_timer() +nessy.create_shapefile() +geometry_list = nessy.shapefile['geometry'].values +geometry_area = calculate_geometry_area(geometry_list) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# EXPLORE GEOMETRIES AREA +print('Rank {0:03d}: Calculate geometry cell area: {1}'.format(rank, geometry_area)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() +del nessy + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/3.1-test_vertical_interp.py b/build/lib/tests/3.1-test_vertical_interp.py new file mode 100644 index 0000000000000000000000000000000000000000..9b78628a1f22a3f8f4d1b50429607198e0104a61 --- /dev/null +++ b/build/lib/tests/3.1-test_vertical_interp.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +import sys +import timeit +import 
pandas as pd +from mpi4py import MPI +from nes import open_netcdf + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'T' + +result_path = "Times_test_3.1_vertical_interp_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['3.1.1.Interp', '3.1.2.Extrap']) + +# ====================================================================================================================== +# =============================================== VERTICAL INTERPOLATION ============================================= +# ====================================================================================================================== + +test_name = '3.1.1.Interp' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc +# Rotated grid from MONARCH +source_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc' + +# Read source data +source_data = open_netcdf(path=source_path, info=True) + +# Keep only the required variables and load them +source_data.keep_vars(['O3', 'mid_layer_height_agl']) +source_data.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# INTERPOLATE +st_time = timeit.default_timer() +source_data.vertical_var_name = 'mid_layer_height_agl' +level_list = [0., 50., 100., 250., 500., 750., 1000., 2000., 3000., 5000.] +interp_nes = source_data.interpolate_vertical(level_list, info=True, kind='linear', extrapolate=None) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =============================================== VERTICAL EXTRAPOLATION ============================================= +# ====================================================================================================================== + +test_name = '3.1.2.Extrap' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc +# Rotated grid from MONARCH +source_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc' + +# Read source data +source_data = open_netcdf(path=source_path, info=True) + +# Keep only the required variables and load them +source_data.keep_vars(['O3', 'mid_layer_height_agl']) +source_data.load() + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# INTERPOLATE (the target levels extend above the model top, so extrapolate=True is needed) +st_time = timeit.default_timer() +source_data.vertical_var_name = 'mid_layer_height_agl' +level_list = [0., 50., 100., 250., 500., 750., 1000., 2000., 3000., 5000., 21000, 25000, 30000, 40000, 50000] +interp_nes = source_data.interpolate_vertical(level_list, info=True, kind='linear', extrapolate=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = 
timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/3.2-test_horiz_interp_bilinear.py b/build/lib/tests/3.2-test_horiz_interp_bilinear.py new file mode 100644 index 0000000000000000000000000000000000000000..4366a8de6a8d55e2b58fe2de65054caea4c63335 --- /dev/null +++ b/build/lib/tests/3.2-test_horiz_interp_bilinear.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf, create_nes +import os + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'T' + +result_path = "Times_test_3.2_horiz_interp_bilinear_{0}_{1:03d}.csv".format(parallel_method, size) +# Column names must match the test_name values used below (NN_ prefix included). +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['3.2.1.NN_Only interp', '3.2.2.NN_Create_WM', "3.2.3.NN_Use_WM", "3.2.4.NN_Read_WM"]) + +# NAMEE +src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc" +var_list = ['O3'] + +# ====================================================================================================================== +# ====================================== Only interp ===================================================== +# ====================================================================================================================== +test_name = '3.2.1.NN_Only interp' +if rank == 0: + print(test_name) + sys.stdout.flush() + +# READING +st_time = timeit.default_timer() + +# Source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Destination Grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN') +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Create_WM ===================================================== +# ====================================================================================================================== +test_name = '3.2.2.NN_Create_WM' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# Read source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Destination Grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, 
lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# Cleaning WM +if os.path.exists("NN_WM_NAMEE_to_IP.nc") and rank == 0: + os.remove("NN_WM_NAMEE_to_IP.nc") +comm.Barrier() + +st_time = timeit.default_timer() + +wm_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', info=True, + weight_matrix_path="NN_WM_NAMEE_to_IP.nc", only_create_wm=True) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Use_WM ===================================================== +# ====================================================================================================================== +test_name = "3.2.3.NN_Use_WM" +if rank == 0: + print(test_name) + +# READING +st_time = timeit.default_timer() + +# Source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Destination Grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 + +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', wm=wm_nes) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== Read_WM ===================================================== +# ====================================================================================================================== +test_name = "3.2.4.NN_Read_WM" +if rank == 0: + print(test_name) + +# READING +st_time = timeit.default_timer() + +# Source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Destination Grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 + +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='NN', + weight_matrix_path="NN_WM_NAMEE_to_IP.nc") +comm.Barrier() 
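+# The call above reuses the weight matrix written in test 3.2.2: when only weight_matrix_path +# is given (no wm= object and no only_create_wm=True), the stored weights are read from disk +# instead of being recomputed. 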
+result.loc['calculate', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +if rank == 0: + result.to_csv(result_path) + print("TEST PASSED SUCCESSFULLY!!!!!") diff --git a/build/lib/tests/3.3-test_horiz_interp_conservative.py b/build/lib/tests/3.3-test_horiz_interp_conservative.py new file mode 100644 index 0000000000000000000000000000000000000000..90aa72bd920c4d23f6ad950229139356b641634d --- /dev/null +++ b/build/lib/tests/3.3-test_horiz_interp_conservative.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python +import sys +import os +import timeit +import pandas as pd +from mpi4py import MPI +from nes import open_netcdf, create_nes + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +parallel_method = 'Y' + +result_path = "Times_test_3.3_horiz_interp_conservative_{0}_{1:03d}.csv".format(parallel_method, size) +result = pd.DataFrame(index=['read', 'calculate', 'write'], + columns=['3.3.1.Only interp', '3.3.2.Create_WM', "3.3.3.Use_WM", "3.3.4.Read_WM"]) + +src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc" +src_type = 'NAMEE' +var_list = ['O3'] +# src_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" +# src_type = 'CAMS_glob_antv21' +# var_list = ['nox_no'] + +# ====================================================================================================================== +# ====================================== Only interp ===================================================== +# ====================================================================================================================== + +test_name = '3.3.1.Only interp' +if rank == 0: + print(test_name) + +# READ +st_time = timeit.default_timer() + +# Read source data +src_nes = open_netcdf(src_path, parallel_method=parallel_method) +src_nes.keep_vars(var_list) +src_nes.load() + +# Create destination grid +lat_1 = 37 +lat_2 = 43 +lon_0 = -3 +lat_0 = 40 +nx = 397 +ny = 397 +inc_x = 4000 +inc_y = 4000 +x_0 = -807847.688 +y_0 = -797137.125 +dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0, + nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method, + times=src_nes.get_full_times()) +dst_type = "IP" + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +st_time = timeit.default_timer() + +# INTERPOLATE +interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', info=False) +# interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', weight_matrix_path='T_WM.nc') +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +# WRITE +st_time = timeit.default_timer() +interp_nes.to_netcdf(test_name.replace(' ', '_') + "{0:03d}.nc".format(size), serial=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# ====================================== 
+
+# ======================================================================================================================
+# ======================================         Create_WM         =====================================================
+# ======================================================================================================================
+
+test_name = '3.3.2.Create_WM'
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Read source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Create destination grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+dst_type = "IP"
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# Cleaning WM
+if os.path.exists("CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type)) and rank == 0:
+    os.remove("CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type))
+comm.Barrier()
+
+# INTERPOLATE
+st_time = timeit.default_timer()
+wm_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', info=True,
+                                        weight_matrix_path="CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type),
+                                        only_create_wm=True)
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE (skipped: this test only creates the weight matrix)
+# st_time = timeit.default_timer()
+# wm_nes.to_netcdf(test_name.replace(' ', '_') + ".nc")
+# comm.Barrier()
+# result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ======================================          Use_WM          ======================================================
+# ======================================================================================================================
+
+test_name = "3.3.3.Use_WM"
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Read source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Create destination grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+dst_type = "IP"
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# INTERPOLATE
+st_time = timeit.default_timer()
+interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative', wm=wm_nes)
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ======================================          Read_WM          =====================================================
+# ======================================================================================================================
+
+test_name = "3.3.4.Read_WM"
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# Read source data
+src_nes = open_netcdf(src_path, parallel_method=parallel_method)
+src_nes.keep_vars(var_list)
+src_nes.load()
+
+# Create destination grid
+lat_1 = 37
+lat_2 = 43
+lon_0 = -3
+lat_0 = 40
+nx = 397
+ny = 397
+inc_x = 4000
+inc_y = 4000
+x_0 = -807847.688
+y_0 = -797137.125
+dst_nes = create_nes(comm=None, info=False, projection='lcc', lat_1=lat_1, lat_2=lat_2, lon_0=lon_0, lat_0=lat_0,
+                     nx=nx, ny=ny, inc_x=inc_x, inc_y=inc_y, x_0=x_0, y_0=y_0, parallel_method=parallel_method,
+                     times=src_nes.get_full_times())
+dst_type = "IP"
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# INTERPOLATE
+st_time = timeit.default_timer()
+interp_nes = src_nes.interpolate_horizontal(dst_grid=dst_nes, kind='Conservative',
+                                            weight_matrix_path="CONS_WM_{0}_to_{1}.nc".format(src_type, dst_type))
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+interp_nes.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/4.1-test_stats.py b/build/lib/tests/4.1-test_stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..f11206c72ebc5d7d08bfd90be4dfa2ccbfd0b0a0
--- /dev/null
+++ b/build/lib/tests/4.1-test_stats.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+
+import sys
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_4.1_daily_stats_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['4.1.1.Mean', '4.1.2.Rolling_Mean'])
+
+# ======================================================================================================================
+# ==============================================  CALCULATE DAILY MEAN  ================================================
+# ======================================================================================================================
+
+test_name = '4.1.1.Mean'
+if rank == 0:
+    print(test_name)
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# READ
+st_time = timeit.default_timer()
+nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# LOAD VARIABLES
+nessy.keep_vars('O3')
+nessy.load()
+
+# CALCULATE MEAN
+st_time = timeit.default_timer()
+nessy.daily_statistic(op="mean")
+print(nessy.variables['O3']['cell_methods'])
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+# ======================================================================================================================
+# ==========================================  CALCULATE 8-HOUR ROLLING MEAN  ===========================================
+# ======================================================================================================================
+
+test_name = '4.1.2.Rolling_Mean'
+if rank == 0:
+    print(test_name)
+
+# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
+# Rotated grid from MONARCH
+cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
+
+# READ
+st_time = timeit.default_timer()
+nessy = open_netcdf(path=cams_file, info=True, parallel_method=parallel_method)
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# LOAD VARIABLES
+nessy.keep_vars('O3')
+nessy.load()
+
+# CALCULATE ROLLING MEAN
+st_time = timeit.default_timer()
+rolling_mean = nessy.rolling_mean(var_list='O3', hours=8)
+print(rolling_mean.variables['O3']['data'])
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+rolling_mean.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size))
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/4.2-test_sum.py b/build/lib/tests/4.2-test_sum.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f1a93c4de5100fe510f35cccd1826ddcd070ae0
--- /dev/null
+++ b/build/lib/tests/4.2-test_sum.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+import sys
+from mpi4py import MPI
+import pandas as pd
+import timeit
+import numpy as np
+from nes import create_nes
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_4.2_sum_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['4.2.1.Sum'])
+
+# ======================================================================================================================
+# ===================================  SUM OF TWO GRIDS ON A NEW FILE  =================================================
+# ======================================================================================================================
+
+test_name = '4.2.1.Sum'
+
+if rank == 0:
+    print(test_name)
+
+# CREATE GRID
+st_time = timeit.default_timer()
+projection = 'regular'
+lat_orig = 41.1
+lon_orig = 1.8
+inc_lat = 0.2
+inc_lon = 0.2
+n_lat = 100
+n_lon = 100
+nessy = create_nes(projection=projection, lat_orig=lat_orig, lon_orig=lon_orig, inc_lat=inc_lat, inc_lon=inc_lon,
+                   n_lat=n_lat, n_lon=n_lon)
+
+# ADD VARIABLES
+nessy.variables = {'var_aux': {'data': np.ones((len(nessy.time), len(nessy.lev['data']),
+                                                nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]))}}
+
+# CREATE GRID WITH COPY
+nessy_2 = nessy.copy(copy_vars=True)
+
+# DOUBLE THE COPIED VARIABLES
+for var_name in nessy_2.variables.keys():
+    nessy_2.variables[var_name]['data'] *= 2
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# SUM
+st_time = timeit.default_timer()
+nessy_3 = nessy + nessy_2
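+# Both inputs hold constant fields (ones and twos), so every value printed
+# below is expected to be 3.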
+print('Sum result', nessy_3.variables['var_aux']['data'])
+
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+st_time = timeit.default_timer()
+nessy_3.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True)
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/4.3-test_write_timestep.py b/build/lib/tests/4.3-test_write_timestep.py
new file mode 100644
index 0000000000000000000000000000000000000000..b50c74b13eea96c00a9d4f9ac3a67e29ecceb58e
--- /dev/null
+++ b/build/lib/tests/4.3-test_write_timestep.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+
+import sys
+from mpi4py import MPI
+import pandas as pd
+import timeit
+from datetime import datetime, timedelta
+import numpy as np
+from nes import create_nes
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_4.3_write_time_step_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['4.3.1.Parallel_Write', '4.3.2.Serial_Write'])
+
+# ======================================================================================================================
+# ===================================  PARALLEL WRITE  =================================================================
+# ======================================================================================================================
+
+test_name = '4.3.1.Parallel_Write'
+
+if rank == 0:
+    print(test_name)
+
+st_time = timeit.default_timer()
+# CREATE GRID
+centre_lat = 51
+centre_lon = 10
+west_boundary = -35
+south_boundary = -27
+inc_rlat = 0.2
+inc_rlon = 0.2
+nessy = create_nes(comm=None, info=False, projection='rotated',
+                   centre_lat=centre_lat, centre_lon=centre_lon,
+                   west_boundary=west_boundary, south_boundary=south_boundary,
+                   inc_rlat=inc_rlat, inc_rlon=inc_rlon)
+
+# ADD VARIABLES
+nessy.variables = {'var1': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32},
+                   'var2': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}}
+time_list = [datetime(year=2023, month=1, day=1) + timedelta(hours=x) for x in range(24)]
+nessy.set_time(time_list)
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CREATE EMPTY FILE (kept open for appending)
+st_time = timeit.default_timer()
+nessy.to_netcdf(test_name + '.nc', keep_open=True, info=False)
+
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+# CALCULATE & APPEND
+result.loc['calculate', test_name] = 0
+
+for i_time, time_aux in enumerate(time_list):
+    # CALCULATE
+    st_time = timeit.default_timer()
+
+    nessy.variables['var1']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time
+    nessy.variables['var2']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time
+
+    comm.Barrier()
+    result.loc['calculate', test_name] += timeit.default_timer() - st_time
+
+    # APPEND (accumulate the append times on top of the file-creation time)
+    st_time = timeit.default_timer()
+    nessy.append_time_step_data(i_time)
+    comm.Barrier()
+    if i_time == len(time_list) - 1:
+        nessy.close()
+    result.loc['write', test_name] += timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
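+
+# NOTE: the pattern above writes the file header once (keep_open=True), then fills
+# it one time step at a time with append_time_step_data(i_time) and closes the
+# dataset after the last step. The serial variant below is identical except that
+# to_netcdf() is called with serial=True, so the write goes through one process.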
+
+# ======================================================================================================================
+# ===================================  SERIAL WRITE  ===================================================================
+# ======================================================================================================================
+
+test_name = '4.3.2.Serial_Write'
+
+if rank == 0:
+    print(test_name)
+
+st_time = timeit.default_timer()
+# CREATE GRID
+centre_lat = 51
+centre_lon = 10
+west_boundary = -35
+south_boundary = -27
+inc_rlat = 0.2
+inc_rlon = 0.2
+nessy = create_nes(comm=None, info=False, projection='rotated',
+                   centre_lat=centre_lat, centre_lon=centre_lon,
+                   west_boundary=west_boundary, south_boundary=south_boundary,
+                   inc_rlat=inc_rlat, inc_rlon=inc_rlon)
+
+# ADD VARIABLES
+nessy.variables = {'var1': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32},
+                   'var2': {'data': None, 'units': 'kg.s-1', 'dtype': np.float32}}
+time_list = [datetime(year=2023, month=1, day=1) + timedelta(hours=x) for x in range(24)]
+nessy.set_time(time_list)
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CREATE EMPTY FILE (kept open for appending)
+st_time = timeit.default_timer()
+nessy.to_netcdf(test_name + '.nc', keep_open=True, info=False, serial=True)
+
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+# CALCULATE & APPEND
+result.loc['calculate', test_name] = 0
+
+for i_time, time_aux in enumerate(time_list):
+    # CALCULATE
+    st_time = timeit.default_timer()
+
+    nessy.variables['var1']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time
+    nessy.variables['var2']['data'] = np.ones((1, 1, nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1])) * i_time
+
+    comm.Barrier()
+    result.loc['calculate', test_name] += timeit.default_timer() - st_time
+
+    # APPEND (accumulate the append times on top of the file-creation time)
+    st_time = timeit.default_timer()
+    nessy.append_time_step_data(i_time)
+    comm.Barrier()
+    if i_time == len(time_list) - 1:
+        nessy.close()
+    result.loc['write', test_name] += timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
diff --git a/build/lib/tests/__init__.py b/build/lib/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/build/lib/tests/unit/__init__.py b/build/lib/tests/unit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/build/lib/tests/unit/test_imports.py b/build/lib/tests/unit/test_imports.py
new file mode 100644
index 0000000000000000000000000000000000000000..346ebadcddcb721858236fa969b52fca9a464140
--- /dev/null
+++ b/build/lib/tests/unit/test_imports.py
@@ -0,0 +1,106 @@
+import unittest
+
+
+class TestImports(unittest.TestCase):
+    def test_imports(self):
+        imports_to_test = [
+            'sys', 'os', 'time', 'timeit', 'math', 'calendar', 'datetime',
+            'warnings', 'geopandas', 'pandas', 'numpy', 'shapely',
+            'mpi4py', 'netCDF4', 'pyproj', 'configargparse', 'filelock',
+            'eccodes']
+
+        for module_name in imports_to_test:
+            with self.subTest(module=module_name):
+                try:
+                    __import__(module_name)
+                except ImportError as e:
+                    self.fail(f"Failed to import {module_name}: {e}")
+
+    def test_eccodes(self):
+        try:
+            import eccodes
+            from eccodes import codes_grib_new_from_file
+            from eccodes import codes_keys_iterator_new
+            from eccodes import codes_keys_iterator_next
+            from eccodes import codes_keys_iterator_get_name
+            from eccodes import codes_get_string
+            from eccodes import codes_keys_iterator_delete
+            from eccodes import codes_clone
+            from eccodes import codes_set
+            from eccodes import codes_set_values
+            from eccodes import codes_write
+            from eccodes import codes_release
+            from eccodes import codes_samples_path
+            import os
+            print("GRIB2 template: ", os.path.join(codes_samples_path(), 'GRIB2.tmpl'))
+
+            print("Eccodes: ", eccodes.__version__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_geopandas(self):
+        try:
+            import geopandas
+            print("GeoPandas: ", geopandas.__version__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_pandas(self):
+        try:
+            import pandas
+            print("Pandas: ", pandas.__version__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_numpy(self):
+        try:
+            import numpy
+            print("NumPy: ", numpy.__version__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_shapely(self):
+        try:
+            import shapely
+            print("Shapely: ", shapely.__version__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_mpi(self):
+        try:
+            import mpi4py
+            print("mpi4py: ", mpi4py.__version__)
+            from mpi4py import MPI
+            print("MPI Vendor: ", MPI.get_vendor())
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_netcdf4(self):
+        try:
+            import netCDF4
+            print("netCDF4 version:", netCDF4.__version__)
+            print("HDF5 version:", netCDF4.__hdf5libversion__)
+            print("NetCDF library version:", netCDF4.__netcdf4libversion__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_netcdf4_parallel(self):
+        try:
+            from mpi4py import MPI
+            from netCDF4 import Dataset
+            nc = Dataset('/tmp/parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD, info=MPI.Info())
+            nc.close()
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+    def test_pyproj(self):
+        try:
+            import pyproj
+            print("pyproj: ", pyproj.__version__)
+        except ImportError as e:
+            self.fail(f"Import error: {e}")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index a5533e42da531e4eecc68b7e9c35a3bde7aaa116..d859431a04423b4d4981170ccde230166d7a41bd 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python
 
-import sys
+import os, sys
 from gc import collect
 from warnings import warn
+from math import isclose
 from numpy import (array, ndarray, abs, mean, diff, dstack, append, tile, empty, unique, stack, vstack, full, isnan,
                    flipud, nan, float32, float64, ma, generic, character, issubdtype, arange, newaxis, concatenate,
-                   split, cumsum, zeros, column_stack)
+                   split, cumsum, zeros, column_stack, argsort, take)
 from pandas import Index, concat
 from geopandas import GeoDataFrame
 from datetime import timedelta, datetime
@@ -206,7 +207,7 @@ class Nes(object):
         create_nes : bool
             Indicates if you want to create the object from scratch (True) or through an existing file.
         balanced : bool
-            Indicates if you want a balanced parallelization or not.
+            Indicates if you want a balanced parallelization or not. Balanced datasets cannot be written in chunking mode.
         times : List[datetime] or None
             List of times to substitute the current ones while creation.
@@ -301,7 +302,7 @@ class Nes(object): # Lazy variables self.variables = self._get_lazy_variables() - + # Complete dimensions self._full_time = self.__get_time() self._full_time_bnds = self.__get_time_bnds() @@ -373,7 +374,7 @@ class Nes(object): @staticmethod def new(comm=None, path=None, info=False, dataset=None, parallel_method="Y", - avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, + avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ Initialize the Nes class. @@ -402,7 +403,7 @@ class Nes(object): create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. balanced : bool - Indicates if you want a balanced parallelization or not. + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. times : List[datetime] or None List of times to substitute the current ones while creation. @@ -422,13 +423,13 @@ class Nes(object): ------- int Max length of the string data - """ - + """ + if "strlen" in self.dataset.dimensions: strlen = self.dataset.dimensions["strlen"].size else: return None - + return strlen def set_strlen(self, strlen=75): @@ -442,7 +443,7 @@ class Nes(object): strlen : int or None Max length of the string """ - + self.strlen = strlen return None @@ -960,7 +961,7 @@ class Nes(object): self.read_axis_limits = self._get_read_axis_limits() self.write_axis_limits = self._get_write_axis_limits() - + return None def set_climatology(self, is_climatology): @@ -1098,11 +1099,11 @@ class Nes(object): ---------- coordinates : array Coordinates in degrees (latitude or longitude). - inc : float + inc : float Increment between centre values. - spatial_nv : int + spatial_nv : int Non-mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. - inverse : bool + inverse : bool For some grid latitudes. Returns @@ -1110,7 +1111,7 @@ class Nes(object): bounds : array An Array with as many elements as vertices for each value of coords. """ - + # Create new arrays moving the centres half increment less and more. coords_left = coordinates - inc / 2 coords_right = coordinates + inc / 2 @@ -1300,7 +1301,7 @@ class Nes(object): aux_nessy.variables[var_name][att_name] = att_value else: aux_nessy.variables[var_name]["data"] = aux_nessy.variables[var_name]["data"][[idx_time]] - + return aux_nessy def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, @@ -1450,7 +1451,7 @@ class Nes(object): self.lon_min = None return None - + def _get_projection_data(self, create_nes, **kwargs): """ Retrieves projection data based on grid details. @@ -1804,7 +1805,7 @@ class Nes(object): # ================================================================================================================== # Reading # ================================================================================================================== - + def _get_read_axis_limits(self): """ Calculate the 4D reading axis limits depending on if them have to balanced or not. 
@@ -1836,7 +1837,7 @@ class Nes(object): "y_min": None, "y_max": None, "z_min": None, "z_max": None, "t_min": None, "t_max": None} - + idx = self._get_idx_intervals() if self.parallel_method == "Y": y_len = idx["idx_y_max"] - idx["idx_y_min"] @@ -1855,7 +1856,7 @@ class Nes(object): axis_limits["t_min"] = idx["idx_t_min"] axis_limits["t_max"] = idx["idx_t_max"] - + elif self.parallel_method == "X": x_len = idx["idx_x_max"] - idx["idx_x_min"] if x_len < self.size: @@ -1873,7 +1874,7 @@ class Nes(object): axis_limits["t_min"] = idx["idx_t_min"] axis_limits["t_max"] = idx["idx_t_max"] - + elif self.parallel_method == "T": t_len = idx["idx_t_max"] - idx["idx_t_min"] if t_len < self.size: @@ -2616,10 +2617,10 @@ class Nes(object): else: raise NotImplementedError("Error with {0}. Only can be read netCDF with 4 dimensions or less".format( var_name)) - + # Unmask array data = self._unmask_array(data) - + return data def load(self, var_list=None): @@ -2633,7 +2634,7 @@ class Nes(object): var_list : List, str, None List (or single string) of the variables to be loaded. """ - + if (self.__ini_path is None) and (self.dataset is None): raise RuntimeError("Only data from existing files can be loaded.") @@ -2674,7 +2675,7 @@ class Nes(object): @staticmethod def _unmask_array(data): - """ + """ Missing to nan. This operation is done because sometimes the missing value is lost during the calculation. Parameters @@ -2687,7 +2688,7 @@ class Nes(object): array Unmasked array. """ - + if isinstance(data, ma.MaskedArray): try: data = data.filled(nan) @@ -2706,7 +2707,7 @@ class Nes(object): data_type : str or Type Data type, by default "float32" """ - + for var_name, var_info in self.variables.items(): if isinstance(var_info["data"], ndarray): self.variables[var_name]["data"] = self.variables[var_name]["data"].astype(data_type) @@ -2778,7 +2779,7 @@ class Nes(object): # ================================================================================================================== # Writing # ================================================================================================================== - + def _get_write_axis_limits(self): """ Calculate the 4D writing axis limits depending on if them have to balanced or not. @@ -3225,26 +3226,26 @@ class Nes(object): var_dtype = var_dict["data"].dtype if var_dtype is object: raise TypeError("Data dtype is object. 
Define dtype explicitly as dictionary key 'dtype'") - + if var_dict["data"] is not None: - + # Ensure data is of type numpy array (to create NES) if not isinstance(var_dict["data"], (ndarray, generic)): try: var_dict["data"] = array(var_dict["data"]) except AttributeError: raise AttributeError("Data for variable {0} must be a numpy array.".format(var_name)) - + # Convert list of strings to chars for parallelization if issubdtype(var_dtype, character): var_dict["data_aux"] = self._str2char(var_dict["data"]) var_dims += ("strlen",) var_dtype = "S1" - + if self.info: print("Rank {0:03d}: Writing {1} var ({2}/{3})".format( self.rank, var_name, i + 1, len(self.variables))) - + if not chunking: var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, complevel=self.zip_lvl) @@ -3280,7 +3281,7 @@ class Nes(object): self.write_axis_limits["z_min"]:self.write_axis_limits["z_max"], self.write_axis_limits["y_min"]:self.write_axis_limits["y_max"], self.write_axis_limits["x_min"]:self.write_axis_limits["x_max"]] = 0 - + elif len(att_value.shape) == 5: if "strlen" in var_dims: var[self.write_axis_limits["t_min"]:self.write_axis_limits["t_max"], @@ -3304,11 +3305,11 @@ class Nes(object): :] = att_value else: raise NotImplementedError("It is not possible to write 3D variables.") - + if self.info: print("Rank {0:03d}: Var {1} data ({2}/{3})".format( self.rank, var_name, i + 1, len(self.variables))) - + elif att_name not in ["chunk_size", "var_dims", "dimensions", "dtype", "data_aux"]: var.setncattr(att_name, att_value) @@ -3702,7 +3703,7 @@ class Nes(object): shapefile : GeoPandasDataFrame Shapefile dataframe. """ - + if self.shapefile is None: if self.lat_bnds is None or self.lon_bnds is None: @@ -3743,14 +3744,14 @@ class Nes(object): fids = self.get_fids() gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=geometry, crs="EPSG:4326") self.shapefile = gdf - + else: gdf = self.shapefile - + return gdf def write_shapefile(self, path): - """ + """ Save spatial GeoDataFrame (shapefile). Parameters @@ -3758,10 +3759,10 @@ class Nes(object): path : str Path to the output file. """ - + if self.shapefile is None: raise ValueError("Shapefile was not created.") - + if self.size == 1: # In serial, avoid gather self.shapefile.to_file(path) @@ -3771,13 +3772,13 @@ class Nes(object): if self.master: data = concat(data) data.to_file(path) - + return None def to_shapefile(self, path, time=None, lev=None, var_list=None, info=True): """ Create shapefile from NES data. - + 1. Create grid shapefile. 2. Add variables to shapefile (as independent function). 3. Write shapefile. @@ -3795,7 +3796,7 @@ class Nes(object): info: bool Flag to allow/suppress warnings when the 'time' or 'lev' parameters are None. Default is True. """ - + # If list is not defined, get all variables if var_list is None: var_list = list(self.variables.keys()) @@ -3835,7 +3836,7 @@ class Nes(object): if time not in self.time: raise ValueError("Time {} is not available. 
Choose from {}".format(time, self.time))
             idx_time = self.time.index(time)
-
+
         # Create shapefile
         self.create_shapefile()
 
@@ -3880,7 +3881,7 @@ class Nes(object):
             for lon_ind in range(0, len(self.lon["data"])):
                 centroids.append(Point(self.lon["data"][lon_ind],
                                        self.lat["data"][lat_ind]))
-
+
         # Create dataframe containing all points
         fids = self.get_fids()
         centroids_gdf = GeoDataFrame(index=Index(name="FID", data=fids.ravel()), geometry=centroids, crs="EPSG:4326")
@@ -4155,9 +4156,9 @@ class Nes(object):
         keep_nan : bool
             Indicates if you want to keep nan values after the interpolation
         """
-
+
         return horizontal_interpolation.interpolate_horizontal(
-            self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, 
+            self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info,
            to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux, keep_nan=keep_nan)
 
     def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True):
@@ -4199,7 +4200,7 @@ class Nes(object):
             self.cell_measures["cell_area"] = {"data": grid_area}
         else:
             grid_area = self.cell_measures["cell_area"]["data"]
-
+
         return grid_area
 
     @staticmethod
@@ -4264,3 +4265,86 @@ class Nes(object):
             Dictionary with data of grid edge longitudes.
         """
         raise NotImplementedError("create_providentia_exp_grid_edge_coordinates function is not implemented by default")
+
+    def _detect_longitude_format(self):
+        """
+        Determine whether the longitude values follow the [0, 360] or the [-180, 180] convention.
+
+        Returns
+        -------
+        bool
+            True if longitudes are in the [0, 360] convention, False otherwise.
+        """
+        longitudes = array(self.lon["data"])
+        # A field that fits entirely within [0, 180] satisfies both conventions
+        # and is reported here as [0, 360].
+        return bool(((longitudes >= 0) & (longitudes <= 360)).all())
+
+    def _check_if_data_is_loaded(self):
+        """
+        Verify that data is loaded for all variables.
+
+        Raises
+        ------
+        ValueError
+            If any variable's data is missing.
+        """
+        for variable in self.variables.keys():
+            if self.variables[variable]["data"] is None:
+                raise ValueError(f"All variables data must be loaded before using this function. "
+                                 f"Data for {variable} is not loaded.")
+
+    def convert_longitudes(self):
+        """
+        Convert longitudes from the [0, 360] range to the [-180, 180] range.
+
+        Longitudes and their boundaries are shifted and re-sorted, and every variable
+        with a longitude dimension is reordered to match.
+
+        Raises
+        ------
+        ValueError
+            If the method is run with more than one MPI process.
+        ValueError
+            If longitudes are already in [-180, 180] format or in an unrecognised format.
+        ValueError
+            If data is not fully loaded before conversion.
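+
+        Example
+        -------
+        A minimal sketch (the file names are hypothetical); the data must be
+        fully loaded before converting:
+
+        >>> from nes import open_netcdf
+        >>> nc = open_netcdf("data_lon_0_360.nc")
+        >>> nc.load()
+        >>> nc.convert_longitudes()
+        >>> nc.to_netcdf("data_lon_m180_180.nc")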
+        """
+        if self.comm.size > 1:
+            raise ValueError("This method is currently only available in serial.")
+
+        if not self._detect_longitude_format():
+            raise ValueError("Longitudes are already in [-180, 180] format or another unrecognised format.")
+
+        self._check_if_data_is_loaded()
+
+        # Shift the longitude values into [-180, 180] and sort them in ascending order
+        lon_data = self.lon["data"]
+        lon_data = lon_data % 360
+        lon_data[lon_data > 180] -= 360
+        sorted_indices = argsort(lon_data)
+        self.lon["data"] = lon_data[sorted_indices]
+        self.set_full_longitudes(self.lon)
+
+        # Adjust the longitude bounds, when present
+        if self.lon_bnds is not None:
+            lon_bnds_data = self.lon_bnds["data"]
+            lon_bnds_data = lon_bnds_data % 360
+            lon_bnds_data[lon_bnds_data > 180] -= 360
+            lon_bnds_sorted = lon_bnds_data[sorted_indices]
+
+            # Repair the cell that wraps across the antimeridian after the shift
+            if (lon_bnds_sorted[0][0] > lon_bnds_sorted[0][1]) and (isclose(lon_bnds_sorted[0][0], 180)):
+                lon_bnds_sorted[0][0] = -180
+            elif (lon_bnds_sorted[-1][0] > lon_bnds_sorted[-1][1]) and (isclose(lon_bnds_sorted[-1][1], -180)):
+                lon_bnds_sorted[-1][1] = 180
+
+            self.lon_bnds["data"] = lon_bnds_sorted
+            self.set_full_longitudes_boundaries(self.lon_bnds)
+
+        # Reorder every variable that has longitude among its dimensions (time, lev, lat, lon)
+        for name, var in self.variables.items():
+            if "longitude" in var["dimensions"]:
+                self.variables[name]["data"] = take(var["data"], sorted_indices, axis=3)
+
+        return None
diff --git a/nes/utilities/reorder_longitudes_cli.py b/nes/utilities/reorder_longitudes_cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f97c80366270ba535fe7d34a912146256e513bc
--- /dev/null
+++ b/nes/utilities/reorder_longitudes_cli.py
@@ -0,0 +1,38 @@
+import argparse
+from mpi4py import MPI
+
+from ..load_nes import open_netcdf
+
+
+def reorder_longitudes_cli():
+    """
+    Convert the longitudes of a NetCDF file to the [-180, 180] range and save the modified file.
+
+    Returns
+    -------
+    None
+    """
+    comm = MPI.COMM_WORLD
+    if comm.Get_size() > 1:
+        raise ValueError("Parallel not implemented yet. This script must be run with a single process.")
+
+    parser = argparse.ArgumentParser(description="Reorder longitudes in a NetCDF file.")
+
+    # Define expected arguments
+    parser.add_argument("infile", help="Input NetCDF file path")
+    parser.add_argument("outfile", help="Output NetCDF file path")
+
+    # Parse arguments
+    args = parser.parse_args()
+    infile = args.infile
+    outfile = args.outfile
+
+    # Open the file, load all data, convert the longitudes and save the result
+    nc = open_netcdf(infile)
+    nc.load()
+    nc.convert_longitudes()
+    nc.to_netcdf(outfile)
+    return None
diff --git a/setup.py b/setup.py
index a76cd35d44f825a4bc38be812b6b915b3d096dae..71994b51e48ab936247fa25d279ecb8c0371a552 100755
--- a/setup.py
+++ b/setup.py
@@ -58,4 +58,10 @@ setup(
     setup_requires=REQUIREMENTS['setup'],
     install_requires=REQUIREMENTS['install'],
     python_requires=">=3.7",
-)
+
+    entry_points={
+        "console_scripts": [
+            "nes_reorder_longitudes=nes.utilities.reorder_longitudes_cli:reorder_longitudes_cli",
+        ]
+    }
+)
\ No newline at end of file
diff --git a/tests/2.5-test_longitude_conversion.py b/tests/2.5-test_longitude_conversion.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac28da9156aba772bcf2fdb7abf75f81e819e3dc
--- /dev/null
+++ b/tests/2.5-test_longitude_conversion.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+import sys
+import timeit
+import pandas as pd
+from mpi4py import MPI
+from nes import open_netcdf
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+parallel_method = 'Y'
+
+result_path = "Times_test_2.5_longitudes_{0}_{1:03d}.csv".format(parallel_method, size)
+result = pd.DataFrame(index=['read', 'calculate', 'write'],
+                      columns=['2.5.1.Longitude_conversion'])
+
+# ======================================================================================================================
+# =====================================  FILE WITH LONGITUDES in [0, 360]  =============================================
+# ======================================================================================================================
+
+test_name = "2.5.1.Longitude_conversion"
+if rank == 0:
+    print(test_name)
+
+# READ
+st_time = timeit.default_timer()
+
+# NC file with longitudes in [0, 360].
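+# The same conversion is exposed as a console script (registered in setup.py above):
+#   nes_reorder_longitudes <infile> <outfile>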
+path_1 = '/gpfs/projects/bsc32/models/NES_tutorial_data/preprocessed_backup.nc'
+nessy_1 = open_netcdf(path=path_1, parallel_method=parallel_method, info=True)
+
+comm.Barrier()
+result.loc['read', test_name] = timeit.default_timer() - st_time
+
+# CONVERT LONGITUDES
+st_time = timeit.default_timer()
+nessy_1.load()
+nessy_1.convert_longitudes()
+print('Rank', rank, '-', 'Longitudes converted to [-180, 180]')
+comm.Barrier()
+result.loc['calculate', test_name] = timeit.default_timer() - st_time
+
+# WRITE
+path_2 = test_name.replace(' ', '_') + "_{0:03d}.nc".format(size)
+st_time = timeit.default_timer()
+nessy_1.to_netcdf(path_2, info=True)
+comm.Barrier()
+result.loc['write', test_name] = timeit.default_timer() - st_time
+
+comm.Barrier()
+if rank == 0:
+    print(result.loc[:, test_name])
+sys.stdout.flush()
+
+if rank == 0:
+    result.to_csv(result_path)
+    print("TEST PASSED SUCCESSFULLY!!!!!")
\ No newline at end of file
diff --git a/tests/clean_output.sh b/tests/clean_output.sh
index bac741a5f600de4401b549f75292230a6e13953b..d5d7810511ea4b17272fa2bc014c49072d6411a9 100644
--- a/tests/clean_output.sh
+++ b/tests/clean_output.sh
@@ -8,6 +8,7 @@ rm 2.1.*
 rm 2.2.*
 rm 2.3.*
 rm 2.4.*
+rm 2.5.*
 rm 3.1.*
 rm 3.2.*
diff --git a/tests/run_scalability_tests_nord3v2.sh b/tests/run_scalability_tests_nord3v2.sh
index 4c28785985d815926b57f6721396988578a4210d..3c2dd25441702a75f2e178aae55981020aeff87d 100644
--- a/tests/run_scalability_tests_nord3v2.sh
+++ b/tests/run_scalability_tests_nord3v2.sh
@@ -8,7 +8,7 @@
 module load Python/3.7.4-GCCcore-8.3.0
 module load NES/1.1.3-nord3-v2-foss-2019b-Python-3.7.4
 
-for EXE in "1.1-test_read_write_projection.py" "1.2-test_create_projection.py" "1.3-test_selecting.py" "2.1-test_spatial_join.py" "2.2-test_create_shapefile.py" "2.3-test_bounds.py" "2.4-test_cell_area.py" "3.1-test_vertical_interp.py" "3.2-test_horiz_interp_bilinear.py" "3.3-test_horiz_interp_conservative.py" "4.1-test_stats.py" "4.2-test_sum.py" "4.3-test_write_timestep.py"
+for EXE in "1.1-test_read_write_projection.py" "1.2-test_create_projection.py" "1.3-test_selecting.py" "2.1-test_spatial_join.py" "2.2-test_create_shapefile.py" "2.3-test_bounds.py" "2.4-test_cell_area.py" "2.5-test_longitude_conversion.py" "3.1-test_vertical_interp.py" "3.2-test_horiz_interp_bilinear.py" "3.3-test_horiz_interp_conservative.py" "4.1-test_stats.py" "4.2-test_sum.py" "4.3-test_write_timestep.py"
 do
 for nprocs in 1 2 4 8 16
 do
diff --git a/tests/test_bash.mn4.sh b/tests/test_bash.mn4.sh
index 5edea6722bf95d090ce5f5bbfa5e330e1c1bfbd0..3f8dc9e7360b6bea1a0ac114cd9483ce4b51017d 100644
--- a/tests/test_bash.mn4.sh
+++ b/tests/test_bash.mn4.sh
@@ -28,6 +28,7 @@ mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.1-test_spatial_join.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.2-test_create_shapefile.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.3-test_bounds.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.4-test_cell_area.py
+mpirun --mca mpi_warn_on_fork 0 -np 1 python 2.5-test_longitude_conversion.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.1-test_vertical_interp.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.2-test_horiz_interp_bilinear.py
diff --git a/tests/test_bash.mn5.sh b/tests/test_bash.mn5.sh
index 13e0a1b92542f3b8cdc5046e79d002b2e80f46a8..0966f3c34c89106e86b9a2eb46719dc2dbbc3e83 100644
--- a/tests/test_bash.mn5.sh
+++ b/tests/test_bash.mn5.sh
@@ -15,10 +15,11 @@ module purge
 module load anaconda
 source /apps/GPP/ANACONDA/2023.07/etc/profile.d/conda.sh
 conda deactivate
-conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev
-export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev/lib/python3.12/site-packages
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/PHENOMENA_v0.2.0_bsc124195
+#export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_dev/lib/python3.12/site-packages
+export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/PHENOMENA_v0.2.0_${USER}
 export SLURM_CPU_BIND=none
-export PYTHONPATH=/gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES:$PYTHONPATH
+#export PYTHONPATH=/gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES:$PYTHONPATH
 
 #conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4
 
@@ -26,7 +27,7 @@
 #export PYTHONPATH=/gpfs/projects/bsc32/repository/apps/conda_envs/NES_v1.1.4/lib/python3.12/site-packages
 #export SLURM_CPU_BIND=none
 
-cd /gpfs/scratch/bsc32/bsc032538/AC_PostProcess/NES/tests || exit
+cd /gpfs/scratch/bsc32/${USER}/AC_PostProcess/NES/tests || exit
 
 mpirun -np 4 python 1.1-test_read_write_projection.py
 mpirun -np 4 python 1.2-test_create_projection.py
@@ -36,6 +37,7 @@ mpirun -np 4 python 2.1-test_spatial_join.py
 mpirun -np 4 python 2.2-test_create_shapefile.py
 mpirun -np 4 python 2.3-test_bounds.py
 mpirun -np 4 python 2.4-test_cell_area.py
+mpirun -np 1 python 2.5-test_longitude_conversion.py
 mpirun -np 4 python 3.1-test_vertical_interp.py
 mpirun -np 4 python 3.2-test_horiz_interp_bilinear.py
diff --git a/tests/test_bash.nord3v2.sh b/tests/test_bash.nord3v2.sh
index ed5815320228191ac6456f8e6d97354b94dfe9cc..5cfe091990dc527e5d909c1813bbab82f36aca80 100644
--- a/tests/test_bash.nord3v2.sh
+++ b/tests/test_bash.nord3v2.sh
@@ -26,6 +26,7 @@ mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.1-test_spatial_join.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.2-test_create_shapefile.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.3-test_bounds.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 2.4-test_cell_area.py
+mpirun --mca mpi_warn_on_fork 0 -np 1 python 2.5-test_longitude_conversion.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.1-test_vertical_interp.py
 mpirun --mca mpi_warn_on_fork 0 -np 4 python 3.2-test_horiz_interp_bilinear.py