diff --git a/nes/__init__.py b/nes/__init__.py index 86a702fd344761f6fe5c4f773c117a944dc25940..884723bd725552252df224dc28550b1f849950ee 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -2,7 +2,6 @@ __date__ = "2023-06-22" __version__ = "1.1.3" from .load_nes import open_netcdf, concatenate_netcdfs -from .load_nes import open_raster from .create_nes import create_nes, from_shapefile from .nc_projections import * from .methods.cell_measures import calculate_geometry_area diff --git a/nes/create_nes.py b/nes/create_nes.py index 98f81f07a0eed533b945c88ac86db85e6e6fe752..7444bb5e2dfa83a5558a32de0ae9fc082abaf3a7 100644 --- a/nes/create_nes.py +++ b/nes/create_nes.py @@ -92,32 +92,32 @@ def create_nes(comm=None, info=False, projection=None, parallel_method='Y', bala parallel_method = 'X' elif parallel_method == 'T': raise NotImplementedError("Parallel method T not implemented yet") - nessy = PointsNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + nessy = PointsNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection in ['regular', 'global']: - nessy = LatLonNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + nessy = LatLonNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'rotated': - nessy = RotatedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + nessy = RotatedNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'rotated-nested': - nessy = RotatedNestedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + nessy = RotatedNestedNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'lcc': - nessy = LCCNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + nessy = LCCNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'mercator': - nessy = MercatorNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, + nessy = MercatorNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) diff --git a/nes/load_nes.py b/nes/load_nes.py index 72eb61de5c4515f9b6000d421e4300361a0768a2..dd08addaae9d2a99a4723aef36e5efdb2ae5c0a8 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -12,85 +12,63 @@ DIM_VAR_NAMES = ['lat', 'latitude', 'lat_bnds', 'lon', 'longitude', 'lon_bnds', 'cell_area', 'crs', 'rotated_pole', 'x', 'y', 'rlat', 'rlon', 'Lambert_conformal', 'mercator'] -def open_netcdf(path, comm=None, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, +def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, balanced=False): """ - Open a NetCDF file and return a Nes object with lazily loaded variables. + Open a netCDF file. Parameters ---------- path : str Path to the NetCDF file to read. - comm : MPI.COMM, optional - MPI communicator to use for parallel I/O. Default: MPI.COMM_WORLD. - info : bool, optional - Indicates whether to print reading/writing steps to stdout. - avoid_first_hours : int, optional - Number of hours to remove from the beginning of time steps. - avoid_last_hours : int, optional - Number of hours to remove from the end of time steps. - parallel_method : str, optional - Parallelization method. Default: 'Y'. + comm : MPI.COMM + MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. + xarray : bool + (Not working) Indicates if you want to use xarray. Default: False. + info : bool + Indicates if you want to print (stdout) the reading/writing steps. + avoid_first_hours : int + Number of hours to remove from first time steps. + avoid_last_hours : int + Number of hours to remove from last time steps. + parallel_method : str + Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T'] - balanced : bool, optional - Indicates if you want balanced parallelization. Note: Balanced datasets cannot be written in chunking mode. - first_level : int, optional + balanced : bool + Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. + first_level : int Index of the first level to use. - last_level : int or None, optional + last_level : int, None Index of the last level to use. None if it is the last. Returns ------- Nes - Nes object containing lazily loaded variables with metadata. - - Raises - ------ - FileNotFoundError - If the specified NetCDF file at `path` does not exist. - - Notes - ----- - This function supports parallel I/O using MPI. The `comm` parameter allows you to specify an MPI communicator for coordinating processes. - - Examples - -------- - Open a NetCDF file using the default communicator: - - ```python - nes_data = open_netcdf('/path/to/data.nc', info=True, parallel_method='Y') - ``` - - Open a NetCDF file with a custom MPI communicator: - - ```python - from mpi4py import MPI - - comm = MPI.COMM_WORLD - nes_data = open_netcdf('/path/to/data.nc', comm=comm, info=True, parallel_method='Y') - ``` + Nes object. Variables read in lazy mode (only metadata). + """ - Notes - ----- - - If `comm` is not provided, the default MPI communicator `MPI.COMM_WORLD` is used. - - The `info` parameter controls whether to print reading/writing steps to the standard output. + if comm is None: + comm = MPI.COMM_WORLD + else: + comm = comm - """ - # Set default communicator if not provided - comm = comm or MPI.COMM_WORLD - # Validate file existence if not os.path.exists(path): - raise FileNotFoundError(f"The specified NetCDF file at '{path}' does not exist.") - - # Open NetCDF dataset - dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) + raise FileNotFoundError(path) + if xarray: + dataset = None + else: + dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) + # Parallel is not needed for reading + # if comm.Get_size() == 1: + # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) + # else: + # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=True, comm=comm, info=MPI.Info()) - # Determine grid type and create Nes object if __is_rotated(dataset): # Rotated grids - nessy = RotatedNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + nessy = RotatedNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_points(dataset): if parallel_method == 'Y': warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'") @@ -98,231 +76,191 @@ def open_netcdf(path, comm=None, info=False, parallel_method='Y', avoid_first_ho parallel_method = 'X' if __is_points_ghost(dataset): # Points - GHOST - nessy = PointsNesGHOST(comm=comm, dataset=dataset, info=info, + nessy = PointsNesGHOST(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_points_providentia(dataset): # Points - Providentia - nessy = PointsNesProvidentia(comm=comm, dataset=dataset, info=info, + nessy = PointsNesProvidentia(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, - balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, + balanced=balanced,) else: # Points - non-GHOST - nessy = PointsNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + nessy = PointsNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_lcc(dataset): # Lambert conformal conic grids - nessy = LCCNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + nessy = LCCNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_mercator(dataset): # Mercator grids - nessy = MercatorNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + nessy = MercatorNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) else: # Regular grids - nessy = LatLonNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, + nessy = LatLonNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) return nessy def __is_rotated(dataset): """ - Check if the NetCDF dataset uses a rotated pole projection. + Check if the netCDF is in rotated pole projection or not. Parameters ---------- - dataset : netCDF4.Dataset - An open NetCDF dataset. + dataset : Dataset + netcdf4-python open dataset object. Returns ------- - bool - True if the dataset uses a rotated pole projection, False otherwise. - - Notes - ----- - This function checks for the presence of either a 'rotated_pole' variable or - dimensions 'rlat' and 'rlon' in the NetCDF dataset to determine if it is in a - rotated pole projection. + value : bool + Indicated if the netCDF is a rotated one. """ - rotated_pole_variable = 'rotated_pole' in dataset.variables - rlat_rlon_dimensions = 'rlat' in dataset.dimensions and 'rlon' in dataset.dimensions - return rotated_pole_variable or rlat_rlon_dimensions + if 'rotated_pole' in dataset.variables.keys(): + return True + elif ('rlat' in dataset.dimensions) and ('rlon' in dataset.dimensions): + return True + else: + return False def __is_points(dataset): """ - Check if the NetCDF dataset represents a non-GHOST points dataset. + Check if the netCDF is a points dataset in non-GHOST format or not. Parameters ---------- - dataset : netCDF4.Dataset - An open NetCDF dataset. + dataset : Dataset + netcdf4-python open dataset object. Returns ------- - bool - True if the dataset represents a non-GHOST points dataset, False otherwise. - - Notes - ----- - This function checks for the presence of a 'station' dimension in the NetCDF dataset - to determine if it represents a points dataset in non-GHOST format. + value : bool + Indicated if the netCDF is a points non-GHOST one. """ - return 'station' in dataset.dimensions + + if 'station' in dataset.dimensions: + return True + else: + return False def __is_points_ghost(dataset): """ - Check if the NetCDF dataset represents a GHOST points dataset. + Check if the netCDF is a points dataset in GHOST format or not. Parameters ---------- - dataset : netCDF4.Dataset - An open NetCDF dataset. + dataset : Dataset + netcdf4-python open dataset object. Returns ------- - bool - True if the dataset represents a GHOST points dataset, False otherwise. - - Notes - ----- - This function checks for the presence of 'N_flag_codes' and 'N_qa_codes' dimensions - in the NetCDF dataset to determine if it represents a points dataset in GHOST format. - + value : bool + Indicated if the netCDF is a points GHOST one. """ - return 'N_flag_codes' in dataset.dimensions and 'N_qa_codes' in dataset.dimensions + + if 'N_flag_codes' in dataset.dimensions and 'N_qa_codes' in dataset.dimensions: + return True + else: + return False def __is_points_providentia(dataset): """ - Check if the NetCDF dataset represents a Providentia points dataset. + Check if the netCDF is a points dataset in Providentia format or not. Parameters ---------- - dataset : netCDF4.Dataset - An open NetCDF dataset. + dataset : Dataset + netcdf4-python open dataset object. Returns ------- - bool - True if the dataset represents a Providentia points dataset, False otherwise. - - Notes - ----- - This function checks for the presence of 'grid_edge', 'model_latitude', and 'model_longitude' - dimensions in the NetCDF dataset to determine if it represents a points dataset in Providentia format. - + value : bool + Indicated if the netCDF is a points Providentia one. """ - return ('grid_edge' in dataset.dimensions and 'model_latitude' in dataset.dimensions and - 'model_longitude' in dataset.dimensions) + + if (('grid_edge' in dataset.dimensions) and ('model_latitude' in dataset.dimensions) + and ('model_longitude' in dataset.dimensions)): + return True + else: + return False def __is_lcc(dataset): """ - Check if the NetCDF dataset is in Lambert Conformal Conic (LCC) projection. + Check if the netCDF is in Lambert Conformal Conic (LCC) projection or not. Parameters ---------- - dataset : netCDF4.Dataset - An open NetCDF dataset. + dataset : Dataset + netcdf4-python open dataset object. Returns ------- - bool - True if the dataset is in Lambert Conformal Conic (LCC) projection, False otherwise. - - Notes - ----- - This function checks for the presence of 'Lambert_Conformal' or 'Lambert_conformal' - variables in the NetCDF dataset to determine if it is in Lambert Conformal Conic projection. + value : bool + Indicated if the netCDF is a LCC one. """ - return 'Lambert_Conformal' in dataset.variables or 'Lambert_conformal' in dataset.variables + + if 'Lambert_Conformal' in dataset.variables.keys() or 'Lambert_conformal' in dataset.variables.keys(): + return True + else: + return False def __is_mercator(dataset): """ - Check if the NetCDF dataset is in Mercator projection. + Check if the netCDF is in Mercator projection or not. Parameters ---------- - dataset : netCDF4.Dataset - An open NetCDF dataset. + dataset : Dataset + netcdf4-python open dataset object. Returns ------- - bool - True if the dataset is in Mercator projection, False otherwise. - - Notes - ----- - This function checks for the presence of 'mercator' variable in the NetCDF dataset - to determine if it is in Mercator projection. - + value : bool + Indicated if the netCDF is a Mercator one. """ - return 'mercator' in dataset.variables + + if 'mercator' in dataset.variables.keys(): + return True + else: + return False def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, balanced=False): """ - Concatenate variables from different sources. + Concatenate variables form different sources. Parameters ---------- - nessy_list : list of Nes or str - List of Nes objects or paths to NetCDF files for concatenation. - comm : MPI.Communicator, optional - MPI Communicator. Default: None. - info : bool, optional - Indicates whether to print reading/writing steps to stdout. Default: False. - parallel_method : str, optional - Parallelization method. Default: 'Y'. Accepted values: ['X', 'Y', 'T'] - avoid_first_hours : int, optional - Number of hours to remove from the beginning of time steps. Default: 0. - avoid_last_hours : int, optional - Number of hours to remove from the end of time steps. Default: 0. - first_level : int, optional - Index of the first level to use. Default: 0. - last_level : int or None, optional - Index of the last level to use. None if it is the last. Default: None. - balanced : bool, optional - Indicates if balanced parallelization is desired. Note: Balanced datasets cannot be written in chunking mode. - Default: False. + nessy_list : list + List of Nes objects or list of paths to concatenate. + comm : MPI.Communicator + MPI Communicator. Returns ------- Nes - Nes object with all the concatenated variables. - - Raises - ------ - TypeError - If the input is not a list. - FileNotFoundError - If a provided path to a NetCDF file does not exist. - ValueError - If the input list is empty. + Nes object with all the variables. """ if not isinstance(nessy_list, list): - raise TypeError("You must pass a list of NES objects or paths.") - - if not nessy_list: - raise ValueError("Input list is empty.") + raise AttributeError("You must pass a list of NES objects or paths.") if isinstance(nessy_list[0], str): - if not os.path.exists(nessy_list[0]): - raise FileNotFoundError(nessy_list[0]) nessy_first = open_netcdf(nessy_list[0], comm=comm, parallel_method=parallel_method, @@ -336,10 +274,8 @@ def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', nessy_first.load() else: nessy_first = nessy_list[0] - for aux_nessy in nessy_list[1:]: + for i, aux_nessy in enumerate(nessy_list[1:]): if isinstance(aux_nessy, str): - if not os.path.exists(aux_nessy): - raise FileNotFoundError(aux_nessy) nc_add = Dataset(filename=aux_nessy, mode='r') for var_name, var_info in nc_add.variables.items(): if var_name not in DIM_VAR_NAMES: @@ -390,62 +326,3 @@ def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', nessy_first.concatenate(aux_nessy) return nessy_first - - -def open_raster(path, comm=None, info=False, parallel_method='Y'): - """ - Open a Raster (TIFF) file and return a Raster NES object. - - This function reads a Raster (TIFF) file, initializes a Raster NES object, and returns it. - - Parameters - ---------- - path : str - The path to the Raster (TIFF) file to be read. - - comm : MPI.COMM, optional - MPI communicator to use in the Raster NES object. Default: MPI.COMM_WORLD. - - info : bool, optional - Indicates whether to print the reading/writing steps to stdout. - - parallel_method : str, optional - Specifies the parallelization method to be used in the Raster NES object. Default: 'Y'. - Accepted values: ['Y', 'X']. - - Returns - ------- - RasterNes - The initialized Raster NES object. - - Raises - ------ - FileNotFoundError - If the specified Raster (TIFF) file does not exist. - - RuntimeError - If an unsupported parallelization method ('T') is specified. - - Notes - ----- - - If the MPI communicator (`comm`) is not provided, it defaults to MPI.COMM_WORLD. - - The Raster NES object is created using the RasterNes class with the specified parameters. - - Example - ------- - >>> raster_path = 'path/to/your/raster.tif' - >>> raster_nes_obj = open_raster(raster_path, comm=comm, info=True, parallel_method='Y') - """ - raise NotImplementedError('Raster NES object') - if comm is None: - comm = MPI.COMM_WORLD - - if not os.path.exists(path): - raise FileNotFoundError(f"The specified Raster (TIFF) file does not exist: {path}") - - if parallel_method == 'T': - raise RuntimeError("Parallel method 'T' is not accepted for opening raster files.") - - nessy = RasterNes(path=path, comm=comm, info=info, parallel_method=parallel_method) - - return nessy diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py index aacd498fc8bdd48e2aed2911a78e687a8d1dcbb7..3887bd905b1dc47b8fac119a31fb9a331484b56e 100644 --- a/nes/methods/horizontal_interpolation.py +++ b/nes/methods/horizontal_interpolation.py @@ -84,7 +84,7 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind='Neares # Remove original file information final_dst.__ini_path = None - final_dst.dataset = None + final_dst.netcdf = None final_dst.dataset = None # Return final_dst diff --git a/nes/methods/vertical_interpolation.py b/nes/methods/vertical_interpolation.py index 3100cf93106efe913a696dfd15c41376b11ee083..7260fdbe191484129649b0feff7e3efcc4d95728 100644 --- a/nes/methods/vertical_interpolation.py +++ b/nes/methods/vertical_interpolation.py @@ -328,5 +328,6 @@ def interpolate_vertical(self, new_levels, new_src_vertical=None, kind='linear', # Remove original file information self.__ini_path = None self.dataset = None + self.netcdf = None return self diff --git a/nes/nc_projections/__init__.py b/nes/nc_projections/__init__.py index f5c287b5c799c09735e21d131124a0829b799ef2..d4c4b9f8656b43a1646228e46629ade913236006 100644 --- a/nes/nc_projections/__init__.py +++ b/nes/nc_projections/__init__.py @@ -7,4 +7,3 @@ from .points_nes_ghost import PointsNesGHOST from .points_nes_providentia import PointsNesProvidentia from .lcc_nes import LCCNes from .mercator_nes import MercatorNes -from .raster_nes import RasterNes diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 87fec78a8ff568c3ce02aefaf4d335675cabb951..5c2fcd0350e1c05a4af589ad571891270d3e4143 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -5,6 +5,8 @@ import gc import warnings import numpy as np import pandas as pd +from datetime import timedelta +from xarray import open_dataset from netCDF4 import Dataset, num2date, date2num, stringtochar from mpi4py import MPI from cfunits import Units @@ -35,13 +37,17 @@ class Nes(object): Size of the communicator. info : bool Indicates if you want to print reading/writing info. + is_xarray : bool + (Not working) Indicates if you want to use xarray as default. __ini_path : str Path to the original file to read when open_netcdf is called. hours_start : int Number of hours to avoid from the first original values. hours_end : int Number of hours to avoid from the last original values. - dataset : Dataset + dataset : xr.Dataset + (not working) xArray Dataset. + netcdf : Dataset netcdf4-python Dataset. variables : dict Variables information. @@ -54,8 +60,12 @@ class Nes(object): Vertical level dictionary with the complete 'data' key for all the values and the rest of the attributes. _lat : dict Latitudes dictionary with the complete 'data' key for all the values and the rest of the attributes. - _lon _ dict + _lon : dict Longitudes dictionary with the complete 'data' key for all the values and the rest of the attributes. + _lat_bnds : None or dict + Latitude bounds dictionary with the complete 'data' key for the latitudinal boundaries of each grid and the rest of the attributes. + _lon_bnds : None or dict + Longitude bounds dictionary with the complete 'data' key for the longitudinal boundaries of each grid and the rest of the attributes. parallel_method : str Parallel method to read/write. Can be chosen any of the following axis to parallelize: 'T', 'Y' or 'X'. @@ -73,6 +83,10 @@ class Nes(object): Latitudes dictionary with the portion of 'data' corresponding to the rank values. lon : dict Longitudes dictionary with the portion of 'data' corresponding to the rank values. + lat_bnds : None or dict + Latitude bounds dictionary with the portion of 'data' for the latitudinal boundaries corresponding to the rank values. + lon_bnds : None or dict + Longitude bounds dictionary with the portion of 'data' for the longitudinal boundaries corresponding to the rank values. global_attrs : dict Global attributes with the attribute name as key and data as values. _var_dim : None or tuple @@ -87,7 +101,7 @@ class Nes(object): Dictionary with the projection information. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -101,8 +115,10 @@ class Nes(object): Path to the NetCDF to initialize the object. info: bool Indicates if you want to get reading/writing info. - dataset: Dataset or None + dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default over Y axis accepted values: ['X', 'Y', 'T']. @@ -124,13 +140,17 @@ class Nes(object): """ # MPI Initialization - self.comm = comm or MPI.COMM_WORLD + if comm is None: + self.comm = MPI.COMM_WORLD + else: + self.comm = comm self.rank = self.comm.Get_rank() self.master = self.rank == 0 self.size = self.comm.Get_size() # General info self.info = info + self.is_xarray = xarray self.__ini_path = path self.shapefile = None @@ -160,6 +180,7 @@ class Nes(object): # NetCDF object if create_nes: + self.netcdf = None self.dataset = None # Set string length @@ -174,8 +195,8 @@ class Nes(object): # Complete dimensions self._time = times - self._time_bnds = self._get_time_bnds(create_nes) - self._lat_bnds, self._lon_bnds = self._get_coordinates_bnds(create_nes) + self._time_bnds = self.__get_time_bnds(create_nes) + self._lat_bnds, self._lon_bnds = self.__get_coordinates_bnds(create_nes) self._lev = {'data': np.array([0]), 'units': '', 'positive': 'up'} @@ -191,16 +212,27 @@ class Nes(object): self.lat_bnds, self.lon_bnds = self._lat_bnds, self._lon_bnds # Cell measures screening - self.cell_measures = self._get_cell_measures(create_nes) + self.cell_measures = self.__get_cell_measures(create_nes) # Set NetCDF attributes - self.global_attrs = self._get_global_attributes(create_nes) + self.global_attrs = self.__get_global_attributes(create_nes) else: + if dataset is not None: - self.dataset = dataset - elif self.get_ini_path() is not None: - self.open() + if self.is_xarray: + self.dataset = dataset + self.netcdf = None + else: + self.dataset = None + self.netcdf = dataset + elif self.__ini_path is not None: + if self.is_xarray: + self.dataset = self.__open_dataset() + self.netcdf = None + else: + self.dataset = None + self.netcdf = self.__open_netcdf4() # Get string length self.strlen = self._get_strlen() @@ -212,15 +244,15 @@ class Nes(object): self._get_projection() # Complete dimensions - self._time = self._get_time() - self._time_bnds = self._get_time_bnds() + self._time = self.__get_time() + self._time_bnds = self.__get_time_bnds() self._lev = self._get_coordinate_dimension(['lev', 'level', 'lm', 'plev']) self._lat = self._get_coordinate_dimension(['lat', 'latitude', 'latitudes']) self._lon = self._get_coordinate_dimension(['lon', 'longitude', 'longitudes']) - self._lat_bnds, self._lon_bnds = self._get_coordinates_bnds() + self._lat_bnds, self._lon_bnds = self.__get_coordinates_bnds() # Complete cell measures - self._cell_measures = self._get_cell_measures() + self._cell_measures = self.__get_cell_measures() # Set axis limits for parallel reading self.read_axis_limits = self.get_read_axis_limits() @@ -241,7 +273,7 @@ class Nes(object): self.write_axis_limits = self.get_write_axis_limits() # Set NetCDF attributes - self.global_attrs = self._get_global_attributes() + self.global_attrs = self.__get_global_attributes() # Writing options self.zip_lvl = 0 @@ -264,7 +296,7 @@ class Nes(object): self.first_level = None @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -280,6 +312,8 @@ class Nes(object): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default over Y axis accepted values: ['X', 'Y', 'T']. @@ -300,7 +334,7 @@ class Nes(object): List of times to substitute the current ones while creation. """ - new = Nes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, + new = Nes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -316,8 +350,8 @@ class Nes(object): Max length of the string data """ - if 'strlen' in self.dataset.dimensions: - strlen = self.dataset.dimensions['strlen'].size + if 'strlen' in self.netcdf.dimensions: + strlen = self.netcdf.dimensions['strlen'].size else: return None @@ -339,9 +373,6 @@ class Nes(object): return None - def get_ini_path(self): - return self.__ini_path - def __del__(self): """ To delete the Nes object and close all the open datasets. @@ -454,7 +485,7 @@ class Nes(object): """ nessy = deepcopy(self) - nessy.dataset = None + nessy.netcdf = None if copy_vars: nessy.set_communicator(self.comm) nessy.variables = deepcopy(self.variables) @@ -657,13 +688,13 @@ class Nes(object): lat_bnds = self.create_single_spatial_bounds(self._lat['data'], inc_lat, spatial_nv=2) self._lat_bnds = {'data': deepcopy(lat_bnds)} - self.lat_bnds = {'data': lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], :]} + self.lat_bnds = {'data': lat_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], :]} inc_lon = np.abs(np.mean(np.diff(self._lon['data']))) lon_bnds = self.create_single_spatial_bounds(self._lon['data'], inc_lon, spatial_nv=2) self._lon_bnds = {'data': deepcopy(lon_bnds)} - self.lon_bnds = {'data': lon_bnds[self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :]} + self.lon_bnds = {'data': lon_bnds[self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], :]} return None @@ -724,12 +755,16 @@ class Nes(object): if isinstance(var_list, str): var_list = [var_list] - if self.variables is not None: - for var_name in var_list: - if var_name in self.variables: - if 'data' in self.variables[var_name].keys(): - del self.variables[var_name]['data'] - del self.variables[var_name] + if self.is_xarray: + self.dataset = self.dataset.drop_vars(var_list) + self.variables = self._get_lazy_variables() + else: + if self.variables is not None: + for var_name in var_list: + if var_name in self.variables: + if 'data' in self.variables[var_name].keys(): + del self.variables[var_name]['data'] + del self.variables[var_name] gc.collect() return None @@ -1027,14 +1062,13 @@ class Nes(object): Statistic to perform. Accepted values: "max", "mean" and "min". type_op : str Type of statistic to perform. Accepted values: "calendar", "alltsteps", and "withoutt0". - - "calendar": Calculate the statistic using the time metadata. - It will avoid single time step by day calculations + - "calendar": Calculate the statistic using the time metadata. It will avoid single time step by day calculations - "alltsteps": Calculate a single time statistic with all the time steps. - "withoutt0": Calculate a single time statistic with all the time steps avoiding the first one. """ if self.parallel_method == 'T': - raise NotImplementedError("Statistics are not implemented on time axis paralelitation method.") + raise NotImplementedError("Statistics are not implemented on time axis parallel method.") time_interval = self.get_time_interval() if type_op == 'calendar': aux_time_bounds = [] @@ -1138,6 +1172,7 @@ class Nes(object): @staticmethod def _get_axis_index_(axis): + if axis == 'T': value = 0 elif axis == 'Z': @@ -1148,9 +1183,11 @@ class Nes(object): value = 3 else: raise ValueError("Unknown axis: {0}".format(axis)) + return value def sum_axis(self, axis='Z'): + if self.parallel_method == axis: raise NotImplementedError("It is not possible to sum the axis with it is parallelized '{0}'".format( self.parallel_method)) @@ -1168,10 +1205,95 @@ class Nes(object): self.time = [self.time[0]] self._time = [self._time[0]] if axis == 'Z': - self.lev['data'] = [self.lev['data'][0]] - self._lev['data'] = [self._lev['data'][0]] + self.lev['data'] = np.array([self.lev['data'][0]]) + self._lev['data'] = np.array([self._lev['data'][0]]) + return None + def find_time_id(self, time): + """ + Find index of time in time array. + + Parameters + ---------- + time : datetime.datetime + Time element. + + Returns + ------- + int + Index of time element. + """ + + if time in self.time: + return self.time.index(time) + + def rolling_mean(self, var_list=None, hours=8): + """ + Calculate rolling mean for given hours + + Parameters + ---------- + var_list : : List, str, None + List (or single string) of the variables to be loaded. + hours : int, optional + Window hours to calculate rolling mean, by default 8 + + Returns + ------- + Nes + Nes object + """ + + if self.parallel_method == 'T': + raise NotImplementedError("The rolling mean cannot be calculated using the time axis parallel method.") + + aux_nessy = self.copy(copy_vars=False) + aux_nessy.set_communicator(self.comm) + + if isinstance(var_list, str): + var_list = [var_list] + elif var_list is None: + var_list = list(self.variables.keys()) + + for var_name in var_list: + # Load variables if they have not been loaded previously + if self.variables[var_name]['data'] is None: + self.load(var_name) + + # Get original file shape + nessy_shape = self.variables[var_name]['data'].shape + + # Initialise array + aux_nessy.variables[var_name] = {} + aux_nessy.variables[var_name]['data'] = np.empty(shape=nessy_shape) + aux_nessy.variables[var_name]['dimensions'] = deepcopy(self.variables[var_name]['dimensions']) + + for curr_time in self.time: + # Get previous time given a set of hours + prev_time = curr_time - timedelta(hours=(hours-1)) + + # Get time indices + curr_time_id = self.find_time_id(curr_time) + prev_time_id = self.find_time_id(prev_time) + + # Get mean if previous time is available + if prev_time_id is not None: + if self.info: + print(f'Calculating mean between {prev_time} and {curr_time}.') + aux_nessy.variables[var_name]['data'][curr_time_id, :, :, :] = self.variables[var_name]['data'][ + prev_time_id:curr_time_id, :, :, :].mean(axis=0, keepdims=True) + # Fill with nan if previous time is not available + else: + if self.info: + msg = f'Mean between {prev_time} and {curr_time} cannot be calculated ' + msg += f'because data for {prev_time} is not available.' + print(msg) + aux_nessy.variables[var_name]['data'][curr_time_id, :, :, :] = np.full(shape= + (1, nessy_shape[1], nessy_shape[2], nessy_shape[3]), fill_value=np.nan) + + return aux_nessy + # ================================================================================================================== # Reading # ================================================================================================================== @@ -1186,6 +1308,7 @@ class Nes(object): Dictionary with the 4D limits of the rank data to read. t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. """ + if self.balanced: return self.get_read_axis_limits_balanced() else: @@ -1394,8 +1517,6 @@ class Nes(object): Index of the time array. """ - from datetime import timedelta - if first: idx = self._time.index(self._time[0] + timedelta(hours=hours)) else: @@ -1432,10 +1553,37 @@ class Nes(object): Open the NetCDF. """ - self.dataset = self.__open_netcdf4() + if self.is_xarray: + self.dataset = self.__open_dataset() + self.netcdf = None + else: + self.dataset = None + self.netcdf = self.__open_netcdf4() return None + def __open_dataset(self): + """ + Open the NetCDF with xarray. + + Returns + ------- + dataset : xr.Dataset + Open dataset. + """ + + if self.master: + warnings.filterwarnings('ignore') # Disabling warnings while reading MONARCH original file + dataset = open_dataset(self.__ini_path, decode_coords='all') + warnings.filterwarnings('default') # Re-activating warnings + else: + dataset = None + + dataset = self.comm.bcast(dataset, root=0) + self.dataset = dataset + + return dataset + def __open_netcdf4(self, mode='r'): """ Open the NetCDF with netcdf4-python. @@ -1452,11 +1600,11 @@ class Nes(object): """ if self.size == 1: - netcdf = Dataset(self.get_ini_path(), format="NETCDF4", mode=mode, parallel=False) + netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=False) else: - netcdf = Dataset(self.get_ini_path(), format="NETCDF4", mode=mode, parallel=True, comm=self.comm, + netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=True, comm=self.comm, info=MPI.Info()) - self.dataset = netcdf + self.netcdf = netcdf return netcdf @@ -1468,9 +1616,9 @@ class Nes(object): if self.master: self.serial_nc.close() self.serial_nc = None - if (hasattr(self, 'netcdf')) and (self.dataset is not None): - self.dataset.close() - self.dataset = None + if (hasattr(self, 'netcdf')) and (self.netcdf is not None): + self.netcdf.close() + self.netcdf = None return None @@ -1598,63 +1746,60 @@ class Nes(object): resolution = 'hours' return resolution - def _get_time(self): + def __get_time(self): """ Get the NetCDF file time values. Returns ------- - List[datetime.datetime] - A list of datetime objects corresponding to the time values in the NetCDF data. - - Notes - ----- - This method is intended for internal use and is prefixed with double underscores to signify its private nature. - + time : List + List of times (datetime.datetime) of the NetCDF data. """ - if self.master: - nc_var = self.dataset.variables['time'] - time_data, units, calendar = self.__parse_time(nc_var) - # Extracting time resolution depending on the units - self._time_resolution = self.__get_time_resolution_from_units(units) - # Checking if it is a climatology dataset - if hasattr(nc_var, 'climatology'): - self._climatology = True - self._climatology_var_name = nc_var.climatology - time = num2date(time_data, units, calendar=calendar) - time = [aux.replace(second=0, microsecond=0) for aux in time] + if self.is_xarray: + time = self.variables['time'] else: - time = None - time = self.comm.bcast(time, root=0) + if self.master: + nc_var = self.netcdf.variables['time'] + time_data, units, calendar = self.__parse_time(nc_var) + # Extracting time resolution depending on the units + self._time_resolution = self.__get_time_resolution_from_units(units) + # Checking if it is a climatology dataset + if hasattr(nc_var, 'climatology'): + self._climatology = True + self._climatology_var_name = nc_var.climatology + time = num2date(time_data, units, calendar=calendar) + time = [aux.replace(second=0, microsecond=0) for aux in time] + else: + time = None + time = self.comm.bcast(time, root=0) self.free_vars('time') return time - def _get_time_bnds(self, create_nes=False): + def __get_time_bnds(self, create_nes=False): """ Get the NetCDF time bounds values. Parameters ---------- - create_nes : bool, optional - If True, indicates that the method will create the object from scratch. - If False (default), the method will use an existing file to obtain time bounds. + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. Returns ------- - List[datetime] - A list of datetime objects representing the time bounds in the NetCDF data. + time_bnds : List + List of time bounds (datetime) of the NetCDF data. """ if self.master: if not create_nes: - if 'time_bnds' in self.dataset.variables.keys() or self._climatology: - time = self.dataset.variables['time'] + if 'time_bnds' in self.netcdf.variables.keys() or self._climatology: + time = self.netcdf.variables['time'] if self._climatology: - nc_var = self.dataset.variables[self._climatology_var_name] + nc_var = self.netcdf.variables[self._climatology_var_name] else: - nc_var = self.dataset.variables['time_bnds'] + nc_var = self.netcdf.variables['time_bnds'] time_bnds = num2date(nc_var[:], self.__parse_time_unit(time.units), calendar=time.calendar).tolist() else: @@ -1670,7 +1815,7 @@ class Nes(object): return time_bnds - def _get_coordinates_bnds(self, create_nes=False): + def __get_coordinates_bnds(self, create_nes=False): """ Get the NetCDF coordinates bounds values. @@ -1687,30 +1832,34 @@ class Nes(object): Longitude bounds of the NetCDF data. """ - if self.master: - if not create_nes: - if 'lat_bnds' in self.dataset.variables.keys(): - lat_bnds = {'data': self._unmask_array(self.dataset.variables['lat_bnds'][:])} + if self.is_xarray: + lat_bnds = self.variables['lat_bnds'] + lon_bnds = self.variables['lon_bnds'] + else: + if self.master: + if not create_nes: + if 'lat_bnds' in self.netcdf.variables.keys(): + lat_bnds = {'data': self._unmask_array(self.netcdf.variables['lat_bnds'][:])} + else: + lat_bnds = None + if 'lon_bnds' in self.netcdf.variables.keys(): + lon_bnds = {'data': self._unmask_array(self.netcdf.variables['lon_bnds'][:])} + else: + lon_bnds = None else: lat_bnds = None - if 'lon_bnds' in self.dataset.variables.keys(): - lon_bnds = {'data': self._unmask_array(self.dataset.variables['lon_bnds'][:])} - else: lon_bnds = None else: lat_bnds = None lon_bnds = None - else: - lat_bnds = None - lon_bnds = None - lat_bnds = self.comm.bcast(lat_bnds, root=0) - lon_bnds = self.comm.bcast(lon_bnds, root=0) + lat_bnds = self.comm.bcast(lat_bnds, root=0) + lon_bnds = self.comm.bcast(lon_bnds, root=0) self.free_vars(['lat_bnds', 'lon_bnds']) return lat_bnds, lon_bnds - def _get_cell_measures(self, create_nes=False): + def __get_cell_measures(self, create_nes=False): """ Get the NetCDF cell measures values. @@ -1728,9 +1877,9 @@ class Nes(object): c_measures = {} if self.master: if not create_nes: - if 'cell_area' in self.dataset.variables.keys(): + if 'cell_area' in self.netcdf.variables.keys(): c_measures['cell_area'] = {} - c_measures['cell_area']['data'] = self._unmask_array(self.dataset.variables['cell_area'][:]) + c_measures['cell_area']['data'] = self._unmask_array(self.netcdf.variables['cell_area'][:]) c_measures = self.comm.bcast(c_measures, root=0) self.free_vars(['cell_area']) @@ -1759,10 +1908,12 @@ class Nes(object): try: dimension_name = set(possible_names).intersection(set(self.variables.keys())).pop() - - nc_var = self.variables[dimension_name].copy() - nc_var['data'] = self.dataset.variables[dimension_name][:] - if hasattr(nc_var, 'units'): + if self.is_xarray: + nc_var = self.dataset[dimension_name] + else: + nc_var = self.variables[dimension_name].copy() + nc_var['data'] = self.netcdf.variables[dimension_name][:] + if hasattr(nc_var, 'units'): if nc_var['units'] in ['unitless', '-']: nc_var['units'] = '' self.free_vars(dimension_name) @@ -1883,29 +2034,32 @@ class Nes(object): ...} """ - if self.master: - variables = {} - # Initialise data - for var_name, var_info in self.dataset.variables.items(): - variables[var_name] = {} - variables[var_name]['data'] = None - variables[var_name]['dimensions'] = var_info.dimensions - variables[var_name]['dtype'] = var_info.dtype - if variables[var_name]['dtype'] in [str, np.object]: - if self.strlen is None: - self.set_strlen() - variables[var_name]['dtype'] = str - - # Avoid some attributes - for attrname in var_info.ncattrs(): - if attrname not in ['missing_value', '_FillValue']: - value = getattr(var_info, attrname) - if value in ['unitless', '-']: - value = '' - variables[var_name][attrname] = value + if self.is_xarray: + variables = self.dataset.variables else: - variables = None - variables = self.comm.bcast(variables, root=0) + if self.master: + variables = {} + # Initialise data + for var_name, var_info in self.netcdf.variables.items(): + variables[var_name] = {} + variables[var_name]['data'] = None + variables[var_name]['dimensions'] = var_info.dimensions + variables[var_name]['dtype'] = var_info.dtype + if variables[var_name]['dtype'] in [str, np.object]: + if self.strlen is None: + self.set_strlen() + variables[var_name]['dtype'] = str + + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ['missing_value', '_FillValue']: + value = getattr(var_info, attrname) + if value in ['unitless', '-']: + value = '' + variables[var_name][attrname] = value + else: + variables = None + variables = self.comm.bcast(variables, root=0) return variables @@ -1924,7 +2078,7 @@ class Nes(object): Portion of the variable data corresponding to the rank. """ - nc_var = self.dataset.variables[var_name] + nc_var = self.netcdf.variables[var_name] var_dims = nc_var.dimensions # Read data in 4 dimensions @@ -1999,11 +2153,11 @@ class Nes(object): List (or single string) of the variables to be loaded. """ - if (self.get_ini_path() is None) and (self.dataset is None): + if (self.__ini_path is None) and (self.dataset is None) and (self.netcdf is None): raise RuntimeError('Only data from existing files can be loaded.') - if self.dataset is None: - self.open() + if self.netcdf is None: + self.__open_dataset() close = True else: close = False @@ -2096,6 +2250,7 @@ class Nes(object): if isinstance(aux_nessy, str): aux_nessy = self.new(path=aux_nessy, comm=self.comm, parallel_method=self.parallel_method, + xarray=self.is_xarray, avoid_first_hours=self.hours_start, avoid_last_hours=self.hours_end, first_level=self.first_level, last_level=self.last_level) new = True @@ -2117,7 +2272,7 @@ class Nes(object): return new_vars_added - def _get_global_attributes(self, create_nes=False): + def __get_global_attributes(self, create_nes=False): """ Read the netcdf global attributes. @@ -2133,10 +2288,12 @@ class Nes(object): """ gl_attrs = {} - - if not create_nes: - for attrname in self.dataset.ncattrs(): - gl_attrs[attrname] = getattr(self.dataset, attrname) + if self.is_xarray: + gl_attrs = self.dataset.attrs + else: + if not create_nes: + for attrname in self.netcdf.ncattrs(): + gl_attrs[attrname] = getattr(self.netcdf, attrname) return gl_attrs @@ -2277,8 +2434,6 @@ class Nes(object): # Create lev, lon and lat dimensions netcdf.createDimension('lev', len(self.lev['data'])) - netcdf.createDimension('lon', len(self._lon['data'])) - netcdf.createDimension('lat', len(self._lat['data'])) # Create string length dimension if self.strlen is not None: @@ -2494,6 +2649,7 @@ class Nes(object): return None + def _create_cell_measures(self, netcdf): # CELL AREA @@ -2709,7 +2865,7 @@ class Nes(object): if att_value is not None: if self.info: print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - var = self.dataset.variables[var_name] + var = self.netcdf.variables[var_name] if isinstance(att_value, int) and att_value == 0: var[i_time, self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], @@ -2820,7 +2976,7 @@ class Nes(object): netcdf.setncattr('Conventions', 'CF-1.7') if keep_open: - self.dataset = netcdf + self.netcdf = netcdf else: netcdf.close() @@ -2829,6 +2985,7 @@ class Nes(object): def __to_netcdf_cams_ra(self, path): return to_netcdf_cams_ra(self, path) + def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, type='NES', keep_open=False): """ @@ -2856,54 +3013,56 @@ class Nes(object): self.info = info self.serial_nc = None self.zip_lvl = compression_level - - # if serial: - if serial and self.size > 1: - try: - data = self._gather_data(self.variables) - except KeyError: - data = self.__gather_data_py_object(self.variables) - try: - c_measures = self._gather_data(self.cell_measures) - except KeyError: - c_measures = self.__gather_data_py_object(self.cell_measures) - if self.master: - new_nc = self.copy(copy_vars=False) - new_nc.set_communicator(MPI.COMM_SELF) - new_nc.variables = data - new_nc.cell_measures = c_measures + if self.is_xarray: + raise NotImplementedError("Writing with xarray not implemented") + else: + # if serial: + if serial and self.size > 1: + try: + data = self._gather_data(self.variables) + except KeyError: + data = self.__gather_data_py_object(self.variables) + try: + c_measures = self._gather_data(self.cell_measures) + except KeyError: + c_measures = self.__gather_data_py_object(self.cell_measures) + if self.master: + new_nc = self.copy(copy_vars=False) + new_nc.set_communicator(MPI.COMM_SELF) + new_nc.variables = data + new_nc.cell_measures = c_measures + if type in ['NES', 'DEFAULT']: + new_nc.__to_netcdf_py(path, keep_open=keep_open) + elif type == 'CAMS_RA': + new_nc.__to_netcdf_cams_ra(path) + elif type == 'MONARCH': + to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open) + elif type == 'CMAQ': + to_netcdf_cmaq(new_nc, path, keep_open=keep_open) + elif type == 'WRF_CHEM': + to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open) + else: + msg = "Unknown NetCDF type '{0}'. ".format(nc_type) + msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" + raise ValueError(msg) + self.serial_nc = new_nc + else: + self.serial_nc = True + else: if type in ['NES', 'DEFAULT']: - new_nc.__to_netcdf_py(path, keep_open=keep_open) - elif type == 'CAMS_RA': - new_nc.__to_netcdf_cams_ra(path) - elif type == 'MONARCH': - to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open) - elif type == 'CMAQ': - to_netcdf_cmaq(new_nc, path, keep_open=keep_open) - elif type == 'WRF_CHEM': - to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open) + self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open) + elif nc_type == 'CAMS_RA': + self.__to_netcdf_cams_ra(path) + elif nc_type == 'MONARCH': + to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open) + elif nc_type == 'CMAQ': + to_netcdf_cmaq(self, path, keep_open=keep_open) + elif nc_type == 'WRF_CHEM': + to_netcdf_wrf_chem(self, path, keep_open=keep_open) else: msg = "Unknown NetCDF type '{0}'. ".format(nc_type) msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" raise ValueError(msg) - self.serial_nc = new_nc - else: - self.serial_nc = True - else: - if type in ['NES', 'DEFAULT']: - self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open) - elif nc_type == 'CAMS_RA': - self.__to_netcdf_cams_ra(path) - elif nc_type == 'MONARCH': - to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open) - elif nc_type == 'CMAQ': - to_netcdf_cmaq(self, path, keep_open=keep_open) - elif nc_type == 'WRF_CHEM': - to_netcdf_wrf_chem(self, path, keep_open=keep_open) - else: - msg = "Unknown NetCDF type '{0}'. ".format(nc_type) - msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" - raise ValueError(msg) self.info = old_info @@ -3449,20 +3608,41 @@ class Nes(object): self : Nes Source Nes object. new_levels : List - New vertical levels. - new_src_vertical + List of new vertical levels. + new_src_vertical : nes.Nes, str + Nes object with the vertical information as variable or str with the path to the NetCDF file that contains + the vertical data. kind : str Vertical methods type. - extrapolate : None, tuple, str - Extrapolate method (for non linear operations). + extrapolate : bool or tuple or None or number or NaN + If bool: + - If True, both extrapolation options are set to 'extrapolate'. + - If False, extrapolation options are set to ('bottom', 'top'). + If tuple: + - The first element represents the extrapolation option for the lower bound. + - The second element represents the extrapolation option for the upper bound. + - If any element is bool: + - If True, it represents 'extrapolate'. + - If False: + - If it's the first element, it represents 'bottom'. + - If it's the second element, it represents 'top'. + - If any element is None, it is replaced with numpy.nan. + - Other numeric values are kept as they are. + - If any element is NaN, it is kept as NaN. + If None: + - Both extrapolation options are set to (NaN, NaN). + If number: + - Both extrapolation options are set to the provided number. + If NaN: + - Both extrapolation options are set to NaN. info: None, bool Indicates if you want to print extra information. overwrite: bool - Indicates if you want to compute the vertical interpolation in the same object or not + Indicates if you want to compute the vertical interpolation in the same object or not. """ return vertical_interpolation.interpolate_vertical( - self, new_levels, new_src_vertical=new_src_vertical, kind=kind, extrapolate=extrapolate, info=info, + self, new_levels, new_src_vertical=new_src_vertical, kind=kind, extrapolate_options=extrapolate, info=info, overwrite=overwrite) def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind='NearestNeighbour', n_neighbours=4, @@ -3487,16 +3667,16 @@ class Nes(object): only_create_wm : bool Indicates if you want to only create the Weight Matrix. wm : Nes - Weight matrix Nes File + Weight matrix Nes File. flux : bool - Indicates if you want to calculate the weight matrix for flux variables + Indicates if you want to calculate the weight matrix for flux variables. """ return horizontal_interpolation.interpolate_horizontal( self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm, flux=flux) - def spatial_join(self, ext_shp, method=None, var_list=None, info=False): + def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): """ Compute overlay intersection of two GeoPandasDataFrames. @@ -3509,10 +3689,13 @@ class Nes(object): var_list : List or None Variables that will be included in the resulting shapefile. info : bool - Indicates if you want to print the process info or no + Indicates if you want to print the process info. + apply_bbox : bool + Indicates if you want to reduce the shapefile to a bbox. """ - return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info) + return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info, + apply_bbox=apply_bbox) def calculate_grid_area(self, overwrite=True): """ @@ -3523,7 +3706,7 @@ class Nes(object): self : nes.Nes Source projection Nes Object. overwrite : bool - Indicates if we want to overwrite the grid area + Indicates if we want to overwrite the grid area. """ if ('cell_area' not in self.cell_measures.keys()) or (overwrite): @@ -3572,13 +3755,14 @@ class Nes(object): def get_fids(self): """ - Obtain the FIDs in a 2D format + Obtain the FIDs in a 2D format. Returns ------- np.array - 2D array with the FID data + 2D array with the FID data. """ + fids = np.arange(self._lat['data'].shape[0] * self._lon['data'].shape[-1]) fids = fids.reshape((self._lat['data'].shape[0], self._lon['data'].shape[-1])) fids = fids[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index e6b85e5d0a13b2ba03a7e62616776f5342ff6d60..01516d4c1c10301aeb94d403ef3e1f1f2262e415 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -20,7 +20,7 @@ class LatLonNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('lon',) for a regular latitude-longitude projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -36,6 +36,8 @@ class LatLonNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -57,7 +59,7 @@ class LatLonNes(Nes): """ super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, balanced=balanced, + xarray=xarray, parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -77,7 +79,7 @@ class LatLonNes(Nes): self.free_vars('crs') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -93,6 +95,8 @@ class LatLonNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -113,7 +117,7 @@ class LatLonNes(Nes): List of times to substitute the current ones while creation. """ - new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, + new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -226,36 +230,30 @@ class LatLonNes(Nes): centre_lon : dict Dictionary with data of centre longitudes in 1D """ - if 'lat' in kwargs.keys() and 'lon' in kwargs.keys(): - # Calculate centre latitudes - centre_lat = kwargs['lat'] - # Calculate centre longitudes - centre_lon = kwargs['lon'] - else: - # Get grid resolution - inc_lat = np.float64(self.projection_data['inc_lat']) - inc_lon = np.float64(self.projection_data['inc_lon']) - - # Get coordinates origen - lat_orig = np.float64(self.projection_data['lat_orig']) - lon_orig = np.float64(self.projection_data['lon_orig']) - - # Get number of coordinates - n_lat = int(self.projection_data['n_lat']) - n_lon = int(self.projection_data['n_lon']) - - # Calculate centre latitudes - lat_c_orig = lat_orig + (inc_lat / 2) - centre_lat = np.linspace(lat_c_orig, - lat_c_orig + (inc_lat * (n_lat - 1)), - n_lat, dtype=np.float64) - - # Calculate centre longitudes - lon_c_orig = lon_orig + (inc_lon / 2) - centre_lon = np.linspace(lon_c_orig, - lon_c_orig + (inc_lon * (n_lon - 1)), - n_lon, dtype=np.float64) + # Get grid resolution + inc_lat = np.float64(self.projection_data['inc_lat']) + inc_lon = np.float64(self.projection_data['inc_lon']) + + # Get coordinates origen + lat_orig = np.float64(self.projection_data['lat_orig']) + lon_orig = np.float64(self.projection_data['lon_orig']) + + # Get number of coordinates + n_lat = int(self.projection_data['n_lat']) + n_lon = int(self.projection_data['n_lon']) + + # Calculate centre latitudes + lat_c_orig = lat_orig + (inc_lat / 2) + centre_lat = np.linspace(lat_c_orig, + lat_c_orig + (inc_lat * (n_lat - 1)), + n_lat, dtype=np.float64) + + # Calculate centre longitudes + lon_c_orig = lon_orig + (inc_lon / 2) + centre_lon = np.linspace(lon_c_orig, + lon_c_orig + (inc_lon * (n_lon - 1)), + n_lon, dtype=np.float64) return {'data': centre_lat}, {'data': centre_lon} diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index c225d5b769956068cee4c84d8e33bdc9cf24cd4b..8225c2d6bdefc7fbbe889858d4f7af7a9e85ee44 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -35,7 +35,7 @@ class LCCNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('y', 'x') for a LCC projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -51,6 +51,8 @@ class LCCNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -72,7 +74,7 @@ class LCCNes(Nes): """ super(LCCNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, balanced=balanced, + xarray=xarray, parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -100,7 +102,7 @@ class LCCNes(Nes): self.free_vars('crs') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -116,6 +118,8 @@ class LCCNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -136,7 +140,7 @@ class LCCNes(Nes): List of times to substitute the current ones while creation. """ - new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, + new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 3cd46182389d258bd853518e9e67b08f707a0f95..cc02a302b3fe8ad58c72ff94ca640585cdd70c0b 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -35,7 +35,7 @@ class MercatorNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('y', 'x') for a Mercator projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -51,6 +51,8 @@ class MercatorNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -73,7 +75,7 @@ class MercatorNes(Nes): """ super(MercatorNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, balanced=balanced, + xarray=xarray, parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -101,7 +103,7 @@ class MercatorNes(Nes): self.free_vars('crs') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -117,6 +119,8 @@ class MercatorNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -137,7 +141,7 @@ class MercatorNes(Nes): List of times to substitute the current ones while creation. """ - new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset, + new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 9df46e2e86e9881dbf0ebee03a60c9720289e81d..ee541842d81dbd9a081ade61323c10bc503d2885 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -28,7 +28,7 @@ class PointsNes(Nes): Tuple with the name of the dimensions of the station values. ('station',) for a points grid. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='X', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -44,6 +44,8 @@ class PointsNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset, None NetCDF4-python Dataset to initialize the class. + xarray: bool + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. accepted values: ['X', 'T']. @@ -67,7 +69,7 @@ class PointsNes(Nes): """ super(PointsNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, + xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -91,7 +93,7 @@ class PointsNes(Nes): self._lon_dim = ('station',) @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='X', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -107,6 +109,8 @@ class PointsNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. accepted values: ['X', 'T']. @@ -127,7 +131,7 @@ class PointsNes(Nes): List of times to substitute the current ones while creation. """ - new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, + new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -310,7 +314,7 @@ class PointsNes(Nes): Portion of the variable data corresponding to the rank. """ - nc_var = self.dataset.variables[var_name] + nc_var = self.netcdf.variables[var_name] var_dims = nc_var.dimensions # Read data in 1 or 2 dimensions diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index b528edf530cfa3dce984942e2480a874933086e8..09c5a443d1daa6fd7dc1d9d7ef0bf3d9f5733341 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -25,7 +25,7 @@ class PointsNesGHOST(PointsNes): Data flags (given by data provider) dictionary with the portion of 'data' corresponding to the rank values. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='X', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -41,6 +41,8 @@ class PointsNesGHOST(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -62,7 +64,7 @@ class PointsNesGHOST(PointsNes): """ super(PointsNesGHOST, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, + xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -76,7 +78,7 @@ class PointsNesGHOST(PointsNes): self.qa = self._get_coordinate_values(self._qa, 'X') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='X', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -92,6 +94,8 @@ class PointsNesGHOST(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -112,7 +116,7 @@ class PointsNesGHOST(PointsNes): List of times to substitute the current ones while creation. """ - new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, + new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -279,7 +283,7 @@ class PointsNesGHOST(PointsNes): Portion of the variable data corresponding to the rank. """ - nc_var = self.dataset.variables[var_name] + nc_var = self.netcdf.variables[var_name] var_dims = nc_var.dimensions # Read data in 1 or 2 dimensions diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 4a72bc0e0749c183d904882f23021113fd1e5fa6..533e4a86bc069b5af48cec87cfa8370bfd9f6e04 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -34,10 +34,9 @@ class PointsNesProvidentia(PointsNes): grid_edge_lat : dict Grid edge latitudes dictionary with the portion of 'data' corresponding to the rank values. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='X', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, - grid_edge_lat=None, + balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, **kwargs): """ Initialize the PointsNesProvidentia class @@ -52,6 +51,8 @@ class PointsNesProvidentia(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -81,7 +82,7 @@ class PointsNesProvidentia(PointsNes): """ super(PointsNesProvidentia, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - parallel_method=parallel_method, + xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, @@ -107,7 +108,7 @@ class PointsNesProvidentia(PointsNes): self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, '') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='X', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, @@ -125,6 +126,8 @@ class PointsNesProvidentia(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -153,7 +156,7 @@ class PointsNesProvidentia(PointsNes): Grid edge latitudes dictionary with the portion of 'data' corresponding to the rank values. """ - new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, + new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, @@ -316,7 +319,7 @@ class PointsNesProvidentia(PointsNes): Portion of the variable data corresponding to the rank. """ - nc_var = self.dataset.variables[var_name] + nc_var = self.netcdf.variables[var_name] var_dims = nc_var.dimensions # Read data in 1, 2 or 3 dimensions diff --git a/nes/nc_projections/raster_nes.py b/nes/nc_projections/raster_nes.py deleted file mode 100644 index 79515f34baa0377d48c6c9c920d1e76905d36e2c..0000000000000000000000000000000000000000 --- a/nes/nc_projections/raster_nes.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python - -import numpy as np -from pyproj import Proj -from .default_nes import Nes -from .latlon_nes import LatLonNes -import rasterio as rio -import datetime -from rasterio.windows import from_bounds -from pyproj import Proj, transform -from rasterio.features import shapes - - -class RasterNes(Nes): - """ - Attributes - ---------- - dataset : rasterio.io.DatasetReader - """ - - def __init__(self, comm=None, path=None, info=False, parallel_method='Y', **kwargs): - raise NotImplementedError("RasterNes is not implemented yet") - self.window = None - - super().__init__(comm=comm, path=path, info=info, dataset=None, parallel_method=parallel_method, - balanced=False, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, - create_nes=False, times=None, **kwargs) - - def open(self): - """ - Open the NetCDF. - """ - - self.dataset = self.__open_raster() - - return None - - def __open_raster(self): - """ - - Returns - ------- - rasterio.io.DatasetReader - """ - raster = rio.open(self.get_ini_path(), 'r') - self.dataset = raster - - return raster - - def _get_strlen(self): - """ - Get the strlen - - Returns - ------- - int - Max length of the string data - """ - return None - - def _get_lazy_variables(self): - """ - Get all the variables' information. - - Returns - ------- - variables : dict - Dictionary with the variable name as key and another dictionary as value. - De value dictionary will have the 'data' key with None as value and all the variable attributes as the - other keys. - e.g. - {'var_name_1': {'data': None, 'attr_1': value_1_1, 'attr_2': value_1_2, ...}, - 'var_name_2': {'data': None, 'attr_1': value_2_1, 'attr_2': value_2_2, ...}, - ...} - """ - - if self.master: - variables = {} - # Initialise data - for band, units in enumerate(self.dataset.units): - var_name = "band_{0}".format(band + 1) - variables[var_name] = {} - variables[var_name]['data'] = None - variables[var_name]['dimensions'] = ('lat', 'lon') - variables[var_name]['dtype'] = self.dataset.dtypes[band] - if units is None: - units = '-' - variables[var_name]['units'] = units - - else: - variables = None - variables = self.comm.bcast(variables, root=0) - - return variables - - def _get_time(self): - """ - It is assumed that the Raster has no time information. - Setting time to 01/01/2000. - - Returns - ------- - List[datetime.datetime] - A list of datetime objects corresponding to the time values in the Raster data. - """ - time = [datetime.datetime(year=2000, month=1, day=1, hour=0, minute=0)] - - return time - - def _get_time_bnds(self, create_nes=False): - """ - Get the Raster time bounds values. - - Parameters - ---------- - create_nes : bool, optional - If True, indicates that the method will create the object from scratch. - If False (default), the method will use an existing file to obtain time bounds. - - Returns - ------- - None - """ - return None - - def _get_coordinates_bnds(self, create_nes=False): - return None, None - - def _get_cell_measures(self, create_nes=False): - """ - Get the Raster cell measures values. - - Parameters - ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - - Returns - ------- - dict - Dictionary of cell measures of the Raster data. - """ - - return {} - - def _get_global_attributes(self, create_nes=False): - """ - Read the netcdf global attributes. - - Parameters - ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. - - Returns - ------- - gl_attrs : dict - Dictionary with the netCDF global attributes. - """ - - gl_attrs = self.dataset.meta - - # Removing None - for key, value in gl_attrs.items(): - if value is None: - gl_attrs[key] = '-' - gl_attrs[key] = '-' - - return gl_attrs - - def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, - lat_min=None, lat_max=None, lon_min=None, lon_max=None): - """ - Select a slice of time, lev, lat or lon given a minimum and maximum limits. - """ - if (hours_start is not None or time_min is not None or hours_end is not None or time_max is not None or - lev_min is not None or lev_max is not None): - raise TypeError("Raster only can be selected by lat lon values.") - - # Rasterio reads from top to bottom, contrary to NetCDF. Thus, the latitudes and data need to be flipped. - # Calculate the inverse rank to ensure the first rank loads the bottom part of the raster instead of the top - inverse_rank = self.size - self.rank - 1 - # Define the geographic coordinates of the entire dataset - left, bottom, right, top = (lon_min, lat_min, lon_max, lat_max) - - # Create a window using the bounds for the entire dataset - full_dataset_window = from_bounds(left, bottom, right, top, self.dataset.transform) - - # Calculate the width and height of each sub-window - if self.parallel_method == 'Y': - sub_window_width = full_dataset_window.width - sub_window_height = full_dataset_window.height // self.size - - start_col = full_dataset_window.col_off - start_row = (inverse_rank * sub_window_height) + full_dataset_window.row_off - - self.read_axis_limits = {'x_min': 0, 'x_max': None, - 'y_min': int(self.rank * sub_window_height), - 'y_max': int((self.rank + 1) * sub_window_height), - 'z_min': 0, 'z_max': None, 't_min': 0, 't_max': None} - self.write_axis_limits = self.read_axis_limits.copy() - elif self.parallel_method == 'X': - sub_window_width = full_dataset_window.width // self.size - sub_window_height = full_dataset_window.height - - start_col = (inverse_rank * sub_window_width) + full_dataset_window.col_off - start_row = full_dataset_window.row_off - - self.read_axis_limits = {'x_min': int(self.rank * sub_window_width), - 'x_max': int((self.rank + 1) * sub_window_width), - 'y_min': 0, 'y_max': None, - 'z_min': 0, 'z_max': None, 't_min': 0, 't_max': None} - self.write_axis_limits = self.read_axis_limits.copy() - else: - raise RuntimeError("Parallel method only accepted X or Y") - - # Create a window for each MPI process - subset_window = rio.windows.Window(start_col, start_row, sub_window_width, sub_window_height) - full_transform = self.dataset.window_transform(full_dataset_window) - - # Generate arrays of latitude and longitude values using np.linspace - subset_y = np.linspace(full_transform.f, - full_transform.f + full_transform.e * int(full_dataset_window.height), - int(full_dataset_window.height)) - subset_x = np.linspace(full_transform.c, - full_transform.c + full_transform.a * int(full_dataset_window.width), - int(full_dataset_window.width)) - - subset_y = np.flip(subset_y, axis=0) - - print(full_transform) - print(self.dataset.transform) - # Set the window and coordinate arrays as attributes - self.window = subset_window - print(self.dataset.crs.to_string()) - print(self.dataset.crs.to_epsg()) - if self.dataset.crs.to_string() == 'EPSG:4326': - self._lat = {'data': subset_y} - self._lon = {'data': subset_x} - - self.lat = self._get_coordinate_values(self._lat, 'Y') - self.lon = self._get_coordinate_values(self._lon, 'X') - - self._var_dim = ('lat', 'lon') - self._lat_dim = ('lat',) - self._lon_dim = ('lon',) - else: - raise RuntimeError("Only EPSG:4326 supported for this raster") - - return subset_window - - def _read_variable(self, var_name): - """ - Read a portion of the variable data based on the current rank. - - Parameters - ---------- - var_name : str - Name of the variable to read. - - Returns - ------- - np.array - A portion of the variable data corresponding to the current rank. - """ - var_data = self.dataset.read(int(var_name[-1]), window=self.window) - - # Flip data - var_data = np.flipud(var_data) - # 2D to 4D - var_data = np.expand_dims(var_data, axis=(0, 1)) - - return var_data - - def to_latlon_nes(self): - return LatLonNes( - comm=self.comm, path=self.get_ini_path(), info=self.info, dataset=None, - parallel_method=self.parallel_method, create_nes=True, balanced=self.balanced, times=self.time, - levels=self.lev, lat=self.lat, lat_bnds=self.lat_bnds, lon=self.lon, lon_bnds=self.lon_bnds, - vars=self.variables) diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index b6f86c1501d67910bc647a27a51a8c34b0d8b425..eccbe03320eb11485b293d6eb167e830dc14e288 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -36,7 +36,7 @@ class RotatedNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('rlat', 'rlon') for a rotated projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -52,6 +52,8 @@ class RotatedNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -74,7 +76,7 @@ class RotatedNes(Nes): super(RotatedNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, balanced=balanced, - parallel_method=parallel_method, + xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -100,7 +102,7 @@ class RotatedNes(Nes): self._lon_dim = ('rlat', 'rlon') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -116,6 +118,8 @@ class RotatedNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -132,7 +136,7 @@ class RotatedNes(Nes): List of times to substitute the current ones while creation. """ - new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, + new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/rotated_nested_nes.py b/nes/nc_projections/rotated_nested_nes.py index cafc607677293000c5ef4e7e1cd2d602318cabdd..e56f42726718c7ffce7a9b794154c542a14697b0 100644 --- a/nes/nc_projections/rotated_nested_nes.py +++ b/nes/nc_projections/rotated_nested_nes.py @@ -4,10 +4,9 @@ import numpy as np from netCDF4 import Dataset from .rotated_nes import RotatedNes - class RotatedNestedNes(RotatedNes): - def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -23,6 +22,8 @@ class RotatedNestedNes(RotatedNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. + xarray: bool: + (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -45,7 +46,7 @@ class RotatedNestedNes(RotatedNes): super(RotatedNestedNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, balanced=balanced, - parallel_method=parallel_method, + xarray=xarray, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) diff --git a/nes/nes_formats/cmaq_format.py b/nes/nes_formats/cmaq_format.py index e8fd3990ee7d31dddb81a2c97025dbd76f362d19..f715b765562da6a544fc1075138f01a1e8964785 100644 --- a/nes/nes_formats/cmaq_format.py +++ b/nes/nes_formats/cmaq_format.py @@ -60,7 +60,7 @@ def to_netcdf_cmaq(self, path, chunking=False, keep_open=False): # Close NetCDF if keep_open: - self.dataset = netcdf + self.netcdf = netcdf else: netcdf.close() diff --git a/nes/nes_formats/monarch_format.py b/nes/nes_formats/monarch_format.py index 34a22d10af47ac09313623379d42fb1bbc5778a1..a8f5e24753395200911efa47f3b3a9e2c139227f 100644 --- a/nes/nes_formats/monarch_format.py +++ b/nes/nes_formats/monarch_format.py @@ -73,7 +73,7 @@ def to_netcdf_monarch(self, path, chunking=False, keep_open=False): netcdf.setncattr('Conventions', 'CF-1.7') if keep_open: - self.dataset = netcdf + self.netcdf = netcdf else: netcdf.close() diff --git a/nes/nes_formats/wrf_chem_format.py b/nes/nes_formats/wrf_chem_format.py index d2a71cabe0d4f919de8cd5dde76c5f9af82e9418..f37a74d4618be1c83889b5dc1c3e0bd7fad19a77 100644 --- a/nes/nes_formats/wrf_chem_format.py +++ b/nes/nes_formats/wrf_chem_format.py @@ -68,7 +68,7 @@ def to_netcdf_wrf_chem(self, path, chunking=False, keep_open=False): # Close NetCDF if keep_open: - self.dataset = netcdf + self.netcdf = netcdf else: netcdf.close() diff --git a/requirements.txt b/requirements.txt index f7440e438135461aec09d2e672bd02759fff8fed..1d674e35b979cf32c8bb57d8ffac3a9b048dcdcd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ scipy>=1.7.3 filelock>=3.9.0 eccodes-python~=0.9.5 cfunits>=3.3.5 +xarray>=0.20.2 mpi4py>=3.1.4 sphinx>=7.2.6 sphinx-rtd-theme==2.0.0