diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 7b0759831cd17644ac85f83e64e03354738acea3..2d056372d0a2a88377f77bd0be6b94b870d3ef24 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -9,7 +9,6 @@ from xarray import open_dataset from netCDF4 import Dataset, num2date, date2num from mpi4py import MPI from cfunits import Units -from numpy.ma.core import MaskError import geopandas as gpd from shapely.geometry import Polygon, Point from copy import deepcopy, copy @@ -69,7 +68,7 @@ class Nes(object): write_axis_limits : dict Dictionary with the 4D limits of the rank data to write. t_min, t_max, z_min, z_max, y_min, y_max, x_min and x_max. - time : List + time : List[datetime] List of time steps of the rank data. lev : dict Vertical levels dictionary with the portion of 'data' corresponding to the rank values. @@ -79,12 +78,12 @@ class Nes(object): Longitudes dictionary with the portion of 'data' corresponding to the rank values. global_attrs : dict Global attributes with the attribute name as key and data as values. - _var_dim : None, tuple - Tuple with the name of the Y and X dimensions for the variables. - _lat_dim : None, tuple - Tuple with the name of the dimensions of the Latitude values. - _lon_dim : None, tuple - Tuple with the name of the dimensions of the Longitude values. + _var_dim : None or tuple + Name of the Y and X dimensions for the variables. + _lat_dim : None or tuple + Name of the dimensions of the Latitude values. + _lon_dim : None or tuple + Name of the dimensions of the Longitude values. """ def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, @@ -102,7 +101,7 @@ class Nes(object): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: + xarray: bool (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default over Y axis @@ -116,11 +115,11 @@ class Nes(object): Number of hours to remove from last time steps. first_level : int Index of the first level to use. - last_level : int, None + last_level : int or None Index of the last level to use. None if it is the last. create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. - times : List, None + times : List[datetime] or None List of times to substitute the current ones while creation. kwargs : Projection dependent parameters to create it from scratch @@ -272,7 +271,7 @@ class Nes(object): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: + xarray: bool (Not working) Indicates if you want to use xarray as default. avoid_first_hours : int Number of hours to remove from first time steps. @@ -286,7 +285,7 @@ class Nes(object): Balanced dataset cannot be written in chunking mode. first_level : int Index of the first level to use. - last_level : int, None + last_level : int or None Index of the last level to use. None if it is the last. create_nes : bool Indicates if you want to create the object from scratch (True) or through an existing file. @@ -364,6 +363,35 @@ class Nes(object): return None + def __add__(self, other): + """ + Sum two NES objects + + Parameters + ---------- + other : Nes + Nes to be summed + + Returns + ------- + Nes + Summed Nes object + """ + nessy = self.copy(copy_vars=True) + for var_name in other.variables.keys(): + if var_name not in nessy.variables.keys(): + # Create New variable + nessy.variables[var_name] = deepcopy(other.variables[var_name]) + else: + nessy.variables[var_name]['data'] += other.variables[var_name]['data'] + return nessy + + def __radd__(self, other): + if other == 0 or other is None: + return self + else: + return self.__add__(other) + def copy(self, copy_vars=False): """ Copy the Nes object. @@ -383,7 +411,8 @@ class Nes(object): nessy = deepcopy(self) nessy.netcdf = None if copy_vars: - nessy.variables = nessy._get_lazy_variables() + nessy.set_communicator(self.comm) + nessy.variables = deepcopy(self.variables) nessy.cell_measures = deepcopy(self.cell_measures) else: nessy.variables = {} @@ -494,7 +523,8 @@ class Nes(object): return None - def create_single_spatial_bounds(self, coordinates, inc, spatial_nv=2, inverse=False): + @staticmethod + def create_single_spatial_bounds(coordinates, inc, spatial_nv=2, inverse=False): """ Calculate the vertices coordinates. @@ -505,7 +535,7 @@ class Nes(object): inc : float Increment between centre values. spatial_nv : int - Non mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. + Non-mandatory parameter that informs the number of vertices that the boundaries must have. Default: 2. inverse : bool For some grid latitudes. @@ -544,20 +574,14 @@ class Nes(object): inc_lat = np.abs(np.mean(np.diff(self._lat['data']))) lat_bnds = self.create_single_spatial_bounds(self._lat['data'], inc_lat, spatial_nv=2) - self._lat_bnds = {} - self._lat_bnds['data'] = deepcopy(lat_bnds) - self.lat_bnds = {} - self.lat_bnds['data'] = lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - :] + self._lat_bnds = {'data': deepcopy(lat_bnds)} + self.lat_bnds = {'data': lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], :]} inc_lon = np.abs(np.mean(np.diff(self._lon['data']))) lon_bnds = self.create_single_spatial_bounds(self._lon['data'], inc_lon, spatial_nv=2) - self._lon_bnds = {} - self._lon_bnds['data'] = deepcopy(lon_bnds) - self.lon_bnds = {} - self.lon_bnds['data'] = lon_bnds[self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], - :] + self._lon_bnds = {'data': deepcopy(lon_bnds)} + self.lon_bnds = {'data': lon_bnds[self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :]} return None @@ -569,9 +593,9 @@ class Nes(object): Returns ------- - lon_bnds_mesh : numpy.array + lon_bnds_mesh : numpy.ndarray Longitude boundaries in the mesh format - lat_bnds_mesh : numpy.array + lat_bnds_mesh : numpy.ndarray Latitude boundaries in the mesh format """ if self.size > 1: @@ -601,17 +625,17 @@ class Nes(object): lon_bnds_mesh[1:, 1:] = self.lon_bnds['data'][:, :, 2] lon_bnds_mesh[1:, :-1] = self.lon_bnds['data'][:, :, 3] else: - raise RuntimeError("Invalid number of vertices: {0}".format(self.lat_bnds['data'].shape[-1] )) + raise RuntimeError("Invalid number of vertices: {0}".format(self.lat_bnds['data'].shape[-1])) return lon_bnds_mesh, lat_bnds_mesh def free_vars(self, var_list): """ - Erase the selected variables from the variables information. + Erase the selected variables from the variables' information. Parameters ---------- - var_list : List, str, list + var_list : List or str List (or single string) of the variables to be loaded. """ @@ -638,7 +662,7 @@ class Nes(object): Parameters ---------- - var_list : List, str + var_list : List or str List (or single string) of the variables to be loaded. """ @@ -666,7 +690,7 @@ class Nes(object): return time_interval - def sel_time(self, time, copy=False): + def sel_time(self, time, do_copy=False): """ To select only one time step. @@ -674,7 +698,7 @@ class Nes(object): ---------- time : datetime.datetime Time stamp to select. - copy : bool + do_copy : bool Indicates if you want a copy with the selected time step (True) or to modify te existing one (False). Returns @@ -683,7 +707,7 @@ class Nes(object): Nes object with the data (and metadata) of the selected time step. """ - if copy: + if do_copy: aux_nessy = self.copy(copy_vars=False) aux_nessy.comm = self.comm else: @@ -982,7 +1006,7 @@ class Nes(object): self.set_time_bnds(aux_time_bounds) elif type_op == 'withoutt0': - for var_name, var_info in self.variables.items (): + for var_name, var_info in self.variables.items(): if var_info['data'] is None: self.load(var_name) if op == 'mean': @@ -1197,10 +1221,10 @@ class Nes(object): rows_sum = 0 for proc in range(self.size): - fid_dist[proc] = {'x_min': None, 'x_max': None, - 'y_min': None, 'y_max': None, - 'z_min': None, 'z_max': None, - 't_min': None, 't_max': None} + fid_dist[proc] = {'x_min': 0, 'x_max': None, + 'y_min': 0, 'y_max': None, + 'z_min': 0, 'z_max': None, + 't_min': 0, 't_max': None} if proc < procs_rows_extended: aux_rows = procs_len + 1 else: @@ -1542,9 +1566,9 @@ class Nes(object): Returns ------- lat_bnds : List - List of latitude bounds of the NetCDF data. + Latitude bounds of the NetCDF data. lon_bnds : List - List of longitude bounds of the NetCDF data. + Longitude bounds of the NetCDF data. """ if self.is_xarray: @@ -1554,13 +1578,11 @@ class Nes(object): if self.master: if not create_nes: if 'lat_bnds' in self.netcdf.variables.keys(): - lat_bnds = {} - lat_bnds['data'] = self.netcdf.variables['lat_bnds'][:] + lat_bnds = {'data': self.netcdf.variables['lat_bnds'][:]} else: lat_bnds = None if 'lon_bnds' in self.netcdf.variables.keys(): - lon_bnds = {} - lon_bnds['data'] = self.netcdf.variables['lon_bnds'][:] + lon_bnds = {'data': self.netcdf.variables['lon_bnds'][:]} else: lon_bnds = None else: @@ -1587,21 +1609,21 @@ class Nes(object): Returns ------- - cell_measures : dict + dict Dictionary of cell measures of the NetCDF data. """ - cell_measures = {} + c_measures = {} if self.master: if not create_nes: if 'cell_area' in self.netcdf.variables.keys(): - cell_measures['cell_area'] = {} - cell_measures['cell_area']['data'] = self.netcdf.variables['cell_area'][:] - cell_measures = self.comm.bcast(cell_measures, root=0) + c_measures['cell_area'] = {} + c_measures['cell_area']['data'] = self.netcdf.variables['cell_area'][:] + c_measures = self.comm.bcast(c_measures, root=0) self.free_vars(['cell_area']) - return cell_measures + return c_measures def _get_coordinate_dimension(self, possible_names): """ @@ -1789,6 +1811,7 @@ class Nes(object): data: np.array Portion of the variable data corresponding to the rank. """ + # from numpy.ma.core import MaskError nc_var = self.netcdf.variables[var_name] var_dims = nc_var.dimensions @@ -1810,7 +1833,7 @@ class Nes(object): for lon_n in range(data.shape[1]): data_aux[lat_n, lon_n] = ''.join( data[lat_n, lon_n].tostring().decode('ascii').replace('\x00', '')) - data = data_aux.reshape(1, 1, data_aux.shape[-2], data_aux.shape[-1]) + data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) else: data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], @@ -1830,11 +1853,11 @@ class Nes(object): else: raise NotImplementedError('Error with {0}. Only can be read netCDF with 4 dimensions or less'.format( var_name)) - # Missing to nan - try: - data[data.shapefile == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + # # Missing to nan + # try: + # data[data.shapefile == True] = np.nan + # except (AttributeError, MaskError, ValueError): + # pass return data @@ -2042,10 +2065,10 @@ class Nes(object): rows_sum = 0 for proc in range(self.size): - fid_dist[proc] = {'x_min': None, 'x_max': None, - 'y_min': None, 'y_max': None, - 'z_min': None, 'z_max': None, - 't_min': None, 't_max': None} + fid_dist[proc] = {'x_min': 0, 'x_max': None, + 'y_min': 0, 'y_max': None, + 'z_min': 0, 'z_max': None, + 't_min': 0, 't_max': None} if proc < procs_rows_extended: aux_rows = procs_len + 1 else: @@ -2109,7 +2132,7 @@ class Nes(object): # TIMES time_var = netcdf.createVariable('time', np.float64, ('time',), zlib=self.zip_lvl > 0, complevel=self.zip_lvl) - time_var.units = 'hours since {0}'.format( self._time[0].strftime('%Y-%m-%d %H:%M:%S')) + time_var.units = 'hours since {0}'.format(self._time[0].strftime('%Y-%m-%d %H:%M:%S')) time_var.standard_name = 'time' time_var.calendar = 'standard' time_var.long_name = 'time' @@ -2217,7 +2240,7 @@ class Nes(object): if var_dict['data'] is not None: # Get dimensions - if var_dict['data'].shape == 4: + if len(var_dict['data'].shape) == 4: var_dims = ('time', 'lev',) + self._var_dim else: var_dims = self._var_dim @@ -2228,13 +2251,13 @@ class Nes(object): if var_dtype != var_dict['data'].dtype: msg = "WARNING!!! " msg += "Different data types for variable {0}. ".format(var_name) - msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, - var_dict['data'].dtype) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) warnings.warn(msg) try: var_dict['data'] = var_dict['data'].astype(var_dtype) except Exception as e: # TODO: Detect exception - raise e("It was not possible to cast the data to the input dtype.") + print(e) + raise TypeError("It was not possible to cast the data to the input dtype.") else: var_dtype = var_dict['data'].dtype @@ -2244,27 +2267,37 @@ class Nes(object): var_dtype = var_dict['data'].dtype # Convert list of strings to chars for parallelization - try: - unicode_type = len(max(var_dict['data'].flatten(), key=len)) - if ((var_dict['data'].dtype == np.dtype(' 1: - data = self._gather_data() + data = self._gather_data(self.variables) + c_measures = self._gather_data(self.cell_measures) if self.master: new_nc = self.copy(copy_vars=False) new_nc.set_communicator(MPI.COMM_SELF) new_nc.variables = data + new_nc.cell_measures = c_measures new_nc.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) else: self.__to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) @@ -2779,8 +2816,8 @@ class Nes(object): """ Add variables data to shapefile. - var_list : List - List (or single string) of the variables to be loaded and saved in the shapefile. + var_list : List or str + Variables to be loaded and saved in the shapefile. idx_lev : int Index of vertical level for which the data will be saved in the shapefile. idx_time : int @@ -2798,12 +2835,14 @@ class Nes(object): Parameters ---------- - ext_shp : GeoPandasDataFrame, str + ext_shp : GeoPandasDataFrame or str File or path from where the data will be obtained on the intersection. method : str Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. - var_list : List, None + var_list : List or None Variables that will be included in the resulting shapefile. + info : bool + Indicates if you want to print the process info or no """ if isinstance(ext_shp, str): ext_shp = gpd.read_file(ext_shp) @@ -2837,7 +2876,8 @@ class Nes(object): # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest # TODO: Check if the projection 4328 does not distort the coordinates too much - # https://gis.stackexchange.com/questions/372564/userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas + # https://gis.stackexchange.com/questions/372564/ + # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas ext_shp = ext_shp.to_crs('EPSG:4328') centroids_gdf = centroids_gdf.to_crs('EPSG:4328') @@ -2875,7 +2915,7 @@ class Nes(object): intersection.loc[:, 'weight'] = 1. for i, row in intersection.iterrows(): - if i % 1000 == 0 and self.master and info: + if isinstance(i, int) and i % 1000 == 0 and self.master and info: print('\tRank {0:03d}: {1:.3f} %'.format(self.rank, i*100 / len(intersection))) # Filter to do not calculate percentages over 100% grid cells spatial joint if counts[row['FID']] > 1: @@ -2943,7 +2983,7 @@ class Nes(object): return centroids_gdf - def __gather_data_py_object(self): + def __gather_data_py_object(self, data_to_gather): """ Gather all the variable data into the MPI rank 0 to perform a serial write. @@ -2953,7 +2993,7 @@ class Nes(object): Variables dictionary with all the data from all the ranks. """ - data_list = deepcopy(self.variables) + data_list = deepcopy(data_to_gather) for var_name in data_list.keys(): try: # noinspection PyArgumentList @@ -3029,12 +3069,13 @@ class Nes(object): print("Gathering {0}".format(var_name)) shp_len = len(data_list[var_name]['data'].shape) try: - # Collect local array sizes using the high-level mpi4py gather + # Collect local array sizes using the gather communication pattern rank_shapes = np.array(self.comm.gather(data_list[var_name]['data'].shape, root=0)) sendbuf = data_list[var_name]['data'].flatten() sendcounts = np.array(self.comm.gather(len(sendbuf), root=0)) if self.master: - recvbuf = np.empty(sum(sendcounts), dtype=type(sendbuf[0])) + # recvbuf = np.empty(sum(sendcounts), dtype=type(sendbuf[0])) + recvbuf = np.empty(sum(sendcounts), dtype=type(sendbuf.max())) else: recvbuf = None self.comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendcounts), root=0) @@ -3151,7 +3192,7 @@ class Nes(object): self : Nes Source Nes object. new_levels : List - List of new vertical levels. + New vertical levels. new_src_vertical kind : str Vertical methods type. diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 5a2edaa418dae585cdfaaffc6ab6a38f8cb848f3..318302e23519ac3ce5ae0fb9e58353fc8a0b0de2 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -368,20 +368,32 @@ class PointsNes(Nes): var_dims = ('time',) + self._var_dim # Convert list of strings to chars for parallelization - try: - unicode_type = len(max(var_dict['data'].flatten(), key=len)) - if ((var_dict['data'].dtype == np.dtype('