diff --git a/nes/create_nes.py b/nes/create_nes.py index 33f0bf63eb76714623e9b906770014355a96ea7d..cfb0205fda0916e62ae37448ba660d4d0a2dc813 100644 --- a/nes/create_nes.py +++ b/nes/create_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys from netCDF4 import num2date from mpi4py import MPI import geopandas as gpd @@ -8,7 +9,7 @@ from .nc_projections import * def create_nes(comm=None, info=False, projection=None, parallel_method='Y', balanced=False, - strlen=75, times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, **kwargs): """ Create a Nes class from scratch. @@ -87,6 +88,7 @@ def create_nes(comm=None, info=False, projection=None, parallel_method='Y', bala if projection is None: if parallel_method == 'Y': warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'") + sys.stderr.flush() parallel_method = 'X' elif parallel_method == 'T': raise NotImplementedError("Parallel method T not implemented yet") @@ -126,8 +128,7 @@ def from_shapefile(path, method=None, parallel_method='Y', **kwargs): 1. Create NES grid. 2. Create shapefile for grid. - 3. Read shapefile from mask. - 4. Spatial join to add shapefile variables to NES variables. + 3. Spatial join to add shapefile variables to NES variables. Parameters ---------- @@ -148,10 +149,7 @@ def from_shapefile(path, method=None, parallel_method='Y', **kwargs): # Create shapefile for grid nessy.create_shapefile() - # Read shapefile - shapefile = gpd.read_file(path) - # Make spatial join - nessy.spatial_join(shapefile, method=method) + nessy.spatial_join(path, method=method) return nessy diff --git a/nes/load_nes.py b/nes/load_nes.py index 8de2346d2ccce85d61048f18df922f3f5e959986..2113b485281c8b298bd1d43c6b2b99a2566765e9 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import os +import sys from mpi4py import MPI from netCDF4 import Dataset import warnings @@ -70,6 +71,7 @@ def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', elif __is_points(dataset): if parallel_method == 'Y': warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'") + sys.stderr.flush() parallel_method = 'X' if __is_points_ghost(dataset): # Points - GHOST diff --git a/nes/methods/__init__.py b/nes/methods/__init__.py index 22c351a85dec0724f58fe1b70502c171b5286c7a..772adacfae71525d99d30e987a427decf012c3f5 100644 --- a/nes/methods/__init__.py +++ b/nes/methods/__init__.py @@ -1,3 +1,4 @@ from .vertical_interpolation import add_4d_vertical_info from .vertical_interpolation import interpolate_vertical from .horizontal_interpolation import interpolate_horizontal +from .spatial_join import spatial_join diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py index 58a2abc4448edd23b7de4b632d65957aa69c0f36..e33edd428fe17e1fed98abe8f19e6cd10aeaab62 100644 --- a/nes/methods/horizontal_interpolation.py +++ b/nes/methods/horizontal_interpolation.py @@ -143,6 +143,7 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind='Neares else: msg = "The final projection must be points to interpolate an experiment and get it in Providentia format." 
warnings.warn(msg)
+            sys.stderr.flush()
     else:
         # Convert dimensions (time, lev, lat, lon) or (time, lat, lon) to (time, station) for interpolated variables
         # and reshape data
@@ -238,6 +239,7 @@ def get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbour
                 if len(weight_matrix.lev['data']) != n_neighbours:
                     warn("The selected weight matrix does not have the same number of nearest neighbours." +
                          "Re-calculating again but not saving it.")
+                    sys.stderr.flush()
                     weight_matrix = create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours)
             else:
                 weight_matrix = True
@@ -320,6 +322,7 @@ def get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbou
     if isinstance(dst_grid, nes.PointsNes) and weight_matrix_path is not None:
         if self.master:
             warn("To point weight matrix cannot be saved.")
+            sys.stderr.flush()
         weight_matrix_path = None
 
     if wm is not None:
@@ -337,6 +340,7 @@
                 if len(weight_matrix.lev['data']) != n_neighbours:
                     warn("The selected weight matrix does not have the same number of nearest neighbours." +
                          "Re-calculating again but not saving it.")
+                    sys.stderr.flush()
                     weight_matrix = create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours)
             else:
                 weight_matrix = True
diff --git a/nes/methods/spatial_join.py b/nes/methods/spatial_join.py
new file mode 100644
index 0000000000000000000000000000000000000000..6695cf05c1f95b85a340359e65a0a36f3efcd69d
--- /dev/null
+++ b/nes/methods/spatial_join.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python
+
+import sys
+import warnings
+import geopandas as gpd
+from geopandas import GeoDataFrame
+import nes
+import numpy as np
+import pandas as pd
+from shapely.geos import TopologicalError
+
+
+def spatial_join(self, ext_shp, method=None, var_list=None, info=False):
+    """
+    Compute overlay intersection of two GeoPandasDataFrames.
+
+    Parameters
+    ----------
+    self : nes.Nes
+    ext_shp : GeoPandasDataFrame or str
+        External shapefile, or the path to read it from, whose data will be joined on the intersection.
+    method : str
+        Overlay method. Accepted values: ['nearest', 'intersection', 'centroid'].
+    var_list : str, List or None
+        Variables that will be included in the resulting shapefile.
+    info : bool
+        Indicates if you want to print the process info or not.
+    """
+    if self.master and info:
+        print("Starting spatial join")
+    if isinstance(var_list, str):
+        # Transforming a string (single variable name) into a list of length 1
+        var_list = [var_list]
+
+    # Create source shapefile if it does not exist
+    if self.shapefile is None:
+        if self.master and info:
+            print("\tCreating shapefile")
+            sys.stdout.flush()
+        self.create_shapefile()
+
+    ext_shp = prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info)
+
+    if method == 'nearest':
+        # Nearest centroids to the shapefile polygons
+        spatial_join_nearest(self, ext_shp=ext_shp, info=info)
+    elif method == 'intersection':
+        # Intersect the areas of the shapefile polygons; outside the shapefile there will be NaN
+        spatial_join_intersection(self, ext_shp=ext_shp, info=info)
+    elif method == 'centroid':
+        # Centroids that fall on the shapefile polygons; outside the shapefile there will be NaN
+        spatial_join_centroid(self, ext_shp=ext_shp, info=info)
+
+    else:
+        accepted_values = ['nearest', 'intersection', 'centroid']
+        raise NotImplementedError('{0} is not implemented. Choose from: {1}'.format(method, accepted_values))
+
+    return None
+
+
+def prepare_external_shapefile(self, ext_shp, var_list, info=False):
+    """
+    Prepare the external shapefile.
+
+    It is highly recommended to pass the ext_shp parameter as a path (string), because the external
+    shapefile will then be clipped to the rank's extent while it is being read.
+
+    1. Read the shapefile if it is not already read.
+    2. Filter the variable list.
+    3. Standardize projections.
+
+    Parameters
+    ----------
+    self : nes.Nes
+    ext_shp : GeoDataFrame or str
+        External shapefile or path to it.
+    var_list : List[str] or None
+        External shapefile variables to be computed.
+    info : bool
+        Indicates if you want to print the information.
+
+    Returns
+    -------
+    GeoDataFrame
+        External shapefile.
+    """
+    if isinstance(ext_shp, str):
+        # Reading external shapefile
+        if self.master and info:
+            print("\tReading external shapefile")
+        # ext_shp = gpd.read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry)
+        ext_shp = gpd.read_file(ext_shp, include_fields=var_list, bbox=get_bbox(self))
+    else:
+        msg = "WARNING!!! "
+        msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened "
+        msg += "shapefile, memory usage improves because the external shapefile is clipped while being read."
+        warnings.warn(msg)
+        sys.stderr.flush()
+        if var_list is not None:
+            ext_shp = ext_shp.loc[:, var_list + ['geometry']]
+
+    self.comm.Barrier()
+    if self.master and info:
+        print("\t\tReading external shapefile done!")
+
+    # Standardizing projection
+    ext_shp = ext_shp.to_crs(self.shapefile.crs)
+
+    return ext_shp
+
+
+def get_bbox(self):
+    """
+    Obtain the bounding box of the rank data:
+
+    (lon_min, lat_min, lon_max, lat_max)
+
+    Parameters
+    ----------
+    self : nes.Nes
+
+    Returns
+    -------
+    tuple
+        Bounding box.
+    """
+    bbox = (self.lon_bnds['data'].min(), self.lat_bnds['data'].min(),
+            self.lon_bnds['data'].max(), self.lat_bnds['data'].max(), )
+
+    return bbox
+
+
+def spatial_join_nearest(self, ext_shp, info=False):
+    """
+    Perform the spatial join using the nearest method.
+
+    Parameters
+    ----------
+    self : nes.Nes
+    ext_shp : GeoDataFrame
+        External shapefile.
+    info : bool
+        Indicates if you want to print the information.
+    """
+    if self.master and info:
+        print("\tNearest spatial join")
+        sys.stdout.flush()
+
+    grid_shp = self.get_centroids_from_coordinates()
+
+    # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest
+    # TODO: Check if the projection 4328 does not distort the coordinates too much
+    # https://gis.stackexchange.com/questions/372564/
+    # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas
+    # ext_shp = ext_shp.to_crs('EPSG:4328')
+    # grid_shp = grid_shp.to_crs('EPSG:4328')
+
+    # Calculate spatial join by distance
+    aux_grid = gpd.sjoin_nearest(grid_shp, ext_shp, distance_col='distance')
+
+    # Get data from closest shapes to centroids
+    del aux_grid['geometry'], aux_grid['index_right']
+    self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid
+
+    var_list = list(ext_shp.columns)
+    var_list.remove('geometry')
+    for var_name in var_list:
+        self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)
+
+    return None
+
+
+def spatial_join_centroid(self, ext_shp, info=False):
+    """
+    Perform the spatial join using the centroid method.
+
+    Parameters
+    ----------
+    self : nes.Nes
+    ext_shp : GeoDataFrame
+        External shapefile.
+    info : bool
+        Indicates if you want to print the information.
+    """
+    if self.master and info:
+        print("\tCentroid spatial join")
+        sys.stdout.flush()
+    if info and self.master:
+        print("\t\tCalculating centroids")
+        sys.stdout.flush()
+
+    # Get centroids
+    grid_shp = self.get_centroids_from_coordinates()
+
+    # Calculate spatial join
+    if info and self.master:
+        print("\t\tCalculating centroid spatial join")
+        sys.stdout.flush()
+    aux_grid = gpd.sjoin(grid_shp, ext_shp, predicate='within')
+
+    # Get data from shapes where there are centroids; the rest will be NaN
+    del aux_grid['geometry'], aux_grid['index_right']
+    self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid
+
+    var_list = list(ext_shp.columns)
+    var_list.remove('geometry')
+    for var_name in var_list:
+        self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)
+
+    return None
+
+
+def spatial_join_intersection(self, ext_shp, info=False):
+    """
+    Perform the spatial join using the intersection method.
+
+    Parameters
+    ----------
+    self : nes.Nes
+    ext_shp : GeoDataFrame
+        External shapefile.
+    info : bool
+        Indicates if you want to print the information.
+    """
+    var_list = list(ext_shp.columns)
+    var_list.remove('geometry')
+
+    grid_shp = self.shapefile
+    grid_shp['FID_grid'] = grid_shp.index
+    grid_shp = grid_shp.reset_index()
+
+    # Get intersected areas
+    # inp, res = ext_shp.sindex.query_bulk(grid_shp.geometry, predicate='intersects')
+    inp, res = grid_shp.sindex.query_bulk(ext_shp.geometry, predicate='intersects')
+
+    if info:
+        print('\t\tRank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp)))
+        sys.stdout.flush()
+
+    # Calculate intersected areas and fractions
+    intersection = pd.DataFrame(columns=['FID', 'ext_shp_id', 'weight'])
+    intersection['FID'] = np.array(grid_shp.loc[res, 'FID_grid'], dtype=np.uint32)
+    intersection['ext_shp_id'] = np.array(inp, dtype=np.uint32)
+
+    if len(intersection) > 0:
+        # No Warnings Zone
+        counts = intersection['FID'].value_counts()
+        warnings.filterwarnings('ignore')
+        intersection.loc[:, 'weight'] = 1.
+
+        for i, row in intersection.iterrows():
+            if isinstance(i, int) and i % 1000 == 0 and info:
+                print('\t\t\tRank {0:03d}: {1:.3f} %'.format(self.rank, i * 100 / len(intersection)))
+                sys.stdout.flush()
+            # The weight only needs to be recalculated for grid cells intersected by more than one
+            # polygon; cells covered by a single polygon keep weight 1
+            if counts[row['FID']] > 1:
+                try:
+                    intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection(
+                        ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area
+                except TopologicalError:
+                    # If for some reason the geometry is corrupted, the buffer function should fix it
+                    ext_shp.loc[[inp[i]], 'geometry'] = ext_shp.loc[[inp[i]], 'geometry'].buffer(0)
+                    intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection(
+                        ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area
+        # intersection['intersect_area'] = intersection.apply(
+        #     lambda x: x['geometry_grid'].intersection(x['geometry_ext']).area, axis=1)
+        intersection.drop(intersection[intersection['weight'] <= 0].index, inplace=True)
+
+        warnings.filterwarnings('default')
+
+        # Choose the biggest area from intersected areas with multiple options
+        intersection.sort_values('weight', ascending=False, inplace=True)
+        intersection = intersection.drop_duplicates(subset='FID', keep="first")
+        intersection = intersection.sort_values('FID').set_index('FID')
+
+        for var_name in var_list:
+            self.shapefile.loc[intersection.index, var_name] = np.array(
+                ext_shp.loc[intersection['ext_shp_id'], var_name])
+    else:
+        for var_name in var_list:
+            self.shapefile.loc[:, var_name] = np.nan
+
+    for var_name in var_list:
+        self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)
+
+    return None
diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index 9d3faea70c3a49ef5685711d3a1122379809f3fb..2677ec11493a7650ff2cdae4abee6943a2ac8e53 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -9,14 +9,13 @@ from xarray import open_dataset
 from netCDF4 import Dataset, num2date, date2num
 from mpi4py import MPI
 from cfunits import Units
+from shapely.geos import TopologicalError
 import geopandas as gpd
 from shapely.geometry import Polygon, Point
 from copy import deepcopy, copy
 import datetime
 import pyproj
-from ..methods import vertical_interpolation
-from ..methods import horizontal_interpolation
-from ..methods import cell_measures
+from ..methods import vertical_interpolation, horizontal_interpolation, cell_measures, spatial_join
 from ..nes_formats import to_netcdf_cams_ra
@@ -87,7 +86,7 @@ class Nes(object):
     """
     def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y',
                  avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False,
-                 balanced=False, times=None, strlen=75, **kwargs):
+                 balanced=False, times=None, **kwargs):
         """
         Initialize the Nes class
@@ -191,6 +190,10 @@ class Nes(object):
             # Set NetCDF attributes
             self.global_attrs = self.__get_global_attributes(create_nes)
 
+            # Set string length
+            # 75 is the standard value used in GHOST data
+            self.strlen = 75
+
         else:
 
             if dataset is not None:
@@ -243,12 +246,12 @@ class Nes(object):
             # Set NetCDF attributes
             self.global_attrs = self.__get_global_attributes()
 
+            # Get string length
+            self.strlen = self._get_strlen()
+
         # Writing options
         self.zip_lvl = 0
 
-        # Get string length
-        self.strlen = strlen
-
         # Dimensions information
         self._var_dim = None
         self._lat_dim = None
@@ -308,6 +311,23 @@ class Nes(object):
         return new
 
+    def _get_strlen(self, strlen=75):
+        """
+        Get the strlen.
+
+        Parameters
+        ----------
+        strlen : int
+            Fallback max length of the string, used when the file has no 'strlen' dimension.
+
+        Returns
+        -------
+        int
+            Max length of the string.
+        """
+
+        if 'strlen' in self.netcdf.dimensions:
+            strlen = self.netcdf.dimensions['strlen'].size
+
+        return strlen
+
     def set_strlen(self, strlen=75):
         """
         Set the strlen
@@ -317,7 +337,9 @@ class Nes(object):
         strlen : int
             Max length of the string
         """
+
         self.strlen = strlen
+
         return None
 
     def __del__(self):
@@ -554,11 +576,13 @@ class Nes(object):
                 msg += "The given time bounds list has a different length than the time array. "
                 msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self._time), len(time_bnds))
                 warnings.warn(msg)
+                sys.stderr.flush()
         else:
             msg = 'WARNING!!! '
             msg += 'There is at least one element in the time bounds to be set that is not a datetime object. '
             msg += 'Time bounds will not be set.'
             warnings.warn(msg)
+            sys.stderr.flush()
 
         return None
@@ -1853,7 +1877,6 @@ class Nes(object):
         data: np.array
             Portion of the variable data corresponding to the rank.
         """
-        # from numpy.ma.core import MaskError
         nc_var = self.netcdf.variables[var_name]
         var_dims = nc_var.dimensions
@@ -1886,15 +1909,33 @@ class Nes(object):
                           self.read_axis_limits['z_min']:self.read_axis_limits['z_max'],
                           self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
                           self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
-        # elif len(var_dims) == 5:
-        #     data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'],
-        #                   :,
-        #                   self.read_axis_limits['z_min']:self.read_axis_limits['z_max'],
-        #                   self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
-        #                   self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
+        elif len(var_dims) == 5:
+            if 'strlen' in var_dims:
+                data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'],
+                              self.read_axis_limits['z_min']:self.read_axis_limits['z_max'],
+                              self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
+                              self.read_axis_limits['x_min']:self.read_axis_limits['x_max'],
+                              :]
+                data_aux = np.empty(shape=(data.shape[0], data.shape[1], data.shape[2], data.shape[3]), dtype=object)
+                for time_n in range(data.shape[0]):
+                    for lev_n in range(data.shape[1]):
+                        for lat_n in range(data.shape[2]):
+                            for lon_n in range(data.shape[3]):
+                                data_aux[time_n, lev_n, lat_n, lon_n] = data[
+                                    time_n, lev_n, lat_n, lon_n].tobytes().decode('ascii').replace('\x00', '')
+                data = data_aux
+            else:
+                # data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'],
+                #               :,
+                #               self.read_axis_limits['z_min']:self.read_axis_limits['z_max'],
+                #               self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
+                #               self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
+                raise NotImplementedError('Error with {0}. Only 5D variables with a strlen dimension can be read.'.format(
+                    var_name))
         else:
             raise NotImplementedError('Error with {0}. 
Only can be read netCDF with 4 dimensions or less'.format( var_name)) + # Missing to nan if np.ma.is_masked(data): # This operation is done because sometimes the missing value is lost during the calculation @@ -2264,6 +2305,9 @@ class Nes(object): for var_name in self.variables.keys(): self.variables[var_name]['cell_measures'] = 'area: cell_area' + + if self.info: + print("Rank {0:03d}: Cell measures done".format(self.rank)) return None def _create_variables(self, netcdf, chunking=False): @@ -2279,6 +2323,7 @@ class Nes(object): """ for i, (var_name, var_dict) in enumerate(self.variables.items()): + if isinstance(var_dict['data'], int) and var_dict['data'] == 0: var_dims = ('time', 'lev',) + self._var_dim var_dtype = np.float32 @@ -2297,6 +2342,7 @@ class Nes(object): msg += "Different data types for variable {0}. ".format(var_name) msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) warnings.warn(msg) + sys.stderr.flush() try: var_dict['data'] = var_dict['data'].astype(var_dtype) except Exception as e: # TODO: Detect exception @@ -2324,15 +2370,33 @@ class Nes(object): var_dims += ('strlen',) # Split strings into chars (S1) - data_aux = np.chararray(shape=(var_dict['data'].shape[0], - var_dict['data'].shape[1], - self.strlen)) - for lat_n in range(var_dict['data'].shape[0]): - for lon_n in range(var_dict['data'].shape[1]): - chars = [v for v in var_dict['data'][lat_n][lon_n]] - data = chars + ['']*(self.strlen-len(chars)) - for char_n, char in enumerate(data): - data_aux[lat_n, lon_n, char_n] = char.encode('ascii', 'ignore') + if len(var_dict['data'].shape) == 2: + data_aux = np.chararray(shape=(var_dict['data'].shape[0], + var_dict['data'].shape[1], + self.strlen)) + for lat_n in range(var_dict['data'].shape[0]): + for lon_n in range(var_dict['data'].shape[1]): + chars = [v for v in var_dict['data'][lat_n][lon_n]] + data = chars + ['']*(self.strlen-len(chars)) + for char_n, char in enumerate(data): + data_aux[lat_n, lon_n, char_n] = char.encode('ascii', 'ignore') + + elif len(var_dict['data'].shape) == 4: + data_aux = np.chararray(shape=(var_dict['data'].shape[0], + var_dict['data'].shape[1], + var_dict['data'].shape[2], + var_dict['data'].shape[3], + self.strlen)) + for time_n in range(var_dict['data'].shape[0]): + for lev_n in range(var_dict['data'].shape[1]): + for lat_n in range(var_dict['data'].shape[2]): + for lon_n in range(var_dict['data'].shape[3]): + chars = [v for v in var_dict['data'][time_n][lev_n][lat_n][lon_n]] + data = chars + ['']*(self.strlen-len(chars)) + for char_n, char in enumerate(data): + data_aux[time_n, lev_n, lat_n, lon_n, char_n] = char.encode('ascii', + 'ignore') + var_dict['data'] = data_aux var_dtype = 'S1' @@ -2371,6 +2435,7 @@ class Nes(object): for att_name, att_value in var_dict.items(): if att_name == 'data': + if att_value is not None: if self.info: print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) @@ -2379,6 +2444,17 @@ class Nes(object): self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = 0 + + elif len(att_value.shape) == 5: + if 'strlen' in var_dims: + var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], + self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], + self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + :] = att_value + else: + raise 
NotImplementedError('It is not possible to write non-string 5D variables.')
+
                     elif len(att_value.shape) == 4:
                         var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'],
                             self.write_axis_limits['z_min']:self.write_axis_limits['z_max'],
@@ -2392,6 +2468,7 @@ class Nes(object):
                             :] = att_value
                     else:
                         raise NotImplementedError('It is not possible to write 3D variables.')
+
                 if self.info:
                     print("Rank {0:03d}: Var {1} data ({2}/{3})".format(
                         self.rank, var_name, i + 1, len(self.variables)))
@@ -2526,8 +2603,6 @@ class Nes(object):
 
         # Create cell measures
         self._create_cell_measures(netcdf)
-        if self.info:
-            print("Rank {0:03d}: Cell measures done".format(self.rank))
 
         # Create variables
         self._create_variables(netcdf, chunking=chunking)
@@ -2762,50 +2837,55 @@ class Nes(object):
         shapefile : GeoPandasDataFrame
             Shapefile dataframe.
         """
+
+        if self.shapefile is None:
 
-        if self._lat_bnds is None or self._lon_bnds is None:
-            self.create_spatial_bounds()
-
-        # Reshape arrays to create geometry
-        aux_shape = (self.lat_bnds['data'].shape[0], self.lon_bnds['data'].shape[0], 4)
-        lon_bnds_aux = np.empty(aux_shape)
-        lon_bnds_aux[:, :, 0] = self.lon_bnds['data'][np.newaxis, :, 0]
-        lon_bnds_aux[:, :, 1] = self.lon_bnds['data'][np.newaxis, :, 1]
-        lon_bnds_aux[:, :, 2] = self.lon_bnds['data'][np.newaxis, :, 1]
-        lon_bnds_aux[:, :, 3] = self.lon_bnds['data'][np.newaxis, :, 0]
-
-        lon_bnds = lon_bnds_aux
-        del lon_bnds_aux
-
-        lat_bnds_aux = np.empty(aux_shape)
-        lat_bnds_aux[:, :, 0] = self.lat_bnds['data'][:, np.newaxis, 0]
-        lat_bnds_aux[:, :, 1] = self.lat_bnds['data'][:, np.newaxis, 0]
-        lat_bnds_aux[:, :, 2] = self.lat_bnds['data'][:, np.newaxis, 1]
-        lat_bnds_aux[:, :, 3] = self.lat_bnds['data'][:, np.newaxis, 1]
-
-        lat_bnds = lat_bnds_aux
-        del lat_bnds_aux
-
-        aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2]))
-        aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2]))
-
-        # Create dataframe cointaining all polygons
-        geometry = []
-        for i in range(aux_b_lons.shape[0]):
-            geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]),
-                                     (aux_b_lons[i, 1], aux_b_lats[i, 1]),
-                                     (aux_b_lons[i, 2], aux_b_lats[i, 2]),
-                                     (aux_b_lons[i, 3], aux_b_lats[i, 3]),
-                                     (aux_b_lons[i, 0], aux_b_lats[i, 0])]))
-        fids = np.arange(len(self._lat['data']) * len(self._lon['data']))
-        fids = fids.reshape((len(self._lat['data']), len(self._lon['data'])))
-        fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
-                    self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
-        gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()),
-                               geometry=geometry,
-                               crs="EPSG:4326")
-        self.shapefile = gdf
-
+            if self._lat_bnds is None or self._lon_bnds is None:
+                self.create_spatial_bounds()
+
+            # Reshape arrays to create geometry
+            aux_shape = (self.lat_bnds['data'].shape[0], self.lon_bnds['data'].shape[0], 4)
+            lon_bnds_aux = np.empty(aux_shape)
+            lon_bnds_aux[:, :, 0] = self.lon_bnds['data'][np.newaxis, :, 0]
+            lon_bnds_aux[:, :, 1] = self.lon_bnds['data'][np.newaxis, :, 1]
+            lon_bnds_aux[:, :, 2] = self.lon_bnds['data'][np.newaxis, :, 1]
+            lon_bnds_aux[:, :, 3] = self.lon_bnds['data'][np.newaxis, :, 0]
+
+            lon_bnds = lon_bnds_aux
+            del lon_bnds_aux
+
+            lat_bnds_aux = np.empty(aux_shape)
+            lat_bnds_aux[:, :, 0] = self.lat_bnds['data'][:, np.newaxis, 0]
+            lat_bnds_aux[:, :, 1] = self.lat_bnds['data'][:, np.newaxis, 0]
+            lat_bnds_aux[:, :, 2] = self.lat_bnds['data'][:, np.newaxis, 1]
+            lat_bnds_aux[:, :, 3] = self.lat_bnds['data'][:, np.newaxis, 1]
+
+            lat_bnds = lat_bnds_aux
+            del lat_bnds_aux
+
+            aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2]))
+            aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2]))
+
+            # Create dataframe containing all polygons
+            geometry = []
+            for i in range(aux_b_lons.shape[0]):
+                geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]),
+                                         (aux_b_lons[i, 1], aux_b_lats[i, 1]),
+                                         (aux_b_lons[i, 2], aux_b_lats[i, 2]),
+                                         (aux_b_lons[i, 3], aux_b_lats[i, 3]),
+                                         (aux_b_lons[i, 0], aux_b_lats[i, 0])]))
+            fids = np.arange(len(self._lat['data']) * len(self._lon['data']))
+            fids = fids.reshape((len(self._lat['data']), len(self._lon['data'])))
+            fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
+                        self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
+            gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()),
+                                   geometry=geometry,
+                                   crs="EPSG:4326")
+            self.shapefile = gdf
+
+        else:
+            gdf = self.shapefile
+
         return gdf
 
     def write_shapefile(self, path):
@@ -2873,6 +2953,7 @@ class Nes(object):
         if lev is None:
             msg = 'No vertical level has been specified. The first one will be selected.'
             warnings.warn(msg)
+            sys.stderr.flush()
             idx_lev = 0
         else:
             if lev not in self.lev['data']:
@@ -2883,6 +2964,7 @@ class Nes(object):
         if time is None:
             msg = 'No time has been specified. The first one will be selected.'
             warnings.warn(msg)
+            sys.stderr.flush()
             idx_time = 0
         else:
             if time not in self.time:
@@ -2917,132 +2999,6 @@ class Nes(object):
 
         return None
 
-    def spatial_join(self, ext_shp, method=None, var_list=None, info=False):
-        """
-        Compute overlay intersection of two GeoPandasDataFrames.
-
-        Parameters
-        ----------
-        ext_shp : GeoPandasDataFrame or str
-            File or path from where the data will be obtained on the intersection.
-        method : str
-            Overlay method. Accepted values: ['nearest', 'intersection', 'centroid'].
-        var_list : List or None
-            Variables that will be included in the resulting shapefile.
-        info : bool
-            Indicates if you want to print the process info or no
-        """
-        if isinstance(ext_shp, str):
-            ext_shp = gpd.read_file(ext_shp)
-
-        # Create shapefile if it does not exist
-        if self.shapefile is None:
-            msg = 'Shapefile does not exist. It will be created now.'
-            warnings.warn(msg)
-            grid_shp = self.create_shapefile()
-        else:
-            grid_shp = self.shapefile
-        grid_shp = copy(grid_shp)
-
-        # Get variables of interest from shapefile
-        if var_list is not None:
-            if isinstance(var_list, str):
-                var_list = [var_list]
-            ext_shp = ext_shp.loc[:, var_list + ['geometry']]
-        else:
-            var_list = ext_shp.columns.to_list()
-            var_list.remove('geometry')
-
-        ext_shp = ext_shp.to_crs(grid_shp.crs)
-
-        # Nearest centroids to the shapefile polygons
-        if method == 'nearest':
-            if self.master and info:
-                print("Nearest spatial joint")
-            # Get centroids
-            centroids_gdf = self.get_centroids_from_coordinates()
-
-            # From geodetic coordinates (e.g. 4326) to meters (e.g. 
4328) to use sjoin_nearest - # TODO: Check if the projection 4328 does not distort the coordinates too much - # https://gis.stackexchange.com/questions/372564/ - # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas - ext_shp = ext_shp.to_crs('EPSG:4328') - centroids_gdf = centroids_gdf.to_crs('EPSG:4328') - - # Calculate spatial joint by distance - aux_grid = gpd.sjoin_nearest(centroids_gdf, ext_shp, distance_col='distance') - - # Get data from closest shapes to centroids - del aux_grid['geometry'], aux_grid['index_right'] - self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - - # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN - elif method == 'intersection': - if self.master and info: - print("Intersection spatial joint") - grid_shp['FID_grid'] = grid_shp.index - grid_shp = grid_shp.reset_index() - # Get intersected areas - # inp, res = shapefile.sindex.query_bulk(self.shapefile.geometry, predicate='intersects') - inp, res = grid_shp.sindex.query_bulk(ext_shp.geometry, predicate='intersects') - - if self.master and info: - print('Rank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) - - # Calculate intersected areas and fractions - intersection = pd.DataFrame() - intersection['FID'] = np.array(grid_shp.loc[res, 'FID_grid'], dtype=np.uint32) - intersection['ext_shp_id'] = np.array(inp, dtype=np.uint32) - - # intersection['geometry_ext'] = ext_shp.loc[inp, 'geometry'].values - # intersection['geometry_grid'] = grid_shp.loc[res, 'geometry'].values - if True: - # No Warnings Zone - counts = intersection['FID'].value_counts() - warnings.filterwarnings('ignore') - - intersection.loc[:, 'weight'] = 1. - for i, row in intersection.iterrows(): - if isinstance(i, int) and i % 1000 == 0 and self.master and info: - print('\tRank {0:03d}: {1:.3f} %'.format(self.rank, i*100 / len(intersection))) - # Filter to do not calculate percentages over 100% grid cells spatial joint - if counts[row['FID']] > 1: - intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( - ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area - # intersection['intersect_area'] = intersection.apply( - # lambda x: x['geometry_grid'].intersection(x['geometry_ext']).area, axis=1) - intersection.drop(intersection[intersection['weight'] <= 0].index, inplace=True) - - warnings.filterwarnings('default') - - # Choose the biggest area from intersected areas with multiple options - intersection.sort_values('weight', ascending=False, inplace=True) - intersection = intersection.drop_duplicates(subset='FID', keep="first") - intersection = intersection.sort_values('FID').set_index('FID') - - for var_name in var_list: - self.shapefile.loc[intersection.index, var_name] = np.array( - ext_shp.loc[intersection['ext_shp_id'], var_name]) - - # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN - elif method == 'centroid': - - # Get centroids - centroids_gdf = self.get_centroids_from_coordinates() - - # Calculate spatial joint - aux_grid = gpd.sjoin(centroids_gdf, ext_shp, predicate='within') - - # Get data from shapes where there are centroids, rest will be NaN - del aux_grid['geometry'], aux_grid['index_right'] - self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - - else: - accepted_values = ['nearest', 'intersection', 'centroid'] - raise NotImplementedError('{0} is not implemented. 
Choose from: {1}'.format(method, accepted_values))
-
-        return None
-
     def get_centroids_from_coordinates(self):
         """
         Get centroids from geographical coordinates.
@@ -3316,6 +3272,24 @@ class Nes(object):
             self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info,
             to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm)
 
+    def spatial_join(self, ext_shp, method=None, var_list=None, info=False):
+        """
+        Compute overlay intersection of two GeoPandasDataFrames.
+
+        Parameters
+        ----------
+        ext_shp : GeoPandasDataFrame or str
+            External shapefile, or the path to read it from, whose data will be joined on the intersection.
+        method : str
+            Overlay method. Accepted values: ['nearest', 'intersection', 'centroid'].
+        var_list : str, List or None
+            Variables that will be included in the resulting shapefile.
+        info : bool
+            Indicates if you want to print the process info or not.
+        """
+
+        return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info)
+
     def calculate_grid_area(self):
         """
         Get coordinate bounds and call function to calculate the area of each cell of a grid.
@@ -3326,8 +3300,13 @@ class Nes(object):
             Source projection Nes Object.
         """
 
-        grid_area = cell_measures.calculate_grid_area(self)
-
+        if 'cell_area' not in self.cell_measures.keys():
+            grid_area = cell_measures.calculate_grid_area(self)
+            self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0],
+                                                                          self.lon_bnds['data'].shape[1]])}
+        else:
+            grid_area = self.cell_measures['cell_area']['data']
+
         return grid_area
 
     @staticmethod
diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py
index cfcb11dae3619e336abbc0978e18ede3be9bfd2f..b044d984c227d7493607e9524735bf0b245f4db0 100644
--- a/nes/nc_projections/latlon_nes.py
+++ b/nes/nc_projections/latlon_nes.py
@@ -272,14 +272,3 @@ class LatLonNes(Nes):
         """
 
         return super(LatLonNes, self).to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info)
-
-    def calculate_grid_area(self):
-        """
-        Get coordinate bounds and call function to calculate the area of each cell of a grid.
-
-        """
-        grid_area = super().calculate_grid_area()
-        self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0],
-                                                                      self.lon_bnds['data'].shape[1]])}
-
-        return None
diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py
index 585acbe11a338e65a382ddb1a3d2c544d4325725..b5a679e8da977eb675bc79c586faa29dcc4112f7 100644
--- a/nes/nc_projections/lcc_nes.py
+++ b/nes/nc_projections/lcc_nes.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import warnings
+import sys
 import numpy as np
 import pandas as pd
 from cfunits import Units
@@ -168,6 +169,7 @@ class LCCNes(Nes):
         else:
             msg = 'There is no variable called Lambert_conformal, projection has not been defined.'
             warnings.warn(msg)
+            sys.stderr.flush()
             projection_data = None
 
         return projection_data
@@ -465,35 +467,40 @@ class LCCNes(Nes):
             Shapefile dataframe.
""" - # Get latitude and longitude cell boundaries - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], - self.lat_bnds['data'].shape[2])) - aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], - self.lon_bnds['data'].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) - fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], + self.lat_bnds['data'].shape[2])) + aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], + self.lon_bnds['data'].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + else: + gdf = self.shapefile + return gdf def get_centroids_from_coordinates(self): @@ -524,12 +531,3 @@ class LCCNes(Nes): return centroids_gdf - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. 
- """ - grid_area = super(LCCNes, self).calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index e6ccf187b683ff7d2f17db3cd7657545b024ace7..b752e8d575454b54e43d10caced0949c0219a344 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys import numpy as np import pandas as pd from cfunits import Units @@ -164,6 +165,7 @@ class MercatorNes(Nes): else: msg = 'There is no variable called mercator, projection has not been defined.' warnings.warn(msg) + sys.stderr.flush() projection_data = None return projection_data @@ -441,35 +443,40 @@ class MercatorNes(Nes): Shapefile dataframe. """ - # Get latitude and longitude cell boundaries - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], - self.lat_bnds['data'].shape[2])) - aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], - self.lon_bnds['data'].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) - fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], + self.lat_bnds['data'].shape[2])) + aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], + self.lon_bnds['data'].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + else: + gdf = self.shapefile + return gdf def get_centroids_from_coordinates(self): @@ -499,14 +506,3 @@ class 
MercatorNes(Nes): crs="EPSG:4326") return centroids_gdf - - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - """ - - grid_area = super(MercatorNes, self).calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 3d55257ba70994ba7cf1fc61b90ace425da1dfe2..756fe3c67a5f0c691b6a1fd01f32789752a7ee86 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -7,7 +7,6 @@ import pandas as pd from copy import deepcopy import geopandas as gpd from netCDF4 import date2num, stringtochar -from numpy.ma.core import MaskError from .default_nes import Nes @@ -31,7 +30,7 @@ class PointsNes(Nes): """ def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - times=None, strlen=75, **kwargs): + times=None, **kwargs): """ Initialize the PointsNes class. @@ -68,10 +67,6 @@ class PointsNes(Nes): # Dimensions screening self.lat = self._get_coordinate_values(self._lat, 'X') self.lon = self._get_coordinate_values(self._lon, 'X') - self.strlen = strlen - else: - # Dimensions screening - self.strlen = self._get_strlen() # Complete dimensions self._station = {'data': np.arange(len(self._lon['data']))} @@ -311,10 +306,9 @@ class PointsNes(Nes): var_name)) # Missing to nan - try: - data[data.mask == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + if np.ma.is_masked(data): + # This operation is done because sometimes the missing value is lost during the calculation + data[data.mask] = np.nan return data @@ -343,6 +337,7 @@ class PointsNes(Nes): msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) warnings.warn(msg) + sys.stderr.flush() try: var_dict['data'] = var_dict['data'].astype(var_dtype) except Exception as e: # TODO: Detect exception @@ -486,6 +481,7 @@ class PointsNes(Nes): msg = "WARNING!!! " msg += "Variable {0} was not loaded. It will not be written.".format(var_name) warnings.warn(msg) + sys.stderr.flush() return None @@ -667,10 +663,15 @@ class PointsNes(Nes): Shapefile dataframe. 
""" - # Create dataframe cointaining all points - gdf = self.get_centroids_from_coordinates() - self.shapefile = gdf + if self.shapefile is None: + # Create dataframe cointaining all points + gdf = self.get_centroids_from_coordinates() + self.shapefile = gdf + + else: + gdf = self.shapefile + return gdf def get_centroids_from_coordinates(self): diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 978f4d5447d8f51f719517d3db6f7970ac06d65a..11c47ad17938d08d935bf7432471d58893bf0c25 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -3,7 +3,6 @@ import sys import warnings import numpy as np -from numpy.ma.core import MaskError from netCDF4 import stringtochar, date2num from copy import deepcopy from .points_nes import PointsNes @@ -305,10 +304,9 @@ class PointsNesGHOST(PointsNes): var_name)) # Missing to nan - try: - data[data.mask == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + if np.ma.is_masked(data): + # This operation is done because sometimes the missing value is lost during the calculation + data[data.mask] = np.nan return data @@ -341,7 +339,7 @@ class PointsNesGHOST(PointsNes): else: var_dtype = var_dict['data'].dtype - # Get dimensions + # Get dimensions if len(var_dict['data'].shape) == 1: # Metadata var_dims = self._var_dim @@ -478,6 +476,7 @@ class PointsNesGHOST(PointsNes): msg = 'WARNING!!! ' msg += 'Variable {0} was not loaded. It will not be written.'.format(var_name) warnings.warn(msg) + sys.stderr.flush() return None @@ -603,6 +602,7 @@ class PointsNesGHOST(PointsNes): msg += 'GHOST datasets cannot be written in parallel yet. ' msg += 'Changing to serial mode.' warnings.warn(msg) + sys.stderr.flush() super(PointsNesGHOST, self).to_netcdf(path, compression_level=compression_level, serial=True, info=info, chunking=chunking) diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 91229da8caf91f50b1160e0bc88b477389d281d6..84c6be26f0cedd7213691537bd663c8bbdc16287 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -4,7 +4,6 @@ import sys import warnings import numpy as np from copy import deepcopy -from numpy.ma.core import MaskError from netCDF4 import stringtochar from .points_nes import PointsNes @@ -111,7 +110,7 @@ class PointsNesProvidentia(PointsNes): self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, '') # Set strlen to be None (avoid default strlen inherited from points) - self.strlen = None + self.set_strlen(None) @staticmethod def new(comm=None, path=None, info=False, dataset=None, xarray=False, create_nes=False, balanced=False, @@ -341,10 +340,9 @@ class PointsNesProvidentia(PointsNes): var_name)) # Missing to nan - try: - data[data.mask == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + if np.ma.is_masked(data): + # This operation is done because sometimes the missing value is lost during the calculation + data[data.mask] = np.nan return data @@ -514,6 +512,7 @@ class PointsNesProvidentia(PointsNes): msg = 'WARNING!!! ' msg += 'Variable {0} was not loaded. It will not be written.'.format(var_name) warnings.warn(msg) + sys.stderr.flush() return None @@ -605,6 +604,7 @@ class PointsNesProvidentia(PointsNes): msg += 'Providentia datasets cannot be written in parallel yet. ' msg += 'Changing to serial mode.' 
warnings.warn(msg) + sys.stderr.flush() super(PointsNesProvidentia, self).to_netcdf(path, compression_level=compression_level, serial=True, info=info, chunking=chunking) diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index ac66c76a1825e466119da770def6cff7f33ac414..66dc2b7b5dbf681cf5a318f107ec44abf510c7df 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys import numpy as np import pandas as pd import math @@ -164,6 +165,7 @@ class RotatedNes(Nes): else: msg = 'There is no variable called rotated_pole, projection has not been defined.' warnings.warn(msg) + sys.stderr.flush() projection_data = None return projection_data @@ -492,33 +494,38 @@ class RotatedNes(Nes): Shapefile dataframe. """ - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], - self.lat_bnds['data'].shape[2])) - aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], - self.lon_bnds['data'].shape[2])) + if self.shapefile is None: + + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], + self.lat_bnds['data'].shape[2])) + aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], + self.lon_bnds['data'].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) - fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf + # Create dataframe cointaining all polygons + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile return gdf @@ -550,13 +557,3 @@ class RotatedNes(Nes): return centroids_gdf - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. 
- """ - - grid_area = super(RotatedNes, self).calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None diff --git a/nes/nes_formats/cams_ra_format.py b/nes/nes_formats/cams_ra_format.py index a286d5dc0a5237d9c12854afd3ed134edd29e58c..d5b8a6b47ef36e97886603154a5c094b243a172e 100644 --- a/nes/nes_formats/cams_ra_format.py +++ b/nes/nes_formats/cams_ra_format.py @@ -187,6 +187,7 @@ def create_variables(self, netcdf, i_lev): msg = 'WARNING!!! ' msg += 'Variable {0} was not loaded. It will not be written.'.format(var_name) warnings.warn(msg) + sys.stderr.flush() return None diff --git a/tests/2.1-test_spatial_join.py b/tests/2.1-test_spatial_join.py index ede7851c737d6b591f9187d393cbccdb820c36e6..98a482df6e61b8b25861aa412cb4b9dd1e6a4aed 100644 --- a/tests/2.1-test_spatial_join.py +++ b/tests/2.1-test_spatial_join.py @@ -12,6 +12,7 @@ rank = comm.Get_rank() size = comm.Get_size() parallel_method = 'Y' +serial_write = False result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) result = pd.DataFrame(index=['read', 'calculate', 'write'], @@ -20,11 +21,20 @@ result = pd.DataFrame(index=['read', 'calculate', 'write'], '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection']) # ===== PATH TO MASK ===== # -shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' +# Timezones +# shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' +# shapefile_var_list = ['tzid'] +# str_len = 32 +# Country ISO codes +shapefile_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp" +shapefile_var_list = ['ISO'] +str_len = 3 # Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc # Regular lat-lon grid from MONARCH original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' +# CAMS_Global +# original_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" # ====================================================================================================================== # =================================== CENTROID EXISTING FILE =================================================== @@ -37,25 +47,27 @@ if rank == 0: # READ st_time = timeit.default_timer() nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() comm.Barrier() result.loc['read', test_name] = timeit.default_timer() - st_time # SPATIAL JOIN # Method can be centroid, nearest and intersection st_time = timeit.default_timer() -nessy.spatial_join(shapefile_path, method='centroid') +nessy.spatial_join(shapefile_path, method='centroid', var_list=shapefile_var_list, info=True) comm.Barrier() result.loc['calculate', test_name] = timeit.default_timer() - st_time -print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) - -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} -# WRITE st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} 
+nessy.set_strlen(str_len)
+comm.Barrier()
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write)
 comm.Barrier()
 result.loc['write', test_name] = timeit.default_timer() - st_time
@@ -63,6 +75,13 @@ result.loc['write', test_name] = timeit.default_timer() - st_time
 nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method)
 nessy.load()
 
+# REWRITE
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), serial=serial_write)
+
+# REOPEN
+nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), parallel_method=parallel_method)
+nessy.load()
+
 comm.Barrier()
 if rank == 0:
     print(result.loc[:, test_name])
@@ -96,14 +115,15 @@ comm.Barrier()
 result.loc['calculate', test_name] = timeit.default_timer() - st_time
 print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
 
-# ADD TIMEZONES
-timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0],
-                                                    nessy.lon['data'].shape[-1]])
-nessy.variables['tz'] = {'data': timezones,}
+# ADD Var
+for var_name in shapefile_var_list:
+    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
+    nessy.variables[var_name] = {'data': data, 'dtype': str}
+nessy.set_strlen(str_len)
 
 # WRITE
 st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True)
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
 comm.Barrier()
 result.loc['write', test_name] = timeit.default_timer() - st_time
@@ -127,25 +147,28 @@ if rank == 0:
 # READ
 st_time = timeit.default_timer()
 nessy = open_netcdf(original_path, parallel_method=parallel_method)
+nessy.variables = {}
+nessy.create_shapefile()
 comm.Barrier()
 result.loc['read', test_name] = timeit.default_timer() - st_time
 
 # SPATIAL JOIN
 # Method can be centroid, nearest and intersection
 st_time = timeit.default_timer()
-nessy.spatial_join(shapefile_path, method='nearest')
+nessy.spatial_join(shapefile_path, method='nearest', var_list=shapefile_var_list, info=True)
 comm.Barrier()
 result.loc['calculate', test_name] = timeit.default_timer() - st_time
 
 print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
 
-# ADD TIMEZONES
-timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0],
-                                                    nessy.lon['data'].shape[-1]])
-nessy.variables['tz'] = {'data': timezones,}
+# ADD Var
+for var_name in shapefile_var_list:
+    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
+    nessy.variables[var_name] = {'data': data, 'dtype': str}
+nessy.set_strlen(str_len)
 
 # WRITE
 st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True)
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
 comm.Barrier()
 result.loc['write', test_name] = timeit.default_timer() - st_time
@@ -186,14 +209,15 @@ comm.Barrier()
 result.loc['calculate', test_name] = timeit.default_timer() - st_time
 print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
 
-# ADD TIMEZONES
-timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0],
-                                                    nessy.lon['data'].shape[-1]])
-nessy.variables['tz'] = {'data': timezones,}
+# ADD Var
+for var_name in shapefile_var_list:
+    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
+    nessy.variables[var_name] = {'data': data, 'dtype': str}
+nessy.set_strlen(str_len)
 
 # WRITE
 st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True)
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
 comm.Barrier()
 result.loc['write', test_name] = timeit.default_timer() - st_time
@@ -214,29 +238,33 @@ sys.stdout.flush()
 test_name = '2.1.5.Existing_file_intersection'
 if rank == 0:
     print(test_name)
-    
+
 # READ
 st_time = timeit.default_timer()
 nessy = open_netcdf(original_path, parallel_method=parallel_method)
+nessy.variables = {}
+nessy.create_shapefile()
 comm.Barrier()
 result.loc['read', test_name] = timeit.default_timer() - st_time
 
 # SPATIAL JOIN
 # Method can be centroid, nearest and intersection
 st_time = timeit.default_timer()
-nessy.spatial_join(shapefile_path, method='intersection')
+nessy.spatial_join(shapefile_path, method='intersection', var_list=shapefile_var_list, info=True)
 comm.Barrier()
 result.loc['calculate', test_name] = timeit.default_timer() - st_time
 
 print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
 
-# ADD TIMEZONES
-timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0],
-                                                    nessy.lon['data'].shape[-1]])
-nessy.variables['tz'] = {'data': timezones,}
+# ADD Var
+for var_name in shapefile_var_list:
+    data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
+    nessy.variables[var_name] = {'data': data, 'dtype': str}
+nessy.set_strlen(str_len)
 
 # WRITE
 st_time = timeit.default_timer()
-nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True)
+nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True)
 comm.Barrier()
 result.loc['write', test_name] = timeit.default_timer() - st_time
@@ -249,7 +277,6 @@ if rank == 0:
     print(result.loc[:, test_name])
     sys.stdout.flush()
-
 # ======================================================================================================================
 # ===================================  INTERSECTION FROM NEW FILE  ===================================================
 # ======================================================================================================================
@@ -278,14 +305,14 @@ comm.Barrier()
 result.loc['calculate', test_name] = timeit.default_timer() - st_time
 print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile))
 
-# ADD TIMEZONES
-timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0],
-                                                    nessy.lon['data'].shape[-1]])
-nessy.variables['tz'] = {'data': timezones,}
-
+# ADD Var
+for var_name in shapefile_var_list:
+    data = 
nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) # WRITE st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) comm.Barrier() result.loc['write', test_name] = timeit.default_timer() - st_time @@ -214,29 +238,33 @@ sys.stdout.flush() test_name = '2.1.5.Existing_file_intersection' if rank == 0: print(test_name) - + # READ st_time = timeit.default_timer() nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() comm.Barrier() result.loc['read', test_name] = timeit.default_timer() - st_time # SPATIAL JOIN # Method can be centroid, nearest and intersection st_time = timeit.default_timer() -nessy.spatial_join(shapefile_path, method='intersection') +nessy.spatial_join(shapefile_path, method='intersection', var_list=shapefile_var_list, info=True) comm.Barrier() result.loc['calculate', test_name] = timeit.default_timer() - st_time print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) # WRITE st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +nessy.set_strlen(strlen=str_len) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) comm.Barrier() result.loc['write', test_name] = timeit.default_timer() - st_time @@ -249,7 +277,6 @@ if rank == 0: print(result.loc[:, test_name]) sys.stdout.flush() - # ====================================================================================================================== # =================================== INTERSECTION FROM NEW FILE =================================================== # ====================================================================================================================== @@ -278,14 +305,14 @@ comm.Barrier() result.loc['calculate', test_name] = timeit.default_timer() - st_time print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} - +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) # WRITE st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) comm.Barrier() result.loc['write', test_name] = timeit.default_timer() - st_time
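
For reference, a minimal usage sketch of the spatial_join workflow introduced by this patch, mirroring tests/2.1-test_spatial_join.py (the grid file, shapefile path, 'ISO' field and 3-character string length come from that test; the output file name is illustrative):

from nes import open_netcdf

# Open the source grid and build its shapefile (one polygon per grid cell)
nessy = open_netcdf('/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc', parallel_method='Y')
nessy.variables = {}
nessy.create_shapefile()

# Join the country ISO codes onto the grid; with method='intersection' each cell keeps the
# value of the polygon with the largest overlap ('nearest' and 'centroid' are also accepted)
nessy.spatial_join('/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp',
                   method='intersection', var_list=['ISO'], info=True)

# Store the joined column as a string variable and write it out
data = nessy.shapefile['ISO'].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]])
nessy.variables['ISO'] = {'data': data, 'dtype': str}
nessy.set_strlen(3)
nessy.to_netcdf('country_mask.nc')  # illustrative output name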