From d65dc6e86f009b607daa7044bef6b50b40e208e2 Mon Sep 17 00:00:00 2001
From: Alba Vilanova
Date: Wed, 8 Feb 2023 04:29:36 +0100
Subject: [PATCH 1/4] #40 Fix bug in intersection spatial join, correct bounds and remove centroid function from gpd

---
 CHANGELOG.md                       |   9 +-
 nes/nc_projections/default_nes.py  | 102 +++++++++++++-----
 nes/nc_projections/lcc_nes.py      |  51 +++++++--
 nes/nc_projections/mercator_nes.py |  51 +++++++--
 nes/nc_projections/points_nes.py   |  34 ++++--
 nes/nc_projections/rotated_nes.py  |  53 +++++++--
 ...v2.sh => run_scalability_tests_nord3v2.sh} |   2 +-
 7 files changed, 241 insertions(+), 61 deletions(-)
 rename tests/{run_scallability_tests_nord3v2.sh => run_scalability_tests_nord3v2.sh} (95%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3317390..199cc1c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
 # NES CHANGELOG

-### 1.0.1
-* Release date: 2023/01/26
+### 1.1.0
+* Release date: 2023/02/07
 * Changes and new features:
    * Improve Lat-Lon to Cartesian coordinates method (used in Providentia).
    * Function to_shapefile() to create shapefiles from a NES object without losing data from the original grid and being able to select the time and level.
@@ -11,7 +11,10 @@
    * Correct the dimensions of the resulting points datasets from any interpolation.
    * Amend the interpolation method to take into account the cases in which the distance among points equals zero.
    * Correct the way we retrieve the level positive value.
-   * Correct how to calculate the spatial bounds of LCC and Mercator grids.
+   * Correct how to calculate the spatial bounds of LCC and Mercator grids: the dimensions were flipped.
+   * Correct how to calculate the spatial bounds for all grids: use read axis limits instead of write axis limits.
+   * Calculate centroids from coordinates when creating shapefiles, instead of using the GeoPandas 'centroid' function, which raises a warning about possible errors.
+ * Enable selection of variables on the creation of shapefiles ### 1.0.0 * Release date: 2022/11/24 diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 06c8ce7..351ef5f 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -13,7 +13,7 @@ from mpi4py import MPI from cfunits import Units from numpy.ma.core import MaskError import geopandas as gpd -from shapely.geometry import Polygon +from shapely.geometry import Polygon, Point from copy import deepcopy, copy import datetime from ..methods import vertical_interpolation @@ -306,6 +306,7 @@ class Nes(object): del self.lat_bnds del self._lon_bnds del self.lon_bnds + del self.shapefile except AttributeError: pass @@ -525,7 +526,7 @@ class Nes(object): self._lat_bnds = {} self._lat_bnds['data'] = deepcopy(lat_bnds) self.lat_bnds = {} - self.lat_bnds['data'] = lat_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.lat_bnds['data'] = lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], :] inc_lon = np.abs(np.mean(np.diff(self._lon['data']))) @@ -534,7 +535,7 @@ class Nes(object): self._lon_bnds = {} self._lon_bnds['data'] = deepcopy(lon_bnds) self.lon_bnds = {} - self.lon_bnds['data'] = lon_bnds[self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lon_bnds['data'] = lon_bnds[self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] return None @@ -976,10 +977,8 @@ class Nes(object): """ if self.balanced: - print("balance") return self.get_read_axis_limits_balanced() else: - print("unbalance") return self.get_read_axis_limits_unbalanced() def get_read_axis_limits_unbalanced(self): @@ -999,7 +998,6 @@ class Nes(object): 't_min': None, 't_max': None} idx = self.get_idx_intervals() - print("IDX", idx) if self.parallel_method == 'Y': y_len = idx['idx_y_max'] - idx['idx_y_min'] @@ -1066,7 +1064,7 @@ class Nes(object): else: self.last_level += 1 axis_limits['z_max'] = self.last_level - print("Rank {0:03d}: Axis limits: {1}".format(self.rank, axis_limits)) + return axis_limits def get_read_axis_limits_balanced(self): @@ -2368,6 +2366,11 @@ class Nes(object): def create_shapefile(self): """ Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. """ if self._lat_bnds is None or self._lon_bnds is None: @@ -2406,8 +2409,8 @@ class Nes(object): (aux_b_lons[i, 0], aux_b_lats[i, 0])])) fids = np.arange(len(self._lat['data']) * len(self._lon['data'])) fids = fids.reshape((len(self._lat['data']), len(self._lon['data']))) - fids = fids[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") @@ -2534,48 +2537,66 @@ class Nes(object): method : str Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. var_list : list, None - + Variables that will be included in the resulting shapefile. """ if isinstance(shapefile, str): shapefile = gpd.read_file(shapefile) + # Create shapefile if it does not exist if self.shapefile is None: - msg = 'Warning: Shapefile does not exist. It will be created now.' + msg = 'Shapefile does not exist. It will be created now.' 
warnings.warn(msg) self.create_shapefile() + # Get variables of interest from shapefile + if var_list is not None: + if isinstance(var_list, str): + var_list = [var_list] + self.shapefile = self.shapefile.loc[:, var_list + ['geometry']] + # Nearest centroids to the shapefile polygons if method == 'nearest': - # Make copy - aux_grid = deepcopy(self.shapefile) - - # Get centroids of shapefile to shapefile - aux_grid.geometry = aux_grid.centroid - + # Get centroids + centroids_gdf = self.get_centroids_from_coordinates() + + # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest + # TODO: Check if the projection 4328 does not distort the coordinates too much + # https://gis.stackexchange.com/questions/372564/userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas + shapefile = shapefile.set_crs('EPSG:4326').to_crs('EPSG:4328') + centroids_gdf = centroids_gdf.set_crs('EPSG:4326').to_crs('EPSG:4328') + # Calculate spatial joint by distance - aux_grid = gpd.sjoin_nearest(aux_grid, shapefile.to_crs(aux_grid.crs), distance_col='distance') + aux_grid = gpd.sjoin_nearest(centroids_gdf, shapefile, distance_col='distance') + # Change projection back into geodetic coordinates + aux_grid = aux_grid.to_crs('EPSG:4326') + # Get data from closest shapes to centroids del aux_grid['geometry'], aux_grid['index_right'] self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid # Intersect the areas of the shapefile polygons, outside of the shapefile there will be NaN elif method == 'intersection': - + # Get intersected areas inp, res = shapefile.sindex.query_bulk(self.shapefile.geometry, predicate='intersects') print('Rank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) + # The function query_bulk does not use FID, but normal indices always starting at 0 + # Therefore the indices of the shapefile need to be corrected by adding the first FID index + inp += self.shapefile.geometry.index[0] + # Calculate intersected areas and fractions intersection = pd.concat([self.shapefile.geometry[inp].reset_index(), shapefile.geometry[res].reset_index()], axis=1, ignore_index=True) intersection.columns = (list(self.shapefile.geometry[inp].reset_index().columns) + list(shapefile.geometry[res].reset_index().rename(columns={'geometry': 'geometry_shapefile', - 'index': 'index_shapefile'}).columns)) + 'index': 'index_shapefile'}).columns)) intersection['area'] = intersection.apply(lambda x: x['geometry'].intersection(x['geometry_shapefile']).buffer(0).area, axis=1) + intersection = intersection.loc[intersection['area'] > 0] intersection['fraction'] = intersection.apply(lambda x: x['area'] / x['geometry'].area, axis=1) # Choose biggest area from intersected areas with multiple options @@ -2590,14 +2611,15 @@ class Nes(object): # Centroids that fall on the shapefile polygons, outside of the shapefile there will be NaN elif method == 'centroid': - # Make copy - aux_grid = deepcopy(self.shapefile) - - # Get centroids of shapefile to shapefile - aux_grid.geometry = aux_grid.centroid + # Get centroids + centroids_gdf = self.get_centroids_from_coordinates() + + # Make sure both datasets are in geodetic coordinates (e.g. 
4326) + shapefile = shapefile.set_crs('EPSG:4326').to_crs('EPSG:4326') + centroids_gdf = centroids_gdf.set_crs('EPSG:4326').to_crs('EPSG:4326') # Calculate spatial joint - aux_grid = gpd.sjoin(aux_grid, shapefile.to_crs(aux_grid.crs)) + aux_grid = gpd.sjoin(centroids_gdf, shapefile) # Get data from shapes where there are centroids, rest will be NaN del aux_grid['geometry'], aux_grid['index_right'] @@ -2609,6 +2631,34 @@ class Nes(object): return None + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. + """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, len(self.lat['data'])): + for lon_ind in range(0, len(self.lon['data'])): + centroids.append(Point(self.lon['data'][lon_ind], + self.lat['data'][lat_ind])) + + # Create dataframe cointaining all points + fids = np.arange(len(self._lat['data']) * len(self._lon['data'])) + fids = fids.reshape((len(self._lat['data']), len(self._lon['data']))) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + centroids_gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf + def __gather_data_py_object(self): """ Gather all the variable data into the MPI rank 0 to perform a serial write. diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index 8ae1795..c3ddf34 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -7,7 +7,7 @@ from cfunits import Units from pyproj import Proj from copy import deepcopy import geopandas as gpd -from shapely.geometry import Polygon +from shapely.geometry import Polygon, Point from .default_nes import Nes @@ -391,15 +391,15 @@ class LCCNes(Nes): self._lat_bnds = {} self._lat_bnds['data'] = deepcopy(lat_bnds) self.lat_bnds = {} - self.lat_bnds['data'] = lat_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lat_bnds['data'] = lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] self._lon_bnds = {} self._lon_bnds['data'] = deepcopy(lon_bnds) self.lon_bnds = {} - self.lon_bnds['data'] = lon_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lon_bnds['data'] = lon_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] return None @@ -460,6 +460,11 @@ class LCCNes(Nes): def create_shapefile(self): """ Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. 
""" # Get latitude and longitude cell boundaries @@ -472,7 +477,7 @@ class LCCNes(Nes): aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], self.lon_bnds['data'].shape[2])) - # Create dataframe cointaining all polygons + # Get polygons from bounds geometry = [] for i in range(aux_b_lons.shape[0]): geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), @@ -480,13 +485,43 @@ class LCCNes(Nes): (aux_b_lons[i, 2], aux_b_lats[i, 2]), (aux_b_lons[i, 3], aux_b_lats[i, 3]), (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") self.shapefile = gdf return gdf + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. + """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon['data'].shape[0]): + for lon_ind in range(0, self.lon['data'].shape[1]): + centroids.append(Point(self.lon['data'][lat_ind, lon_ind], + self.lat['data'][lat_ind, lon_ind])) + + # Create dataframe cointaining all points + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + centroids_gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 6acbf99..bbbd877 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -7,7 +7,7 @@ from cfunits import Units from pyproj import Proj from copy import deepcopy import geopandas as gpd -from shapely.geometry import Polygon +from shapely.geometry import Polygon, Point from nes.nc_projections.default_nes import Nes @@ -368,15 +368,15 @@ class MercatorNes(Nes): self._lat_bnds = {} self._lat_bnds['data'] = deepcopy(lat_bnds) self.lat_bnds = {} - self.lat_bnds['data'] = lat_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lat_bnds['data'] = lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] self._lon_bnds = {} self._lon_bnds['data'] = deepcopy(lon_bnds) self.lon_bnds = {} - self.lon_bnds['data'] = lon_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lon_bnds['data'] = lon_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] return None @@ -436,6 +436,11 @@ class MercatorNes(Nes): def create_shapefile(self): """ 
Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. """ # Get latitude and longitude cell boundaries @@ -448,7 +453,7 @@ class MercatorNes(Nes): aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], self.lon_bnds['data'].shape[2])) - # Create dataframe cointaining all polygons + # Get polygons from bounds geometry = [] for i in range(aux_b_lons.shape[0]): geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), @@ -456,13 +461,43 @@ class MercatorNes(Nes): (aux_b_lons[i, 2], aux_b_lats[i, 2]), (aux_b_lons[i, 3], aux_b_lats[i, 3]), (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") self.shapefile = gdf return gdf + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. + """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon['data'].shape[0]): + for lon_ind in range(0, self.lon['data'].shape[1]): + centroids.append(Point(self.lon['data'][lat_ind, lon_ind], + self.lat['data'][lat_ind, lon_ind])) + + # Create dataframe cointaining all points + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + centroids_gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index f8426de..1ca8c86 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -649,19 +649,41 @@ class PointsNes(Nes): def create_shapefile(self): """ Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. """ # Create dataframe cointaining all points - geometry = gpd.points_from_xy(self._lon['data'], self._lat['data']) - fids = np.arange(len(self._lon['data'])) - fids = fids[self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids), - geometry=geometry, - crs="EPSG:4326") + gdf = self.get_centroids_from_coordinates() self.shapefile = gdf return gdf + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = gpd.points_from_xy(self.lon['data'], self.lat['data']) + + # Create dataframe cointaining all points + fids = np.arange(len(self._lon['data'])) + fids = fids[self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + centroids_gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf + def add_variables_to_shapefile(self, var_list, idx_lev=0, idx_time=0): """ Add variables data to shapefile. diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index a3e07a9..42b6ebe 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -7,7 +7,7 @@ import math from cfunits import Units from copy import deepcopy import geopandas as gpd -from shapely.geometry import Polygon +from shapely.geometry import Polygon, Point from .default_nes import Nes @@ -418,15 +418,15 @@ class RotatedNes(Nes): self._lat_bnds = {} self._lat_bnds['data'] = deepcopy(lat_bnds) self.lat_bnds = {} - self.lat_bnds['data'] = lat_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lat_bnds['data'] = lat_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] self._lon_bnds = {} self._lon_bnds['data'] = deepcopy(lon_bnds) self.lon_bnds = {} - self.lon_bnds['data']= lon_bnds[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + self.lon_bnds['data']= lon_bnds[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] return None @@ -486,6 +486,11 @@ class RotatedNes(Nes): def create_shapefile(self): """ Create spatial geodataframe (shapefile). + + Returns + ------- + shapefile : GeoPandasDataFrame + Shapefile dataframe. """ if self._lat_bnds is None or self._lon_bnds is None: @@ -496,8 +501,8 @@ class RotatedNes(Nes): self.lat_bnds['data'].shape[2])) aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], self.lon_bnds['data'].shape[2])) - - # Create dataframe cointaining all polygons + + # Get polygons from bounds geometry = [] for i in range(aux_b_lons.shape[0]): geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), @@ -505,13 +510,43 @@ class RotatedNes(Nes): (aux_b_lons[i, 2], aux_b_lats[i, 2]), (aux_b_lons[i, 3], aux_b_lats[i, 3]), (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") self.shapefile = gdf return gdf + + def get_centroids_from_coordinates(self): + """ + Get centroids from geographical coordinates. + + Returns + ------- + centroids_gdf: GeoPandasDataFrame + Centroids dataframe. 
+ """ + + # Get centroids from coordinates + centroids = [] + for lat_ind in range(0, self.lon['data'].shape[0]): + for lon_ind in range(0, self.lon['data'].shape[1]): + centroids.append(Point(self.lon['data'][lat_ind, lon_ind], + self.lat['data'][lat_ind, lon_ind])) + + # Create dataframe cointaining all points + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + centroids_gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=centroids, + crs="EPSG:4326") + + return centroids_gdf diff --git a/tests/run_scallability_tests_nord3v2.sh b/tests/run_scalability_tests_nord3v2.sh similarity index 95% rename from tests/run_scallability_tests_nord3v2.sh rename to tests/run_scalability_tests_nord3v2.sh index b06f89a..79e482e 100644 --- a/tests/run_scallability_tests_nord3v2.sh +++ b/tests/run_scalability_tests_nord3v2.sh @@ -18,7 +18,7 @@ module load mpi4py/3.0.3-foss-2019b-Python-3.7.4 module load filelock/3.7.1-foss-2019b-Python-3.7.4 module load pyproj/2.5.0-foss-2019b-Python-3.7.4 module load eccodes-python/0.9.5-foss-2019b-Python-3.7.4 -module load geopandas/0.8.1-foss-2019b-Python-3.7.4 +module load geopandas/0.10.2-foss-2019b-Python-3.7.4 module load Shapely/1.7.1-foss-2019b-Python-3.7.4 for nprocs in 1 2 4 8 16 -- GitLab From 1dde0d1106dbb60bb529dd2c8c3d35476e990164 Mon Sep 17 00:00:00 2001 From: Alba Vilanova Date: Thu, 9 Feb 2023 06:23:46 +0100 Subject: [PATCH 2/4] Update implementation of geometry area --- nes/methods/cell_measures.py | 57 +++++++++++++----------------- nes/nc_projections/default_nes.py | 33 +++++++++++------ nes/nc_projections/lcc_nes.py | 4 +-- nes/nc_projections/mercator_nes.py | 4 +-- 4 files changed, 50 insertions(+), 48 deletions(-) diff --git a/nes/methods/cell_measures.py b/nes/methods/cell_measures.py index d701cc4..d93000e 100644 --- a/nes/methods/cell_measures.py +++ b/nes/methods/cell_measures.py @@ -76,41 +76,32 @@ def calculate_geometry_area(geometry_list, earth_radius_minor_axis=6356752.3142, Radius of the major axis of the Earth. """ - # Get coordinate bounds from polygons - geometry_corner_lon, geometry_corner_lat = get_bnds_from_polygon(geometry_list) - - # Calculate cell areas - geometry_area = calculate_cell_area(geometry_corner_lon, geometry_corner_lat, - earth_radius_minor_axis=earth_radius_minor_axis, - earth_radius_major_axis=earth_radius_major_axis) + geometry_area = np.empty(shape=(len(geometry_list,))) - return geometry_area - - -def get_bnds_from_polygon(geometry_list): - """ - Get coordinate bounds from of a set of geometries. - - Parameters - ---------- - geometry_list : list - List with polygon geometries. 
- """ - - if geometry_list is not None: - - # Extract the point values that define the perimeter of the polygon - lon_bnds, lat_bnds = geometry_list[0].exterior.coords.xy - for poly_ind in range(1, len(geometry_list)): - poly_lon_bnds, poly_lat_bnds = geometry_list[poly_ind].exterior.coords.xy - lon_bnds = np.vstack((lon_bnds, np.array(poly_lon_bnds))) - lat_bnds = np.vstack((lat_bnds, np.array(poly_lat_bnds))) - - return lon_bnds, lat_bnds - - else: + for geom_ind in range(0, len(geometry_list)): + + # Calculate the area of each geometry in multipolygon and collection objects + if geometry_list[geom_ind].type in ['MultiPolygon', 'GeometryCollection']: + multi_geom_area = 0 + for multi_geom_ind in range(0, len(geometry_list[geom_ind].geoms)): + if geometry_list[geom_ind].geoms[multi_geom_ind].type == 'Point': + continue + geometry_corner_lon, geometry_corner_lat = geometry_list[geom_ind].geoms[multi_geom_ind].exterior.coords.xy + geometry_corner_lon = np.array(geometry_corner_lon) + geometry_corner_lat = np.array(geometry_corner_lat) + geom_area = mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) + multi_geom_area += geom_area + geometry_area[geom_ind] = multi_geom_area * earth_radius_minor_axis * earth_radius_major_axis - return None + # Calculate the area of each geometry + else: + geometry_corner_lon, geometry_corner_lat = geometry_list[geom_ind].exterior.coords.xy + geometry_corner_lon = np.array(geometry_corner_lon) + geometry_corner_lat = np.array(geometry_corner_lat) + geom_area = mod_huiliers_area(geometry_corner_lon, geometry_corner_lat) + geometry_area[geom_ind] = geom_area * earth_radius_minor_axis * earth_radius_major_axis + + return geometry_area def calculate_cell_area(grid_corner_lon, grid_corner_lat, diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index f1e21e9..7ccb2f0 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -321,6 +321,11 @@ class Nes(object): del self._lon_bnds del self.lon_bnds del self.shapefile + for cell_measure in self.cell_measures.keys(): + if self.cell_measures[cell_measure]['data'] is not None: + del self.cell_measures[cell_measure]['data'] + del self.cell_measures[cell_measure] + del self.cell_measures except AttributeError: pass @@ -2100,8 +2105,8 @@ class Nes(object): zlib=self.zip_lvl > 0, complevel=self.zip_lvl) if self.size > 1: cell_area.set_collective(True) - cell_area[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = self.cell_measures['cell_area']['data'] + cell_area[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] = self.cell_measures['cell_area']['data'] cell_area.long_name = 'area of grid cell' cell_area.standard_name = 'cell_area' @@ -2692,16 +2697,21 @@ class Nes(object): inp += self.shapefile.geometry.index[0] # Calculate intersected areas and fractions - intersection = pd.concat([self.shapefile.geometry[inp].reset_index(), shapefile.geometry[res].reset_index()], + intersection = pd.concat([self.shapefile.geometry[inp].reset_index(), + shapefile.geometry[res].reset_index()], axis=1, ignore_index=True) intersection.columns = (list(self.shapefile.geometry[inp].reset_index().columns) + - list(shapefile.geometry[res].reset_index().rename(columns={'geometry': 'geometry_shapefile', - 'index': 'index_shapefile'}).columns)) - intersection['area'] = intersection.apply(lambda x: 
x['geometry'].intersection(x['geometry_shapefile']).buffer(0).area, - axis=1) - intersection = intersection.loc[intersection['area'] > 0] - intersection['fraction'] = intersection.apply(lambda x: x['area'] / x['geometry'].area, axis=1) + list(shapefile.geometry[res].reset_index().rename( + columns={'geometry': 'geometry_shapefile', + 'index': 'index_shapefile'}).columns)) + intersection['intersect_geometry'] = intersection.apply(lambda x: x['geometry'].intersection(x['geometry_shapefile']), axis=1) + intersection['intersect_area'] = cell_measures.calculate_geometry_area(intersection['intersect_geometry'].values) + intersection['geometry_area'] = cell_measures.calculate_geometry_area(intersection['geometry'].values) + intersection['fraction'] = intersection['intersect_area'] / intersection['geometry_area'] + intersection = intersection.loc[intersection['fraction'] > 0] + self.comm.Barrier() + # Choose biggest area from intersected areas with multiple options intersection.sort_values('fraction', ascending=False, inplace=True) intersection = intersection.drop_duplicates(subset='FID', keep="first") @@ -2709,7 +2719,8 @@ class Nes(object): # Get data from shapefile del shapefile['geometry'] - self.shapefile.loc[intersection.index, shapefile.columns] = np.array(shapefile.loc[intersection.index_shapefile, :]) + self.shapefile.loc[intersection.index, shapefile.columns] = np.array( + shapefile.loc[intersection.index_shapefile, :]) # Centroids that fall on the shapefile polygons, outside of the shapefile there will be NaN elif method == 'centroid': @@ -2722,7 +2733,7 @@ class Nes(object): centroids_gdf = centroids_gdf.set_crs('EPSG:4326').to_crs('EPSG:4326') # Calculate spatial joint - aux_grid = gpd.sjoin(centroids_gdf, shapefile) + aux_grid = gpd.sjoin(centroids_gdf, shapefile, predicate='within') # Get data from shapes where there are centroids, rest will be NaN del aux_grid['geometry'], aux_grid['index_right'] diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index bc1fc78..6f107ea 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -246,11 +246,11 @@ class LCCNes(Nes): self._x = {'data': np.linspace(kwargs['x_0'] + (kwargs['inc_x'] / 2), kwargs['x_0'] + (kwargs['inc_x'] / 2) + (kwargs['inc_x'] * (kwargs['nx'] - 1)), kwargs['nx'], - dtype=np.float)} + dtype=np.float64)} self._y = {'data': np.linspace(kwargs['y_0'] + (kwargs['inc_y'] / 2), kwargs['y_0'] + (kwargs['inc_y'] / 2) + (kwargs['inc_y'] * (kwargs['ny'] - 1)), kwargs['ny'], - dtype=np.float)} + dtype=np.float64)} # Create a regular grid in metres (1D to 2D) x = np.array([self._x['data']] * len(self._y['data'])) diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 4f831d8..c539bcd 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -242,13 +242,13 @@ class MercatorNes(Nes): self._x = {'data': np.linspace(kwargs['x_0'] + (kwargs['inc_x'] / 2), kwargs['x_0'] + (kwargs['inc_x'] / 2) + (kwargs['inc_x'] * (kwargs['nx'] - 1)), kwargs['nx'], - dtype=np.float)} + dtype=np.float64)} self._y = {'data': np.linspace(kwargs['y_0'] + (kwargs['inc_y'] / 2), kwargs['y_0'] + (kwargs['inc_y'] / 2) + (kwargs['inc_y'] * (kwargs['ny'] - 1)), kwargs['ny'], - dtype=np.float)} + dtype=np.float64)} # Create a regular grid in metres (1D to 2D) x = np.array([self._x['data']] * len(self._y['data'])) -- GitLab From dc09f9d860e610a86efea85297ee4e59c9ffff66 Mon Sep 17 00:00:00 2001 From: ctena Date: Fri, 10 Feb 2023 16:08:05 +0100 Subject: 
[PATCH 3/4] Spatial joint intersection working in serial --- nes/nc_projections/default_nes.py | 137 +++++++++++++++++------------- 1 file changed, 78 insertions(+), 59 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 7ccb2f0..e9338a2 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -23,6 +23,7 @@ from ..methods import cell_measures from ..nes_formats import to_netcdf_cams_ra from ..nc_projections import * + class Nes(object): """ @@ -2106,7 +2107,8 @@ class Nes(object): if self.size > 1: cell_area.set_collective(True) cell_area[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] = self.cell_measures['cell_area']['data'] + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] = self.cell_measures[ + 'cell_area']['data'] cell_area.long_name = 'area of grid cell' cell_area.standard_name = 'cell_area' @@ -2130,7 +2132,8 @@ class Nes(object): for i, (var_name, var_dict) in enumerate(self.variables.items()): if var_dict['data'] is not None: if self.info: - print("Rank {0:03d}: Writing {1} var ({2}/{3})".format(self.rank, var_name, i + 1, len(self.variables))) + print("Rank {0:03d}: Writing {1} var ({2}/{3})".format( + self.rank, var_name, i + 1, len(self.variables))) try: if not chunking: var = netcdf.createVariable(var_name, var_dict['data'].dtype, ('time', 'lev',) + self._var_dim, @@ -2144,7 +2147,8 @@ class Nes(object): chunk_size = None chunk_size = self.comm.bcast(chunk_size, root=0) var = netcdf.createVariable(var_name, var_dict['data'].dtype, ('time', 'lev',) + self._var_dim, - zlib=self.zip_lvl > 0, complevel=self.zip_lvl, chunksizes=chunk_size) + zlib=self.zip_lvl > 0, complevel=self.zip_lvl, + chunksizes=chunk_size) if self.info: print("Rank {0:03d}: Var {1} created ({2}/{3})".format( self.rank, var_name, i + 1, len(self.variables))) @@ -2584,7 +2588,8 @@ class Nes(object): if self.variables[var_name]['data'] is None: unloaded_vars.append(var_name) if len(unloaded_vars) > 0: - raise ValueError('The variables {0} need to be loaded/created before using to_shapefile.'.format(unloaded_vars)) + raise ValueError('The variables {0} need to be loaded/created before using to_shapefile.'.format( + unloaded_vars)) # Select first vertical level (if needed) if lev is None: @@ -2634,106 +2639,118 @@ class Nes(object): return None - def spatial_join(self, shapefile, method=None, var_list=None): + def spatial_join(self, ext_shp, method=None, var_list=None, info=False): """ Compute overlay intersection of two GeoPandasDataFrames. Parameters ---------- - shapefile : GeoPandasDataFrame, str + ext_shp : GeoPandasDataFrame, str File or path from where the data will be obtained on the intersection. method : str Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. var_list : list, None Variables that will be included in the resulting shapefile. """ - - if isinstance(shapefile, str): - shapefile = gpd.read_file(shapefile) + if isinstance(ext_shp, str): + ext_shp = gpd.read_file(ext_shp) # Create shapefile if it does not exist if self.shapefile is None: msg = 'Shapefile does not exist. It will be created now.' 
warnings.warn(msg) - self.create_shapefile() + grid_shp = self.create_shapefile() + else: + grid_shp = self.shapefile + grid_shp = copy(grid_shp) # Get variables of interest from shapefile if var_list is not None: if isinstance(var_list, str): var_list = [var_list] - self.shapefile = self.shapefile.loc[:, var_list + ['geometry']] + ext_shp = ext_shp.loc[:, var_list + ['geometry']] + else: + var_list = ext_shp.columns.to_list() + var_list.remove('geometry') + + ext_shp = ext_shp.to_crs(grid_shp.crs) # Nearest centroids to the shapefile polygons if method == 'nearest': - + if self.master and info: + print("Nearest spatial joint") # Get centroids centroids_gdf = self.get_centroids_from_coordinates() # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest # TODO: Check if the projection 4328 does not distort the coordinates too much # https://gis.stackexchange.com/questions/372564/userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas - shapefile = shapefile.set_crs('EPSG:4326').to_crs('EPSG:4328') - centroids_gdf = centroids_gdf.set_crs('EPSG:4326').to_crs('EPSG:4328') + ext_shp = ext_shp.to_crs('EPSG:4328') + centroids_gdf = centroids_gdf.to_crs('EPSG:4328') # Calculate spatial joint by distance - aux_grid = gpd.sjoin_nearest(centroids_gdf, shapefile, distance_col='distance') - - # Change projection back into geodetic coordinates - aux_grid = aux_grid.to_crs('EPSG:4326') + aux_grid = gpd.sjoin_nearest(centroids_gdf, ext_shp, distance_col='distance') # Get data from closest shapes to centroids del aux_grid['geometry'], aux_grid['index_right'] self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - # Intersect the areas of the shapefile polygons, outside of the shapefile there will be NaN + # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN elif method == 'intersection': - + if self.master and info: + print("Intersection spatial joint") + grid_shp['FID_grid'] = grid_shp.index + grid_shp = grid_shp.reset_index() # Get intersected areas - inp, res = shapefile.sindex.query_bulk(self.shapefile.geometry, predicate='intersects') - print('Rank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) - - # The function query_bulk does not use FID, but normal indices always starting at 0 - # Therefore the indices of the shapefile need to be corrected by adding the first FID index - inp += self.shapefile.geometry.index[0] + # inp, res = shapefile.sindex.query_bulk(self.shapefile.geometry, predicate='intersects') + inp, res = grid_shp.sindex.query_bulk(ext_shp.geometry, predicate='intersects') - # Calculate intersected areas and fractions - intersection = pd.concat([self.shapefile.geometry[inp].reset_index(), - shapefile.geometry[res].reset_index()], - axis=1, ignore_index=True) - intersection.columns = (list(self.shapefile.geometry[inp].reset_index().columns) + - list(shapefile.geometry[res].reset_index().rename( - columns={'geometry': 'geometry_shapefile', - 'index': 'index_shapefile'}).columns)) - intersection['intersect_geometry'] = intersection.apply(lambda x: x['geometry'].intersection(x['geometry_shapefile']), axis=1) - intersection['intersect_area'] = cell_measures.calculate_geometry_area(intersection['intersect_geometry'].values) - intersection['geometry_area'] = cell_measures.calculate_geometry_area(intersection['geometry'].values) - intersection['fraction'] = intersection['intersect_area'] / intersection['geometry_area'] - intersection = intersection.loc[intersection['fraction'] > 0] - - 
self.comm.Barrier() + if self.master and info: + print('Rank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) - # Choose biggest area from intersected areas with multiple options - intersection.sort_values('fraction', ascending=False, inplace=True) + # Calculate intersected areas and fractions + intersection = pd.DataFrame() + intersection['FID'] = np.array(grid_shp.loc[res, 'FID_grid'], dtype=np.uint32) + intersection['ext_shp_id'] = np.array(inp, dtype=np.uint32) + + # intersection['geometry_ext'] = ext_shp.loc[inp, 'geometry'].values + # intersection['geometry_grid'] = grid_shp.loc[res, 'geometry'].values + if True: + # No Warnings Zone + counts = intersection['FID'].value_counts() + warnings.filterwarnings('ignore') + + intersection.loc[:, 'weight'] = 1. + for i, row in intersection.iterrows(): + if i % 1000 == 0 and self.master and info: + print('\tRank {0:03d}: {1:.3f} %'.format(self.rank, i*100 / len(intersection))) + # Filter to do not calculate percentages over 100% grid cells spatial joint + if counts[row['FID']] > 1: + intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( + ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area + # intersection['intersect_area'] = intersection.apply( + # lambda x: x['geometry_grid'].intersection(x['geometry_ext']).area, axis=1) + intersection.drop(intersection[intersection['weight'] <= 0].index, inplace=True) + + warnings.filterwarnings('default') + + # Choose the biggest area from intersected areas with multiple options + intersection.sort_values('weight', ascending=False, inplace=True) intersection = intersection.drop_duplicates(subset='FID', keep="first") intersection = intersection.sort_values('FID').set_index('FID') - # Get data from shapefile - del shapefile['geometry'] - self.shapefile.loc[intersection.index, shapefile.columns] = np.array( - shapefile.loc[intersection.index_shapefile, :]) + for var_name in var_list: + self.shapefile.loc[intersection.index, var_name] = np.array( + ext_shp.loc[intersection['ext_shp_id'], var_name]) - # Centroids that fall on the shapefile polygons, outside of the shapefile there will be NaN + # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN elif method == 'centroid': # Get centroids centroids_gdf = self.get_centroids_from_coordinates() - - # Make sure both datasets are in geodetic coordinates (e.g. 
4326) - shapefile = shapefile.set_crs('EPSG:4326').to_crs('EPSG:4326') - centroids_gdf = centroids_gdf.set_crs('EPSG:4326').to_crs('EPSG:4326') # Calculate spatial joint - aux_grid = gpd.sjoin(centroids_gdf, shapefile, predicate='within') + aux_grid = gpd.sjoin(centroids_gdf, ext_shp, predicate='within') # Get data from shapes where there are centroids, rest will be NaN del aux_grid['geometry'], aux_grid['index_right'] @@ -2832,7 +2849,8 @@ class Nes(object): data_list[var_name]['data'] = np.concatenate(data_aux, axis=axis) except Exception as e: print("**ERROR** an error has occurred while gathering the '{0}' variable.\n".format(var_name)) - sys.stderr.write("**ERROR** an error has occurred while gathering the '{0}' variable.\n".format(var_name)) + sys.stderr.write("**ERROR** an error has occurred while gathering the '{0}' variable.\n".format( + var_name)) print(e) sys.stderr.write(str(e)) # print(e, file=sys.stderr) @@ -2917,7 +2935,8 @@ class Nes(object): data_list[var_name]['data'] = np.concatenate(recvbuf, axis=axis) except Exception as e: print("**ERROR** an error has occurred while gathering the '{0}' variable.\n".format(var_name)) - sys.stderr.write("**ERROR** an error has occurred while gathering the '{0}' variable.\n".format(var_name)) + sys.stderr.write("**ERROR** an error has occurred while gathering the '{0}' variable.\n".format( + var_name)) print(e) sys.stderr.write(str(e)) # print(e, file=sys.stderr) @@ -2930,8 +2949,8 @@ class Nes(object): # ================================================================================================================== # Extra Methods # ================================================================================================================== - - def lon_lat_to_cartesian_ecef(self, lon, lat): + @staticmethod + def lon_lat_to_cartesian_ecef(lon, lat): """ # Convert observational/model geographic longitude/latitude coordinates to cartesian ECEF (Earth Centred, # Earth Fixed) coordinates, assuming WGS84 datum and ellipsoid, and that all heights = 0. @@ -2972,7 +2991,7 @@ class Nes(object): def interpolate_vertical(self, new_levels, new_src_vertical=None, kind='linear', extrapolate=None, info=None): """ - Vertical methods method. + Vertical interpolation function. 
Parameters ---------- @@ -3066,4 +3085,4 @@ class Nes(object): # WGS84 with radius defined in Cartopy source code earth_radius_dict = {'WGS84': [6356752.3142, 6378137.0]} - return earth_radius_dict[ellps] \ No newline at end of file + return earth_radius_dict[ellps] -- GitLab From 56a77d4227386c683c76af863710a768cb333c73 Mon Sep 17 00:00:00 2001 From: ctena Date: Fri, 10 Feb 2023 16:46:41 +0100 Subject: [PATCH 4/4] Spatial joint intersection tests working well --- tests/2.1-test_spatial_join.py | 262 ++++++++++++++++----------------- 1 file changed, 131 insertions(+), 131 deletions(-) diff --git a/tests/2.1-test_spatial_join.py b/tests/2.1-test_spatial_join.py index 845a05c..2df1e07 100644 --- a/tests/2.1-test_spatial_join.py +++ b/tests/2.1-test_spatial_join.py @@ -16,145 +16,145 @@ original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_20221 parallel_method = 'Y' -result_path = "Times_test_2-1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) +result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) result = pd.DataFrame(index=['read', 'calculate', 'write'], - columns=['2-1.1.Existing_file_centroid', '2-1.2.New_file_centroid', - '2-1.2.Existing_file_nearest', '2-1.4.New_file_nearest', - '2-1.5.Existing_file_intersection', '2-1.6.New_file_intersection']) + columns=['2.1.1.Existing_file_centroid', '2.1.2.New_file_centroid', + '2.1.3.Existing_file_nearest', '2.1.4.New_file_nearest', + '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection']) # ===== PATH TO MASK ===== # shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' -# # ====================================================================================================================== -# # =================================== CENTROID EXISTING FILE =================================================== -# # ====================================================================================================================== - -# test_name = '2-1.1.Existing_file_centroid' -# print(test_name) -# -# # READ -# st_time = timeit.default_timer() -# nessy = open_netcdf(original_path, parallel_method=parallel_method) -# -# comm.Barrier() -# result.loc['read', test_name] = timeit.default_timer() - st_time -# -# # SPATIAL JOIN -# st_time = timeit.default_timer() -# # Method can be centroid, nearest and intersection -# nessy.spatial_join(shapefile_path, method='centroid') -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() -# -# # ====================================================================================================================== -# # =================================== CENTROID FROM NEW FILE =================================================== -# # ====================================================================================================================== - -# test_name = '2-1.2.New_file_centroid' -# print(test_name) -# -# # CREATE -# st_time = timeit.default_timer() -# # Define projection -# projection = 'regular' -# lat_orig = 41.1 -# lon_orig = 1.8 -# inc_lat = 0.2 -# inc_lon = 0.2 -# n_lat = 100 -# n_lon = 100 -# -# # SPATIAL JOIN -# # Method can be centroid, nearest and intersection -# nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, -# 
lat_orig=lat_orig, lon_orig=lon_orig, -# inc_lat=inc_lat, inc_lon=inc_lon, -# n_lat=n_lat, n_lon=n_lon) -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# -# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() -# -# # ====================================================================================================================== -# # =================================== NEAREST EXISTING FILE =================================================== -# # ====================================================================================================================== - -# test_name = '2-1.3.Existing_file_nearest' -# print(test_name) -# -# # READ -# st_time = timeit.default_timer() -# nessy = open_netcdf(original_path, parallel_method=parallel_method) -# -# comm.Barrier() -# result.loc['read', test_name] = timeit.default_timer() - st_time -# -# # SPATIAL JOIN -# st_time = timeit.default_timer() -# # Method can be centroid, nearest and intersection -# nessy.spatial_join(shapefile_path, method='nearest') -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() -# -# # ====================================================================================================================== -# # =================================== NEAREST FROM NEW FILE =================================================== -# # ====================================================================================================================== - -# test_name = '2-1.4.New_file_nearest' -# print(test_name) -# -# # CREATE -# st_time = timeit.default_timer() -# # Define projection -# projection = 'regular' -# lat_orig = 41.1 -# lon_orig = 1.8 -# inc_lat = 0.2 -# inc_lon = 0.2 -# n_lat = 100 -# n_lon = 100 -# -# # SPATIAL JOIN -# # Method can be centroid, nearest and intersection -# nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, -# lat_orig=lat_orig, lon_orig=lon_orig, -# inc_lat=inc_lat, inc_lon=inc_lon, -# n_lat=n_lat, n_lon=n_lon) -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# -# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() +# ====================================================================================================================== +# =================================== CENTROID EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.1.Existing_file_centroid' +if rank == 0: + print(test_name) +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +st_time = timeit.default_timer() +# Method can be centroid, nearest and intersection +nessy.spatial_join(shapefile_path, method='centroid') +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('SPATIAL JOIN - Rank {0:03d} 
- Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() # ====================================================================================================================== -# =================================== INTERSECTION EXISTING FILE =================================================== +# =================================== CENTROID FROM NEW FILE =================================================== # ====================================================================================================================== -test_name = '2-1.5.Existing_file_intersection' -print(test_name) +test_name = '2.1.2.New_file_centroid' +if rank == 0: + print(test_name) +# CREATE +st_time = timeit.default_timer() +# Define projection +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== NEAREST EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.3.Existing_file_nearest' +if rank == 0: + print(test_name) +# READ +st_time = timeit.default_timer() +nessy = open_netcdf(original_path, parallel_method=parallel_method) + +comm.Barrier() +result.loc['read', test_name] = timeit.default_timer() - st_time + +# SPATIAL JOIN +st_time = timeit.default_timer() +# Method can be centroid, nearest and intersection +nessy.spatial_join(shapefile_path, method='nearest') +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + +# ====================================================================================================================== +# =================================== NEAREST FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.4.New_file_nearest' +if rank == 0: + print(test_name) +# CREATE +st_time = timeit.default_timer() +# Define projection +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time + +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + 
+comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() + + +# ====================================================================================================================== +# =================================== INTERSECTION EXISTING FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.5.Existing_file_intersection' +if rank == 0: + print(test_name) # READ st_time = timeit.default_timer() nessy = open_netcdf(original_path, parallel_method=parallel_method) @@ -180,8 +180,9 @@ sys.stdout.flush() # =================================== INTERSECTION FROM NEW FILE =================================================== # ====================================================================================================================== -test_name = '2-1.6.New_file_intersection' -print(test_name) +test_name = '2.1.6.New_file_intersection' +if rank == 0: + print(test_name) # DEFINE PROJECTION st_time = timeit.default_timer() @@ -211,4 +212,3 @@ sys.stdout.flush() if rank == 0: result.to_csv(result_path) - -- GitLab
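
Usage sketch (illustrative only, not part of the patches above): the snippet below follows the calls exercised in tests/2.1-test_spatial_join.py once this series is applied. The file paths and the 'tzid' attribute name are placeholders, and it assumes NES exposes open_netcdf and from_shapefile at package level; like the tests, it is intended to run under MPI (e.g. mpirun -np 4 python example.py).

# Illustrative sketch of the spatial_join() API added/fixed in this patch series.
# Paths and the 'tzid' column are placeholders for a real grid file and shapefile.
from nes import open_netcdf, from_shapefile

grid_path = '/path/to/model_grid.nc'        # placeholder NetCDF grid
shapefile_path = '/path/to/timezones.shp'   # placeholder external shapefile

# Join attributes of an external shapefile onto an existing grid.
# method can be 'centroid', 'nearest' or 'intersection'; var_list limits the
# shapefile columns that are copied into nessy.shapefile.
nessy = open_netcdf(grid_path, parallel_method='Y')
nessy.spatial_join(shapefile_path, method='intersection', var_list=['tzid'], info=True)
print(nessy.shapefile)

# Alternatively, build a new regular grid from scratch and join it to the shapefile.
nessy_new = from_shapefile(shapefile_path, method='centroid', projection='regular',
                           lat_orig=41.1, lon_orig=1.8, inc_lat=0.2, inc_lon=0.2,
                           n_lat=100, n_lon=100)
print(nessy_new.shapefile)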