Source code for nes.methods.spatial_join

#!/usr/bin/env python

import sys
import warnings
import geopandas as gpd
from geopandas import GeoDataFrame
import nes
import numpy as np
import pandas as pd
from shapely.geos import TopologicalError


[docs] def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True): """ Compute overlay intersection of two GeoPandasDataFrames. Parameters ---------- self : nes.Nes Nes Object. ext_shp : GeoPandasDataFrame or str File or path from where the data will be obtained on the intersection. method : str Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. var_list : List or None or str Variables that will be included in the resulting shapefile. info : bool Indicates if you want to print the process info. apply_bbox : bool Indicates if you want to reduce the shapefile to a bbox. """ if self.master and info: print("Starting spatial join") if isinstance(var_list, str): # Transforming string (variable name) to a list with length 0 var_list = [var_list] # Create source shapefile if it does not exist if self.shapefile is None: if self.master and info: print("\tCreating shapefile") sys.stdout.flush() self.create_shapefile() ext_shp = prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info, apply_bbox=apply_bbox) if method == 'nearest': # Nearest centroids to the shapefile polygons spatial_join_nearest(self, ext_shp=ext_shp, info=info) elif method == 'intersection': # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN spatial_join_intersection(self, ext_shp=ext_shp, info=info) elif method == 'centroid': # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN spatial_join_centroid(self, ext_shp=ext_shp, info=info) else: accepted_values = ['nearest', 'intersection', 'centroid'] raise NotImplementedError('{0} is not implemented. Choose from: {1}'.format(method, accepted_values)) return None
[docs] def prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox=True): """ Prepare the external shapefile. It is high recommended to pass ext_shp parameter as string because it will clip the external shapefile to the rank. 1. Read if it is not already read 2. Filter variables list 3. Standardize projections Parameters ---------- self : nes.Nes Nes Object. ext_shp : GeoDataFrame or str External shapefile or path to it. var_list : List[str] or None External shapefile variables to be computed. info : bool Indicates if you want to print the information. apply_bbox : bool Indicates if you want to reduce the shapefile to a bbox. Returns ------- GeoDataFrame External shapefile. """ if isinstance(ext_shp, str): # Reading external shapefile if self.master and info: print("\tReading external shapefile") # ext_shp = gpd.read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry) if apply_bbox: ext_shp = gpd.read_file(ext_shp, include_fields=var_list, bbox=get_bbox(self)) else: ext_shp = gpd.read_file(ext_shp, include_fields=var_list) else: msg = "WARNING!!! " msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened shapefile " msg += "a best usage of memory is performed because the external shape will be clipped while reading." warnings.warn(msg) sys.stderr.flush() ext_shp.reset_index(inplace=True) if var_list is not None: ext_shp = ext_shp.loc[:, var_list + ['geometry']] self.comm.Barrier() if self.master and info: print("\t\tReading external shapefile done!") # Standardizing projection ext_shp = ext_shp.to_crs(self.shapefile.crs) return ext_shp
[docs] def get_bbox(self): """ Obtain the bounding box of the rank data (lon_min, lat_min, lon_max, lat_max). Parameters ---------- self : nes.Nes Nes Object. Returns ------- tuple Bounding box """ bbox = (self.lon_bnds['data'].min(), self.lat_bnds['data'].min(), self.lon_bnds['data'].max(), self.lat_bnds['data'].max(), ) return bbox
[docs] def spatial_join_nearest(self, ext_shp, info=False): """ Perform the spatial join using the nearest method. Parameters ---------- self : nes.Nes Nes Object. ext_shp : GeoDataFrame External shapefile. info : bool Indicates if you want to print the information. """ if self.master and info: print("\tNearest spatial joint") sys.stdout.flush() grid_shp = self.get_centroids_from_coordinates() # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest # TODO: Check if the projection 4328 does not distort the coordinates too much # https://gis.stackexchange.com/questions/372564/ # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas # ext_shp = ext_shp.to_crs('EPSG:4328') # grid_shp = grid_shp.to_crs('EPSG:4328') # Calculate spatial joint by distance aux_grid = gpd.sjoin_nearest(grid_shp, ext_shp, distance_col='distance') # Get data from closest shapes to centroids del aux_grid['geometry'], aux_grid['index_right'] self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid var_list = list(ext_shp.columns) var_list.remove('geometry') for var_name in var_list: self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) return None
[docs] def spatial_join_centroid(self, ext_shp, info=False): """ Perform the spatial join using the centroid method. Parameters ---------- self : nes.Nes Nes Object. ext_shp : GeoDataFrame External shapefile. info : bool Indicates if you want to print the information. """ if self.master and info: print("\tCentroid spatial join") sys.stdout.flush() if info and self.master: print("\t\tCalculating centroids") sys.stdout.flush() # Get centroids grid_shp = self.get_centroids_from_coordinates() # Calculate spatial joint if info and self.master: print("\t\tCalculating centroid spatial join") sys.stdout.flush() aux_grid = gpd.sjoin(grid_shp, ext_shp, predicate='within') # Get data from shapes where there are centroids, rest will be NaN del aux_grid['geometry'], aux_grid['index_right'] self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid var_list = list(ext_shp.columns) var_list.remove('geometry') for var_name in var_list: self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) return None
[docs] def spatial_join_intersection(self, ext_shp, info=False): """ Perform the spatial join using the intersection method. Parameters ---------- self : nes.Nes Nes Object. ext_shp : GeoDataFrame External shapefile. info : bool Indicates if you want to print the information. """ var_list = list(ext_shp.columns) var_list.remove('geometry') grid_shp = self.shapefile grid_shp['FID_grid'] = grid_shp.index grid_shp = grid_shp.reset_index() # Get intersected areas # inp, res = ext_shp.sindex.query_bulk(grid_shp.geometry, predicate='intersects') inp, res = grid_shp.sindex.query_bulk(ext_shp.geometry, predicate='intersects') if info: print('\t\tRank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) sys.stdout.flush() # Calculate intersected areas and fractions intersection = pd.DataFrame(columns=['FID', 'ext_shp_id', 'weight']) intersection['FID'] = np.array(grid_shp.loc[res, 'FID_grid'], dtype=np.uint32) intersection['ext_shp_id'] = np.array(inp, dtype=np.uint32) if len(intersection) > 0: if True: # No Warnings Zone counts = intersection['FID'].value_counts() warnings.filterwarnings('ignore') intersection.loc[:, 'weight'] = 1. for i, row in intersection.iterrows(): if isinstance(i, int) and i % 1000 == 0 and info: print('\t\t\tRank {0:03d}: {1:.3f} %'.format(self.rank, i * 100 / len(intersection))) sys.stdout.flush() # Filter to do not calculate percentages over 100% grid cells spatial joint if counts[row['FID']] > 1: try: intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area except TopologicalError: # If for some reason the geometry is corrupted it should work with the buffer function ext_shp.loc[[inp[i]], 'geometry'] = ext_shp.loc[[inp[i]], 'geometry'].buffer(0) intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area # intersection['intersect_area'] = intersection.apply( # lambda x: x['geometry_grid'].intersection(x['geometry_ext']).area, axis=1) intersection.drop(intersection[intersection['weight'] <= 0].index, inplace=True) warnings.filterwarnings('default') # Choose the biggest area from intersected areas with multiple options intersection.sort_values('weight', ascending=False, inplace=True) intersection = intersection.drop_duplicates(subset='FID', keep="first") intersection = intersection.sort_values('FID').set_index('FID') for var_name in var_list: self.shapefile.loc[intersection.index, var_name] = np.array( ext_shp.loc[intersection['ext_shp_id'], var_name]) else: for var_name in var_list: self.shapefile.loc[:, var_name] = np.nan for var_name in var_list: self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) return None