Source code for nes.methods.spatial_join

#!/usr/bin/env python

import sys
from warnings import warn, filterwarnings
from geopandas import sjoin_nearest, sjoin, read_file
from pandas import DataFrame
from numpy import array, uint32, nan
from shapely.errors import TopologicalError


def spatial_join(self, ext_shp, method=None, var_list=None, info=False, apply_bbox=True):
    """
    Compute overlay intersection of two GeoPandasDataFrames.

    The variables of the external shapefile are written into ``self.shapefile``
    using the selected overlay method.

    Parameters
    ----------
    self : nes.Nes
        A Nes Object.
    ext_shp : GeoPandasDataFrame or str
        File or path from where the data will be obtained on the intersection.
    method : str
        Overlay method. Accepted values: ["nearest", "intersection", "centroid"].
    var_list : List or None or str
        Variables that will be included in the resulting shapefile.
    info : bool
        Indicates if you want to print the process info.
    apply_bbox : bool
        Indicates if you want to reduce the shapefile to a bbox.

    Raises
    ------
    NotImplementedError
        If `method` is not one of the accepted values. The check is done before
        any shapefile is created or read, so an invalid call fails immediately.
    """
    # Fail fast: validate the method before doing any I/O or collective communication.
    accepted_values = ["nearest", "intersection", "centroid"]
    if method not in accepted_values:
        raise NotImplementedError("{0} is not implemented. Choose from: {1}".format(method, accepted_values))

    if self.master and info:
        print("Starting spatial join")

    if isinstance(var_list, str):
        # A single variable name is wrapped into a one-element list
        var_list = [var_list]

    # Create source shapefile if it does not exist
    if self.shapefile is None:
        if self.master and info:
            print("\tCreating shapefile")
            sys.stdout.flush()
        self.create_shapefile()

    ext_shp = __prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info,
                                           apply_bbox=apply_bbox)

    if method == "nearest":
        # Nearest centroids to the shapefile polygons
        __spatial_join_nearest(self, ext_shp=ext_shp, info=info)
    elif method == "intersection":
        # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN
        __spatial_join_intersection(self, ext_shp=ext_shp, info=info)
    else:
        # method == "centroid" (guaranteed by the validation above)
        # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN
        __spatial_join_centroid(self, ext_shp=ext_shp, info=info)

    return None
def __prepare_external_shapefile(self, ext_shp, var_list, info=False, apply_bbox=True):
    """
    Prepare the external shapefile.

    It is highly recommended to pass the ext_shp parameter as a string because
    the external shapefile will then be clipped to the rank while reading.

    1. Read if it is not already read
    2. Filter variables list
    3. Standardize projections

    Parameters
    ----------
    self : nes.Nes
        A Nes Object.
    ext_shp : geopandas.GeoDataFrame or str
        External shapefile or path to it.
    var_list : List[str] or None
        External shapefile variables to be computed.
    info : bool
        Indicates if you want to print the information.
    apply_bbox : bool
        Indicates if you want to reduce the shapefile to a bbox.

    Returns
    -------
    GeoDataFrame
        External shapefile, reprojected to the CRS of ``self.shapefile``.
        An already opened shapefile passed by the caller is never modified.
    """
    if isinstance(ext_shp, str):
        # Reading external shapefile
        if self.master and info:
            print("\tReading external shapefile")
        if apply_bbox:
            # NOTE(review): the bbox tuple is built from lon/lat bounds; presumably the file
            # is expected to be in geographic coordinates -- confirm.
            ext_shp = read_file(ext_shp, include_fields=var_list, bbox=__get_bbox(self))
        else:
            ext_shp = read_file(ext_shp, include_fields=var_list)
    else:
        msg = "WARNING!!! "
        msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened shapefile "
        msg += "a best usage of memory is performed because the external shape will be clipped while reading."
        warn(msg)
        sys.stderr.flush()

        # Work on a new object: an in-place reset would add an "index" column to the
        # caller's GeoDataFrame on every call.
        ext_shp = ext_shp.reset_index()
        if var_list is not None:
            ext_shp = ext_shp.loc[:, list(var_list) + ["geometry"]]

    self.comm.Barrier()
    if self.master and info:
        print("\t\tReading external shapefile done!")

    # Standardizing projection
    ext_shp = ext_shp.to_crs(self.shapefile.crs)

    return ext_shp


def __get_bbox(self):
    """
    Obtain the bounding box of the rank data (lon_min, lat_min, lon_max, lat_max).

    Parameters
    ----------
    self : nes.Nes
        A Nes Object.

    Returns
    -------
    tuple
        Bounding box
    """
    lon_bounds = self.lon_bnds["data"]
    lat_bounds = self.lat_bnds["data"]

    return lon_bounds.min(), lat_bounds.min(), lon_bounds.max(), lat_bounds.max()


# noinspection DuplicatedCode
def __spatial_join_nearest(self, ext_shp, info=False):
    """
    Perform the spatial join using the nearest method.

    Each grid centroid receives the variables of the closest external shape,
    plus a "distance" column produced by the join.

    Parameters
    ----------
    self : nes.Nes
        A Nes Object.
    ext_shp : GeoDataFrame
        External shapefile.
    info : bool
        Indicates if you want to print the information.
    """
    if self.master and info:
        print("\tNearest spatial joint")
        sys.stdout.flush()
    grid_shp = self.get_centroids_from_coordinates()

    # TODO: sjoin_nearest is run on the current CRS. Check whether a projection in meters
    #  is needed and whether it would distort the coordinates too much:
    #  https://gis.stackexchange.com/questions/372564/
    #  userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas

    # Calculate spatial join by distance
    aux_grid = sjoin_nearest(grid_shp, ext_shp, distance_col="distance")

    # Get data from closest shapes to centroids
    del aux_grid["geometry"], aux_grid["index_right"]

    self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid

    # Restore the dtypes of the external shapefile variables
    var_list = list(ext_shp.columns)
    var_list.remove("geometry")
    for var_name in var_list:
        self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)

    return None


# noinspection DuplicatedCode
def __spatial_join_centroid(self, ext_shp, info=False):
    """
    Perform the spatial join using the centroid method.

    Each grid cell receives the variables of the external shape that contains
    its centroid; cells whose centroid is outside every shape are left unset.

    Parameters
    ----------
    self : nes.Nes
        A Nes Object.
    ext_shp : GeoDataFrame
        External shapefile.
    info : bool
        Indicates if you want to print the information.
    """
    if self.master and info:
        print("\tCentroid spatial join")
        sys.stdout.flush()
    if info and self.master:
        print("\t\tCalculating centroids")
        sys.stdout.flush()

    # Get centroids
    grid_shp = self.get_centroids_from_coordinates()

    # Calculate spatial join
    if info and self.master:
        print("\t\tCalculating centroid spatial join")
        sys.stdout.flush()
    aux_grid = sjoin(grid_shp, ext_shp, predicate="within")

    # Get data from shapes where there are centroids, rest will be NaN
    del aux_grid["geometry"], aux_grid["index_right"]

    self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid

    # Restore the dtypes of the external shapefile variables
    var_list = list(ext_shp.columns)
    var_list.remove("geometry")
    for var_name in var_list:
        self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)

    return None


def __spatial_join_intersection(self, ext_shp, info=False):
    """
    Perform the spatial join using the intersection method.

    For every grid cell the external shape covering the largest fraction of the
    cell area is selected and its variables are copied into ``self.shapefile``.

    Parameters
    ----------
    self : nes.Nes
        A Nes Object.
    ext_shp : GeoDataFrame
        External shapefile.
    info : bool
        Indicates if you want to print the information.
    """
    # Local import: only this function needs to scope the warnings filters.
    from warnings import catch_warnings

    var_list = list(ext_shp.columns)
    var_list.remove("geometry")

    # NOTE(review): this adds a "FID_grid" column to self.shapefile itself -- confirm it is intended.
    grid_shp = self.shapefile
    grid_shp["FID_grid"] = grid_shp.index
    grid_shp = grid_shp.reset_index()

    # Get intersected areas: `inp` are positions in ext_shp, `res` positions in grid_shp
    inp, res = grid_shp.sindex.query(ext_shp.geometry, predicate="intersects")

    if info:
        print("\t\tRank {0:03d}: {1} intersected areas found".format(self.rank, len(inp)))
        sys.stdout.flush()

    # Calculate intersected areas and fractions
    intersection = DataFrame(columns=["FID", "ext_shp_id", "weight"])
    intersection["FID"] = array(grid_shp.loc[res, "FID_grid"], dtype=uint32)
    intersection["ext_shp_id"] = array(inp, dtype=uint32)

    if len(intersection) > 0:
        counts = intersection["FID"].value_counts()

        # No Warnings Zone: the previous filters are restored on exit, even if an error is raised.
        with catch_warnings():
            filterwarnings("ignore")
            intersection.loc[:, "weight"] = 1.

            for i, row in intersection.iterrows():
                if isinstance(i, int) and i % 1000 == 0 and info:
                    print("\t\t\tRank {0:03d}: {1:.3f} %".format(self.rank, i * 100 / len(intersection)))
                    sys.stdout.flush()
                # Cells intersected by a single shape keep weight 1; the area fraction is only
                # needed to choose between several candidate shapes.
                if counts[row["FID"]] > 1:
                    cell_geometry = grid_shp.loc[res[i], "geometry"]
                    try:
                        intersection.loc[i, "weight"] = cell_geometry.intersection(
                            ext_shp.loc[inp[i], "geometry"]).area / cell_geometry.area
                    except TopologicalError:
                        # If for some reason the geometry is corrupted it should work with the buffer function
                        ext_shp.loc[[inp[i]], "geometry"] = ext_shp.loc[[inp[i]], "geometry"].buffer(0)
                        intersection.loc[i, "weight"] = cell_geometry.intersection(
                            ext_shp.loc[inp[i], "geometry"]).area / cell_geometry.area

            # Discard pairs that only touch (no shared area)
            intersection.drop(intersection[intersection["weight"] <= 0].index, inplace=True)

        # Choose the biggest area from intersected areas with multiple options
        intersection.sort_values("weight", ascending=False, inplace=True)
        intersection = intersection.drop_duplicates(subset="FID", keep="first")
        intersection = intersection.sort_values("FID").set_index("FID")

        for var_name in var_list:
            self.shapefile.loc[intersection.index, var_name] = array(
                ext_shp.loc[intersection["ext_shp_id"], var_name])
    else:
        # Nothing intersects this rank: every variable is NaN
        for var_name in var_list:
            self.shapefile.loc[:, var_name] = nan

    # Restore the dtypes of the external shapefile variables
    for var_name in var_list:
        self.shapefile.loc[:, var_name] = array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype)

    return None