diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index d716b35918de93cc494265f957b476f7ac7d591a..964f8dac35ea20c19074fdccb6da245775dbd731 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -2112,7 +2112,9 @@ class Nes(object): fids = fids.reshape((len(self._lat['data']), len(self._lon['data']))) fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") self.shapefile = gdf return gdf @@ -2141,6 +2143,127 @@ class Nes(object): return None + def spatial_join(self, mask, method=None): + """ + Compute overlay intersection of two GeoPandasDataFrames + + Parameters + ---------- + mask : GeoPandasDataFrame + File from where the data will be obtained on the intersection. + method : str + Overlay method. Accepted values: ['nearest', 'intersection', None]. + """ + + # Nearest centroids to the mask polygons + if method == 'nearest': + + # Get centroids of shapefile to mask + shapefile_aux = deepcopy(self.shapefile) + shapefile_aux.geometry = self.shapefile.centroid + + # Calculate spatial joint by distance + shapefile_aux = gpd.sjoin_nearest(shapefile_aux, mask.to_crs(self.shapefile.crs), distance_col='distance') + + # Get data from closest shapes to centroids + del shapefile_aux['geometry'], shapefile_aux['index_right'] + self.shapefile.loc[shapefile_aux.index, shapefile_aux.columns] = shapefile_aux + + # Intersect the areas of the mask polygons, outside of the mask there will be NaN + elif method == 'intersection': + + # Get intersected areas + inp, res = mask.sindex.query_bulk(self.shapefile.geometry, predicate='intersects') + print('Rank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) + + # Calculate intersected areas and fractions + intersection = pd.concat([self.shapefile.geometry[inp].reset_index(), mask.geometry[res].reset_index()], + axis=1, ignore_index=True) + intersection.columns = (list(self.shapefile.geometry[inp].reset_index().columns) + + list(mask.geometry[res].reset_index().rename(columns={'geometry': 'geometry_mask', + 'index': 'index_mask'}).columns)) + intersection['area'] = intersection.apply(lambda x: x['geometry'].intersection(x['geometry_mask']).buffer(0).area, + axis=1) + intersection['fraction'] = intersection.apply(lambda x: x['area'] / x['geometry'].area, axis=1) + + # Choose biggest area from intersected areas with multiple options + intersection.sort_values('fraction', ascending=False, inplace=True) + intersection = intersection.drop_duplicates(subset='FID', keep="first") + intersection = intersection.sort_values('FID').set_index('FID') + + # Get data from mask + del mask['geometry'] + self.shapefile.loc[intersection.index, mask.columns] = np.array(mask.loc[intersection.index_mask, :]) + + # Centroids that fall on the mask polygons, outside of the mask there will be NaN + elif method is None: + + # Get centroids of shapefile to mask + shapefile_aux = deepcopy(self.shapefile) + shapefile_aux.geometry = self.shapefile.centroid + + # Calculate spatial joint + shapefile_aux = gpd.sjoin(shapefile_aux, mask.to_crs(self.shapefile.crs)) + + # Get data from shapes where there are centroids, rest will be NaN + del shapefile_aux['geometry'], shapefile_aux['index_right'] + self.shapefile.loc[shapefile_aux.index, shapefile_aux.columns] = shapefile_aux + + return None + + @staticmethod + def spatial_overlays(df1, df2, how='intersection'): + """ + Compute overlay intersection of two GeoPandasDataFrames df1 and df2 + + https://github.com/geopandas/geopandas/issues/400 + + :param df1: GeoDataFrame + :param df2: GeoDataFrame + :param how: Operation to do + :return: GeoDataFrame + """ + from functools import reduce + + df1 = df1.copy() + df2 = df2.copy() + df1['geometry'] = df1.geometry.buffer(0) + df2['geometry'] = df2.geometry.buffer(0) + if how == 'intersection': + # Spatial Index to create intersections + spatial_index = df2.sindex + df1['bbox'] = df1.geometry.apply(lambda x: x.bounds) + df1['histreg'] = df1.bbox.apply(lambda x: list(spatial_index.intersection(x))) + pairs = df1['histreg'].to_dict() + nei = [] + for i, j in pairs.items(): + for k in j: + nei.append([i, k]) + pairs = pd.DataFrame(nei, columns=['idx1', 'idx2']) + pairs = pairs.merge(df1, left_on='idx1', right_index=True) + pairs = pairs.merge(df2, left_on='idx2', right_index=True, suffixes=['_1', '_2']) + pairs['geometry'] = pairs.apply(lambda x: (x['geometry_1'].intersection(x['geometry_2'])).buffer(0), axis=1) + + pairs.drop(columns=['geometry_1', 'geometry_2', 'histreg', 'bbox'], inplace=True) + pairs = gpd.GeoDataFrame(pairs, columns=pairs.columns, crs=df1.crs) + pairs = pairs.loc[~pairs.geometry.is_empty] + + return_value = pairs + elif how == 'difference': + spatial_index = df2.sindex + df1['bbox'] = df1.geometry.apply(lambda x: x.bounds) + df1['histreg'] = df1.bbox.apply(lambda x: list(spatial_index.intersection(x))) + df1['new_g'] = df1.apply(lambda x: reduce(lambda x, y: x.difference(y).buffer(0), + [x.geometry] + list(df2.iloc[x.histreg].geometry)), axis=1) + df1.geometry = df1.new_g + df1 = df1.loc[~df1.geometry.is_empty].copy() + df1.drop(['bbox', 'histreg', 'new_g'], axis=1, inplace=True) + return_value = df1 + else: + raise NotImplementedError(how) + + return return_value + def __gather_data_py_object(self): """ Gather all the variable data into the MPI rank 0 to perform a serial write. diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index e8048044774c87488130d87ade90335717c5663d..f9aa7e282507ca4b3e9436929df02ed40adc77fe 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -455,7 +455,9 @@ class LCCNes(Nes): fids = fids.reshape((self._lat['data'].shape[0],self._lat['data'].shape[1])) fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") self.shapefile = gdf return gdf diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 4ca7d4e3cb91e18d0acc67a7ab784700377c06bc..7c42bd6d52424002aac54b1abbf5bd44160df811 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -432,7 +432,9 @@ class MercatorNes(Nes): fids = fids.reshape((self._lat['data'].shape[0],self._lat['data'].shape[1])) fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") self.shapefile = gdf return gdf diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index f42934c5862cdb2836521dbdfb5a6ac9495bd524..018bd79a351780dfc41c54be17503d972be9c798 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -293,7 +293,7 @@ class PointsNes(Nes): elif len(var_dims) == 2: if 'strlen' in nc_var.dimensions: data = nc_var[self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], :] - data = np.array([''.join(i) for i in np.char.decode(data)]) + data = np.array([''.join(i) for i in data]) else: data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] @@ -605,7 +605,6 @@ class PointsNes(Nes): variables = {} interpolated_variables = deepcopy(self.variables) for var_name, var_info in interpolated_variables.items(): - print(var_name) variables[var_name] = {} if var_info['dimensions'] == ('time', 'lat', 'lon') and len(var_info['data'].shape) == 2: variables[var_name]['data'] = var_info['data'].T @@ -646,7 +645,9 @@ class PointsNes(Nes): geometry = gpd.points_from_xy(self.lon['data'], self.lat['data']) fids = np.arange(len(self._lon['data'])) fids = fids[self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids), + geometry=geometry, + crs="EPSG:4326") self.shapefile = gdf return gdf diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index 54f9e44d2a4ef4068e0303878a4bf8a27a07d0f9..f933c184b41351536c371c5af5aed292636d607c 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -481,7 +481,9 @@ class RotatedNes(Nes): fids = fids.reshape((self._lat['data'].shape[0],self._lat['data'].shape[1])) fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), geometry=geometry, crs="EPSG:4326") + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") self.shapefile = gdf return gdf diff --git a/tests/1-nes_tests_by_size.py b/tests/1-test_read_write_size.py similarity index 100% rename from tests/1-nes_tests_by_size.py rename to tests/1-test_read_write_size.py diff --git a/tests/2-nes_tests_by_projection.py b/tests/2-test_read_write_projection.py similarity index 98% rename from tests/2-nes_tests_by_projection.py rename to tests/2-test_read_write_projection.py index 12ef38e30ef6c7a2e903f945554e3efff247b8dd..5f4590c1482f5e2690643caeaaf765ab9db2cbb2 100644 --- a/tests/2-nes_tests_by_projection.py +++ b/tests/2-test_read_write_projection.py @@ -25,7 +25,7 @@ paths = {'regular_file': {'path': '/gpfs/scratch/bsc32/bsc32538/mr_multiplyby/or 'projection': 'lcc', 'variables': [], # all 'parallel_methods': ['X', 'Y', 'T']}, - 'mercator_file': {'path': '/esarchive/scratch/avilanova/software/NES/Jupyter_notebooks/input/mercator_grid_example.nc', + 'mercator_file': {'path': '/esarchive/scratch/avilanova/software/NES/tutorials/data/mercator_grid_example.nc', 'projection': 'mercator', 'variables': [], # all 'parallel_methods': ['X', 'Y', 'T']} diff --git a/tests/scalability_test_nord3v2.bash b/tests/2-test_read_write_projection_nord3v2.bash similarity index 96% rename from tests/scalability_test_nord3v2.bash rename to tests/2-test_read_write_projection_nord3v2.bash index c7256757d51f3597aa9da92d265745e507d8a1d3..87fb5c63555df7c92d09891d4d5b43dbf201ceb7 100644 --- a/tests/scalability_test_nord3v2.bash +++ b/tests/2-test_read_write_projection_nord3v2.bash @@ -3,7 +3,7 @@ #EXPORTPATH="/esarchive/scratch/avilanova/software/NES" EXPORTPATH="/gpfs/projects/bsc32/models/NES" SRCPATH="/esarchive/scratch/avilanova/software/NES/tests" -EXE="2-nes_tests_by_projection.py" +EXE="2-test_read_write_projection.py" module purge module load Python/3.7.4-GCCcore-8.3.0 diff --git a/tests/3-test_spatial_join.py b/tests/3-test_spatial_join.py new file mode 100644 index 0000000000000000000000000000000000000000..e456c64ee2cd1ea24e63e864f851678d37cc5936 --- /dev/null +++ b/tests/3-test_spatial_join.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +import geopandas as gpd +import pandas as pd +import timeit +import sys +from mpi4py import MPI +from nes import * + +# Hide warning +pd.options.mode.chained_assignment = None + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() +size = comm.Get_size() + +results = [] +for method in ['spatial_overlay', 'spatial_join']: + for projection in ['regular', 'rotated']: + for projection_type in ['created', 'read']: + + # Regular projection + if projection == 'regular': + # Create dataset and get shapefile + if projection_type == 'created': + lat_orig = 41.1 + lon_orig = 1.8 + inc_lat = 0.2 + inc_lon = 0.2 + n_lat = 100 + n_lon = 100 + coordinates = create_nes(comm=None, info=True, + projection='regular', + lat_orig=lat_orig, + lon_orig=lon_orig, + inc_lat=inc_lat, + inc_lon=inc_lon, + n_lat=n_lat, + n_lon=n_lon) + coordinates.create_shapefile() + + # Open dataset and get shapefile + elif projection_type == 'read': + coordinates_path = '/gpfs/scratch/bsc32/bsc32538/mr_multiplyby/original_file/MONARCH_d01_2008123100.nc' + coordinates = open_netcdf(path=coordinates_path, info=True) + coordinates.create_shapefile() + coordinates.keep_vars(['O3']) + coordinates.load() + coordinates.shapefile['O3'] = coordinates.variables['O3']['data'][-1, -1, :].ravel() + + # Rotated projection + elif projection == 'rotated': + # Create dataset and get shapefile + if projection_type == 'created': + centre_lat = 51 + centre_lon = 10 + west_boundary = -35 + south_boundary = -27 + inc_rlat = 0.2 + inc_rlon = 0.2 + coordinates = create_nes(comm=None, info=True, + projection='rotated', + centre_lat=centre_lat, + centre_lon=centre_lon, + west_boundary=west_boundary, + south_boundary=south_boundary, + inc_rlat=inc_rlat, + inc_rlon=inc_rlon) + coordinates.create_shapefile() + + # Open dataset and get shapefile + elif projection_type == 'read': + coordinates_path = '/gpfs/scratch/bsc32/bsc32538/original_files/CAMS_MONARCH_d01_2022070412.nc' + coordinates = open_netcdf(path=coordinates_path, info=True) + coordinates.create_shapefile() + coordinates.keep_vars(['O3']) + coordinates.load() + coordinates.shapefile['O3'] = coordinates.variables['O3']['data'][-1, -1, :].ravel() + + coordinates.write_shapefile('coordinates_{0}_{1}'.format(projection, + projection_type)) + + mask_path = '/esarchive/scratch/avilanova/software/NES/tutorials/data/timezones_2021c/timezones_2021c.shp' + mask = gpd.read_file(mask_path) + + # Spatial overlay (old method) + if method == 'spatial_overlay': + start_time = timeit.default_timer() + intersection = coordinates.shapefile.copy() + #intersection['area'] = intersection.geometry.area + intersection = coordinates.spatial_overlays(intersection, mask) + #intersection.rename(columns={'idx1': 'FID', 'idx2': 'shp_id'}, inplace=True) + #intersection['fraction'] = intersection.geometry.area / intersection['area'] + #intersection.sort_values('fraction', ascending=False, inplace=True) + #intersection = intersection.drop_duplicates(subset='FID', keep="first") + #intersection.set_index('FID', inplace=True) + #coordinates.loc[intersection.index, coordinates.shp_colname] = intersection[coordinates.shp_colname] + time = timeit.default_timer() - start_time + + # Spatial join (new method) + elif method == 'spatial_join': + start_time = timeit.default_timer() + coordinates.spatial_join(mask, method='intersection') + time = timeit.default_timer() - start_time + + coordinates.write_shapefile('masked_coordinates_{0}_{1}_{2}'.format(projection, + projection_type, + method)) + + results = [] + results.append({'Projection': projection, + 'Projection type': projection_type, + 'Method': '{min:02d}:{sec:02.3f}'.format( + min=int(time // 60), sec=time - (int(time // 60) * 60)) + }) + + comm.Barrier() + +comm.Barrier() + +if rank == 0: + table = pd.DataFrame(results) + print('RESULTS TABLE') + print(table) + sys.stdout.flush() diff --git a/tests/3-test_spatial_join_nord3v2.bash b/tests/3-test_spatial_join_nord3v2.bash new file mode 100644 index 0000000000000000000000000000000000000000..079aa7928696599a280f2c555704dbd543b65385 --- /dev/null +++ b/tests/3-test_spatial_join_nord3v2.bash @@ -0,0 +1,23 @@ +#!/bin/bash + +EXPORTPATH="/esarchive/scratch/avilanova/software/NES" +SRCPATH="/esarchive/scratch/avilanova/software/NES/tests" +EXE="3-test_spatial_join.py" + +module purge +module load Python/3.7.4-GCCcore-8.3.0 +module load netcdf4-python/1.5.3-foss-2019b-Python-3.7.4 +module load cfunits/1.8-foss-2019b-Python-3.7.4 +module load xarray/0.17.0-foss-2019b-Python-3.7.4 +module load pandas/1.2.4-foss-2019b-Python-3.7.4 +module load mpi4py/3.0.3-foss-2019b-Python-3.7.4 +module load filelock/3.7.1-foss-2019b-Python-3.7.4 +module load pyproj/2.5.0-foss-2019b-Python-3.7.4 +module load eccodes-python/0.9.5-foss-2019b-Python-3.7.4 +module load geopandas/0.8.1-foss-2019b-Python-3.7.4 +module load Shapely/1.7.1-foss-2019b-Python-3.7.4 + +for nprocs in 1 +do + JOB_ID=`sbatch --ntasks=${nprocs} --exclusive --job-name=nes_${nprocs} --output=./log_nord3v2_NES_${nprocs}_%J.out --error=./log_nord3v2_NES_${nprocs}_%J.err -D . --time=02:00:00 --wrap="export PYTHONPATH=${EXPORTPATH}:${PYTHONPATH}; cd ${SRCPATH}; mpirun --mca mpi_warn_on_fork 0 -np ${nprocs} python ${SRCPATH}/${EXE}"` +done \ No newline at end of file diff --git a/tests/4-test_bounds.py b/tests/4-test_bounds.py new file mode 100644 index 0000000000000000000000000000000000000000..e3d3063fddd238f0dc601a6a1c2837e1bcd98e94 --- /dev/null +++ b/tests/4-test_bounds.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +import sys +import timeit +import pandas as pd +from mpi4py import MPI +from nes import * + +comm = MPI.COMM_WORLD +rank = comm.Get_rank() + +for projection_type in ['read', 'created']: + + # Open dataset + if projection_type == 'read': + test_path = "/gpfs/scratch/bsc32/bsc32538/mr_multiplyby/OUT/stats_bnds/monarch/a45g/regional/daily_max/O3_all/O3_all-000_2021080300.nc" + nessy = open_netcdf(path=test_path, info=True) + + # Create dataset + elif projection_type == 'created': + lat_orig = 41.1 + lon_orig = 1.8 + inc_lat = 0.2 + inc_lon = 0.2 + n_lat = 100 + n_lon = 100 + nessy = create_nes(comm=None, info=True, projection='regular', + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) + + # Add bounds + nessy.create_spatial_bounds() + + print('NES', projection_type, '-', 'Rank', rank, '-', nessy) + print('NES', projection_type, '-', 'Rank', rank, '-', 'Lat bounds', + nessy.lat_bnds) + print('NES', projection_type, '-', 'Rank', rank, '-', 'Lon bounds', + nessy.lon_bnds) + + comm.Barrier() + sys.stdout.flush() \ No newline at end of file diff --git a/tests/4-test_bounds_nord3v2.bash b/tests/4-test_bounds_nord3v2.bash new file mode 100644 index 0000000000000000000000000000000000000000..fc6e06b0824d8682c80372867da0d429df1a80b6 --- /dev/null +++ b/tests/4-test_bounds_nord3v2.bash @@ -0,0 +1,23 @@ +#!/bin/bash + +EXPORTPATH="/esarchive/scratch/avilanova/software/NES" +SRCPATH="/esarchive/scratch/avilanova/software/NES/tests" +EXE="4-test_bounds.py" + +module purge +module load Python/3.7.4-GCCcore-8.3.0 +module load netcdf4-python/1.5.3-foss-2019b-Python-3.7.4 +module load cfunits/1.8-foss-2019b-Python-3.7.4 +module load xarray/0.17.0-foss-2019b-Python-3.7.4 +module load pandas/1.2.4-foss-2019b-Python-3.7.4 +module load mpi4py/3.0.3-foss-2019b-Python-3.7.4 +module load filelock/3.7.1-foss-2019b-Python-3.7.4 +module load pyproj/2.5.0-foss-2019b-Python-3.7.4 +module load eccodes-python/0.9.5-foss-2019b-Python-3.7.4 +module load geopandas/0.8.1-foss-2019b-Python-3.7.4 +module load Shapely/1.7.1-foss-2019b-Python-3.7.4 + +for nprocs in 1 2 +do + JOB_ID=`sbatch --ntasks=${nprocs} --exclusive --job-name=nes_${nprocs} --output=./log_nord3v2_NES_${nprocs}_%J.out --error=./log_nord3v2_NES_${nprocs}_%J.err -D . --time=02:00:00 --wrap="export PYTHONPATH=${EXPORTPATH}:${PYTHONPATH}; cd ${SRCPATH}; mpirun --mca mpi_warn_on_fork 0 -np ${nprocs} python ${SRCPATH}/${EXE}"` +done \ No newline at end of file diff --git a/tutorials/2.Creation/2.5.Create_Points_CSIC.ipynb b/tutorials/2.Creation/2.5.Create_Points_CSIC.ipynb index 48cd49d6883628b607be18fed904d3f40ceb9282..65d623e8c48565ecd1279c7d57168bbcac3e44c2 100644 --- a/tutorials/2.Creation/2.5.Create_Points_CSIC.ipynb +++ b/tutorials/2.Creation/2.5.Create_Points_CSIC.ipynb @@ -1869,7 +1869,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.6.8" }, "vscode": { "interpreter": { diff --git a/tutorials/5.Others/5.2.Add_Coordinates_Bounds.ipynb b/tutorials/5.Others/5.2.Add_Coordinates_Bounds.ipynb index 29ca013c2b0ef269dec76430d975e01c6b5c8a97..537349001ae305b0d7ac343a269a5bb13b18ad22 100644 --- a/tutorials/5.Others/5.2.Add_Coordinates_Bounds.ipynb +++ b/tutorials/5.Others/5.2.Add_Coordinates_Bounds.ipynb @@ -209,7 +209,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.6.8 64-bit", "language": "python", "name": "python3" }, @@ -223,7 +223,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.6.8" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } } }, "nbformat": 4, diff --git a/tutorials/5.Others/5.4.Spatial_Join.ipynb b/tutorials/5.Others/5.4.Spatial_Join.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b00e62288dc5f117c373579760892b9c5d86452a --- /dev/null +++ b/tutorials/5.Others/5.4.Spatial_Join.ipynb @@ -0,0 +1,892 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to make spatial joins" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from nes import *\n", + "import xarray as xr\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "pd.options.mode.chained_assignment = None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Method 1: Centroids" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | geometry | \n", + "
---|---|
FID | \n", + "\n", + " |
0 | \n", + "POLYGON ((1.80000 41.10000, 2.00000 41.10000, ... | \n", + "
1 | \n", + "POLYGON ((2.00000 41.10000, 2.20000 41.10000, ... | \n", + "
2 | \n", + "POLYGON ((2.20000 41.10000, 2.40000 41.10000, ... | \n", + "
3 | \n", + "POLYGON ((2.40000 41.10000, 2.60000 41.10000, ... | \n", + "
4 | \n", + "POLYGON ((2.60000 41.10000, 2.80000 41.10000, ... | \n", + "
... | \n", + "... | \n", + "
9995 | \n", + "POLYGON ((20.80000 60.90000, 21.00000 60.90000... | \n", + "
9996 | \n", + "POLYGON ((21.00000 60.90000, 21.20000 60.90000... | \n", + "
9997 | \n", + "POLYGON ((21.20000 60.90000, 21.40000 60.90000... | \n", + "
9998 | \n", + "POLYGON ((21.40000 60.90000, 21.60000 60.90000... | \n", + "
9999 | \n", + "POLYGON ((21.60000 60.90000, 21.80000 60.90000... | \n", + "
10000 rows × 1 columns
\n", + "\n", + " | geometry | \n", + "tzid | \n", + "
---|---|---|
FID | \n", + "\n", + " | \n", + " |
0 | \n", + "POLYGON ((1.80000 41.10000, 2.00000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
1 | \n", + "POLYGON ((2.00000 41.10000, 2.20000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
2 | \n", + "POLYGON ((2.20000 41.10000, 2.40000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
3 | \n", + "POLYGON ((2.40000 41.10000, 2.60000 41.10000, ... | \n", + "NaN | \n", + "
4 | \n", + "POLYGON ((2.60000 41.10000, 2.80000 41.10000, ... | \n", + "NaN | \n", + "
... | \n", + "... | \n", + "... | \n", + "
9995 | \n", + "POLYGON ((20.80000 60.90000, 21.00000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9996 | \n", + "POLYGON ((21.00000 60.90000, 21.20000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9997 | \n", + "POLYGON ((21.20000 60.90000, 21.40000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9998 | \n", + "POLYGON ((21.40000 60.90000, 21.60000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9999 | \n", + "POLYGON ((21.60000 60.90000, 21.80000 60.90000... | \n", + "Europe/Helsinki | \n", + "
10000 rows × 2 columns
\n", + "\n", + " | geometry | \n", + "
---|---|
FID | \n", + "\n", + " |
0 | \n", + "POLYGON ((1.80000 41.10000, 2.00000 41.10000, ... | \n", + "
1 | \n", + "POLYGON ((2.00000 41.10000, 2.20000 41.10000, ... | \n", + "
2 | \n", + "POLYGON ((2.20000 41.10000, 2.40000 41.10000, ... | \n", + "
3 | \n", + "POLYGON ((2.40000 41.10000, 2.60000 41.10000, ... | \n", + "
4 | \n", + "POLYGON ((2.60000 41.10000, 2.80000 41.10000, ... | \n", + "
... | \n", + "... | \n", + "
95 | \n", + "POLYGON ((2.80000 42.90000, 3.00000 42.90000, ... | \n", + "
96 | \n", + "POLYGON ((3.00000 42.90000, 3.20000 42.90000, ... | \n", + "
97 | \n", + "POLYGON ((3.20000 42.90000, 3.40000 42.90000, ... | \n", + "
98 | \n", + "POLYGON ((3.40000 42.90000, 3.60000 42.90000, ... | \n", + "
99 | \n", + "POLYGON ((3.60000 42.90000, 3.80000 42.90000, ... | \n", + "
100 rows × 1 columns
\n", + "\n", + " | geometry | \n", + "
---|---|
FID | \n", + "\n", + " |
0 | \n", + "POLYGON ((1.80000 41.10000, 2.00000 41.10000, ... | \n", + "
1 | \n", + "POLYGON ((2.00000 41.10000, 2.20000 41.10000, ... | \n", + "
2 | \n", + "POLYGON ((2.20000 41.10000, 2.40000 41.10000, ... | \n", + "
3 | \n", + "POLYGON ((2.40000 41.10000, 2.60000 41.10000, ... | \n", + "
4 | \n", + "POLYGON ((2.60000 41.10000, 2.80000 41.10000, ... | \n", + "
... | \n", + "... | \n", + "
95 | \n", + "POLYGON ((2.80000 42.90000, 3.00000 42.90000, ... | \n", + "
96 | \n", + "POLYGON ((3.00000 42.90000, 3.20000 42.90000, ... | \n", + "
97 | \n", + "POLYGON ((3.20000 42.90000, 3.40000 42.90000, ... | \n", + "
98 | \n", + "POLYGON ((3.40000 42.90000, 3.60000 42.90000, ... | \n", + "
99 | \n", + "POLYGON ((3.60000 42.90000, 3.80000 42.90000, ... | \n", + "
100 rows × 1 columns
\n", + "\n", + " | geometry | \n", + "
---|---|
FID | \n", + "\n", + " |
0 | \n", + "POLYGON ((1.80000 41.10000, 2.00000 41.10000, ... | \n", + "
1 | \n", + "POLYGON ((2.00000 41.10000, 2.20000 41.10000, ... | \n", + "
2 | \n", + "POLYGON ((2.20000 41.10000, 2.40000 41.10000, ... | \n", + "
3 | \n", + "POLYGON ((2.40000 41.10000, 2.60000 41.10000, ... | \n", + "
4 | \n", + "POLYGON ((2.60000 41.10000, 2.80000 41.10000, ... | \n", + "
... | \n", + "... | \n", + "
9995 | \n", + "POLYGON ((20.80000 60.90000, 21.00000 60.90000... | \n", + "
9996 | \n", + "POLYGON ((21.00000 60.90000, 21.20000 60.90000... | \n", + "
9997 | \n", + "POLYGON ((21.20000 60.90000, 21.40000 60.90000... | \n", + "
9998 | \n", + "POLYGON ((21.40000 60.90000, 21.60000 60.90000... | \n", + "
9999 | \n", + "POLYGON ((21.60000 60.90000, 21.80000 60.90000... | \n", + "
10000 rows × 1 columns
\n", + "\n", + " | geometry | \n", + "tzid | \n", + "
---|---|---|
FID | \n", + "\n", + " | \n", + " |
0 | \n", + "POLYGON ((1.80000 41.10000, 2.00000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
1 | \n", + "POLYGON ((2.00000 41.10000, 2.20000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
2 | \n", + "POLYGON ((2.20000 41.10000, 2.40000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
3 | \n", + "POLYGON ((2.40000 41.10000, 2.60000 41.10000, ... | \n", + "Europe/Madrid | \n", + "
4 | \n", + "POLYGON ((2.60000 41.10000, 2.80000 41.10000, ... | \n", + "NaN | \n", + "
... | \n", + "... | \n", + "... | \n", + "
9995 | \n", + "POLYGON ((20.80000 60.90000, 21.00000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9996 | \n", + "POLYGON ((21.00000 60.90000, 21.20000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9997 | \n", + "POLYGON ((21.20000 60.90000, 21.40000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9998 | \n", + "POLYGON ((21.40000 60.90000, 21.60000 60.90000... | \n", + "Europe/Helsinki | \n", + "
9999 | \n", + "POLYGON ((21.60000 60.90000, 21.80000 60.90000... | \n", + "Europe/Helsinki | \n", + "
10000 rows × 2 columns
\n", + "