From 85275f74a26e0350d9ad35587d70beb029fe7d82 Mon Sep 17 00:00:00 2001 From: ctena Date: Wed, 22 Mar 2023 17:04:38 +0100 Subject: [PATCH 1/7] - Added stdout.flush() to each warning - Improved spatial_join --- nes/create_nes.py | 2 + nes/load_nes.py | 2 + nes/methods/__init__.py | 1 + nes/methods/horizontal_interpolation.py | 4 + nes/methods/spatial_join.py | 277 +++++++++++++++ nes/nc_projections/default_nes.py | 159 ++------- nes/nc_projections/lcc_nes.py | 2 + nes/nc_projections/mercator_nes.py | 2 + nes/nc_projections/points_nes.py | 2 + nes/nc_projections/points_nes_ghost.py | 2 + nes/nc_projections/points_nes_providentia.py | 2 + nes/nc_projections/rotated_nes.py | 2 + nes/nes_formats/cams_ra_format.py | 1 + tests/2.1-test_spatial_join.py | 342 ++++++++++--------- 14 files changed, 508 insertions(+), 292 deletions(-) create mode 100644 nes/methods/spatial_join.py diff --git a/nes/create_nes.py b/nes/create_nes.py index 33f0bf6..da7d29c 100644 --- a/nes/create_nes.py +++ b/nes/create_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys from netCDF4 import num2date from mpi4py import MPI import geopandas as gpd @@ -87,6 +88,7 @@ def create_nes(comm=None, info=False, projection=None, parallel_method='Y', bala if projection is None: if parallel_method == 'Y': warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'") + sys.stderr.flush() parallel_method = 'X' elif parallel_method == 'T': raise NotImplementedError("Parallel method T not implemented yet") diff --git a/nes/load_nes.py b/nes/load_nes.py index 8de2346..2113b48 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import os +import sys from mpi4py import MPI from netCDF4 import Dataset import warnings @@ -70,6 +71,7 @@ def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', elif __is_points(dataset): if parallel_method == 'Y': warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'") + sys.stderr.flush() parallel_method = 'X' if __is_points_ghost(dataset): # Points - GHOST diff --git a/nes/methods/__init__.py b/nes/methods/__init__.py index 22c351a..772adac 100644 --- a/nes/methods/__init__.py +++ b/nes/methods/__init__.py @@ -1,3 +1,4 @@ from .vertical_interpolation import add_4d_vertical_info from .vertical_interpolation import interpolate_vertical from .horizontal_interpolation import interpolate_horizontal +from .spatial_join import spatial_join diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py index 58a2abc..e33edd4 100644 --- a/nes/methods/horizontal_interpolation.py +++ b/nes/methods/horizontal_interpolation.py @@ -143,6 +143,7 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind='Neares else: msg = "The final projection must be points to interpolate an experiment and get it in Providentia format." warnings.warn(msg) + sys.stderr.flush() else: # Convert dimensions (time, lev, lat, lon) or (time, lat, lon) to (time, station) for interpolated variables # and reshape data @@ -238,6 +239,7 @@ def get_weights_idx_t_axis(self, dst_grid, weight_matrix_path, kind, n_neighbour if len(weight_matrix.lev['data']) != n_neighbours: warn("The selected weight matrix does not have the same number of nearest neighbours." + "Re-calculating again but not saving it.") + sys.stderr.flush() weight_matrix = create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) else: weight_matrix = True @@ -320,6 +322,7 @@ def get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbou if isinstance(dst_grid, nes.PointsNes) and weight_matrix_path is not None: if self.master: warn("To point weight matrix cannot be saved.") + sys.stderr.flush() weight_matrix_path = None if wm is not None: @@ -337,6 +340,7 @@ def get_weights_idx_xy_axis(self, dst_grid, weight_matrix_path, kind, n_neighbou if len(weight_matrix.lev['data']) != n_neighbours: warn("The selected weight matrix does not have the same number of nearest neighbours." + "Re-calculating again but not saving it.") + sys.stderr.flush() weight_matrix = create_nn_weight_matrix(self, dst_grid, n_neighbours=n_neighbours) else: weight_matrix = True diff --git a/nes/methods/spatial_join.py b/nes/methods/spatial_join.py new file mode 100644 index 0000000..6695cf0 --- /dev/null +++ b/nes/methods/spatial_join.py @@ -0,0 +1,277 @@ +#!/usr/bin/env python + +import sys +import warnings +import geopandas as gpd +from geopandas import GeoDataFrame +import nes +import numpy as np +import pandas as pd +from shapely.geos import TopologicalError + + +def spatial_join(self, ext_shp, method=None, var_list=None, info=False): + """ + Compute overlay intersection of two GeoPandasDataFrames. + + Parameters + ---------- + self : nes.Nes + ext_shp : GeoPandasDataFrame or str + File or path from where the data will be obtained on the intersection. + method : str + Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. + var_list : List or None or str + Variables that will be included in the resulting shapefile. + info : bool + Indicates if you want to print the process info or no + """ + if self.master and info: + print("Starting spatial join") + if isinstance(var_list, str): + # Transforming string (variable name) to a list with length 0 + var_list = [var_list] + + # Create source shapefile if it does not exist + if self.shapefile is None: + if self.master and info: + print("\tCreating shapefile") + sys.stdout.flush() + self.create_shapefile() + + ext_shp = prepare_external_shapefile(self, ext_shp=ext_shp, var_list=var_list, info=info) + + if method == 'nearest': + # Nearest centroids to the shapefile polygons + spatial_join_nearest(self, ext_shp=ext_shp, info=info) + elif method == 'intersection': + # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN + spatial_join_intersection(self, ext_shp=ext_shp, info=info) + elif method == 'centroid': + # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN + spatial_join_centroid(self, ext_shp=ext_shp, info=info) + + else: + accepted_values = ['nearest', 'intersection', 'centroid'] + raise NotImplementedError('{0} is not implemented. Choose from: {1}'.format(method, accepted_values)) + + return None + + +def prepare_external_shapefile(self, ext_shp, var_list, info=False): + """ + Prepare the external shapefile. + + It is high recommended to pass ext_shp parameter as string because it will clip the external shapefile to the rank. + + 1. Read if it is not already read + 2. Filter variables list + 3. Standardize projections + + Parameters + ---------- + self : nes.Nes + ext_shp : GeoDataFrame or str + External shapefile or path to it + var_list : List[str] or None + External shapefile variables to be computed + info : bool + Indicates if you want to print the information + + Returns + ------- + GeoDataFrame + External shapefile + """ + if isinstance(ext_shp, str): + # Reading external shapefile + if self.master and info: + print("\tReading external shapefile") + # ext_shp = gpd.read_file(ext_shp, include_fields=var_list, mask=self.shapefile.geometry) + ext_shp = gpd.read_file(ext_shp, include_fields=var_list, bbox=get_bbox(self)) + else: + msg = "WARNING!!! " + msg += "External shapefile already read. If you pass the path to the shapefile instead of the opened shapefile " + msg += "a best usage of memory is performed because the external shape will be clipped while reading." + warnings.warn(msg) + sys.stderr.flush() + if var_list is not None: + ext_shp = ext_shp.loc[:, var_list + ['geometry']] + self.comm.Barrier() + if self.master and info: + print("\t\tReading external shapefile done!") + # Standardizing projection + ext_shp = ext_shp.to_crs(self.shapefile.crs) + + return ext_shp + + +def get_bbox(self): + """ + Obtain the bounding box of the rank data + + (lon_min, lat_min, lon_max, lat_max) + + Parameters + ---------- + self : nes.Nes + + Returns + ------- + tuple + Bounding box + """ + bbox = (self.lon_bnds['data'].min(), self.lat_bnds['data'].min(), + self.lon_bnds['data'].max(), self.lat_bnds['data'].max(), ) + return bbox + + +def spatial_join_nearest(self, ext_shp, info=False): + """ + Perform the spatial join using the nearest method + + Parameters + ---------- + self : nes.Nes + ext_shp : GeoDataFrame + External shapefile + info : bool + Indicates if you want to print the information + """ + if self.master and info: + print("\tNearest spatial joint") + sys.stdout.flush() + grid_shp = self.get_centroids_from_coordinates() + # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest + # TODO: Check if the projection 4328 does not distort the coordinates too much + # https://gis.stackexchange.com/questions/372564/ + # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas + # ext_shp = ext_shp.to_crs('EPSG:4328') + # grid_shp = grid_shp.to_crs('EPSG:4328') + + # Calculate spatial joint by distance + aux_grid = gpd.sjoin_nearest(grid_shp, ext_shp, distance_col='distance') + + # Get data from closest shapes to centroids + del aux_grid['geometry'], aux_grid['index_right'] + self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid + + var_list = list(ext_shp.columns) + var_list.remove('geometry') + for var_name in var_list: + self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) + + return None + + +def spatial_join_centroid(self, ext_shp, info=False): + """ + Perform the spatial join using the centroid method + + Parameters + ---------- + self : nes.Nes + ext_shp : GeoDataFrame + External shapefile + info : bool + Indicates if you want to print the information + """ + if self.master and info: + print("\tCentroid spatial join") + sys.stdout.flush() + if info and self.master: + print("\t\tCalculating centroids") + sys.stdout.flush() + # Get centroids + grid_shp = self.get_centroids_from_coordinates() + + # Calculate spatial joint + if info and self.master: + print("\t\tCalculating centroid spatial join") + sys.stdout.flush() + aux_grid = gpd.sjoin(grid_shp, ext_shp, predicate='within') + + # Get data from shapes where there are centroids, rest will be NaN + del aux_grid['geometry'], aux_grid['index_right'] + self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid + + var_list = list(ext_shp.columns) + var_list.remove('geometry') + for var_name in var_list: + self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) + + return None + + +def spatial_join_intersection(self, ext_shp, info=False): + """ + Perform the spatial join using the intersection method + + Parameters + ---------- + self : nes.Nes + ext_shp : GeoDataFrame + External shapefile + info : bool + Indicates if you want to print the information + """ + var_list = list(ext_shp.columns) + var_list.remove('geometry') + + grid_shp = self.shapefile + + grid_shp['FID_grid'] = grid_shp.index + grid_shp = grid_shp.reset_index() + # Get intersected areas + # inp, res = ext_shp.sindex.query_bulk(grid_shp.geometry, predicate='intersects') + inp, res = grid_shp.sindex.query_bulk(ext_shp.geometry, predicate='intersects') + + if info: + print('\t\tRank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) + sys.stdout.flush() + # Calculate intersected areas and fractions + intersection = pd.DataFrame(columns=['FID', 'ext_shp_id', 'weight']) + intersection['FID'] = np.array(grid_shp.loc[res, 'FID_grid'], dtype=np.uint32) + intersection['ext_shp_id'] = np.array(inp, dtype=np.uint32) + + if len(intersection) > 0: + if True: + # No Warnings Zone + counts = intersection['FID'].value_counts() + warnings.filterwarnings('ignore') + intersection.loc[:, 'weight'] = 1. + + for i, row in intersection.iterrows(): + if isinstance(i, int) and i % 1000 == 0 and info: + print('\t\t\tRank {0:03d}: {1:.3f} %'.format(self.rank, i * 100 / len(intersection))) + sys.stdout.flush() + # Filter to do not calculate percentages over 100% grid cells spatial joint + if counts[row['FID']] > 1: + try: + intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( + ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area + except TopologicalError: + # If for some reason the geometry is corrupted it should work with the buffer function + ext_shp.loc[[inp[i]], 'geometry'] = ext_shp.loc[[inp[i]], 'geometry'].buffer(0) + intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( + ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area + # intersection['intersect_area'] = intersection.apply( + # lambda x: x['geometry_grid'].intersection(x['geometry_ext']).area, axis=1) + intersection.drop(intersection[intersection['weight'] <= 0].index, inplace=True) + + warnings.filterwarnings('default') + + # Choose the biggest area from intersected areas with multiple options + intersection.sort_values('weight', ascending=False, inplace=True) + intersection = intersection.drop_duplicates(subset='FID', keep="first") + intersection = intersection.sort_values('FID').set_index('FID') + + for var_name in var_list: + self.shapefile.loc[intersection.index, var_name] = np.array( + ext_shp.loc[intersection['ext_shp_id'], var_name]) + else: + for var_name in var_list: + self.shapefile.loc[:, var_name] = np.nan + for var_name in var_list: + self.shapefile.loc[:, var_name] = np.array(self.shapefile.loc[:, var_name], dtype=ext_shp[var_name].dtype) + return None diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 0cdb8a7..6c1972a 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -9,14 +9,13 @@ from xarray import open_dataset from netCDF4 import Dataset, num2date, date2num from mpi4py import MPI from cfunits import Units +from shapely.geos import TopologicalError import geopandas as gpd from shapely.geometry import Polygon, Point from copy import deepcopy, copy import datetime import pyproj -from ..methods import vertical_interpolation -from ..methods import horizontal_interpolation -from ..methods import cell_measures +from ..methods import vertical_interpolation, horizontal_interpolation, cell_measures, spatial_join from ..nes_formats import to_netcdf_cams_ra @@ -544,11 +543,13 @@ class Nes(object): msg += "The given time bounds list has a different length than the time array. " msg += "(time:{0}, bnds:{1}). Time bounds will not be set.".format(len(self._time), len(time_bnds)) warnings.warn(msg) + sys.stderr.flush() else: msg = 'WARNING!!! ' msg += 'There is at least one element in the time bounds to be set that is not a datetime object. ' msg += 'Time bounds will not be set.' warnings.warn(msg) + sys.stderr.flush() return None @@ -2255,6 +2256,9 @@ class Nes(object): for var_name in self.variables.keys(): self.variables[var_name]['cell_measures'] = 'area: cell_area' + + if self.info: + print("Rank {0:03d}: Cell measures done".format(self.rank)) return None def _create_variables(self, netcdf, chunking=False): @@ -2288,6 +2292,7 @@ class Nes(object): msg += "Different data types for variable {0}. ".format(var_name) msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) warnings.warn(msg) + sys.stderr.flush() try: var_dict['data'] = var_dict['data'].astype(var_dtype) except Exception as e: # TODO: Detect exception @@ -2517,8 +2522,6 @@ class Nes(object): # Create cell measures self._create_cell_measures(netcdf) - if self.info: - print("Rank {0:03d}: Cell measures done".format(self.rank)) # Create variables self._create_variables(netcdf, chunking=chunking) @@ -2864,6 +2867,7 @@ class Nes(object): if lev is None: msg = 'No vertical level has been specified. The first one will be selected.' warnings.warn(msg) + sys.stderr.flush() idx_lev = 0 else: if lev not in self.lev['data']: @@ -2874,6 +2878,7 @@ class Nes(object): if time is None: msg = 'No time has been specified. The first one will be selected.' warnings.warn(msg) + sys.stderr.flush() idx_time = 0 else: if time not in self.time: @@ -2908,132 +2913,6 @@ class Nes(object): return None - def spatial_join(self, ext_shp, method=None, var_list=None, info=False): - """ - Compute overlay intersection of two GeoPandasDataFrames. - - Parameters - ---------- - ext_shp : GeoPandasDataFrame or str - File or path from where the data will be obtained on the intersection. - method : str - Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. - var_list : List or None - Variables that will be included in the resulting shapefile. - info : bool - Indicates if you want to print the process info or no - """ - if isinstance(ext_shp, str): - ext_shp = gpd.read_file(ext_shp) - - # Create shapefile if it does not exist - if self.shapefile is None: - msg = 'Shapefile does not exist. It will be created now.' - warnings.warn(msg) - grid_shp = self.create_shapefile() - else: - grid_shp = self.shapefile - grid_shp = copy(grid_shp) - - # Get variables of interest from shapefile - if var_list is not None: - if isinstance(var_list, str): - var_list = [var_list] - ext_shp = ext_shp.loc[:, var_list + ['geometry']] - else: - var_list = ext_shp.columns.to_list() - var_list.remove('geometry') - - ext_shp = ext_shp.to_crs(grid_shp.crs) - - # Nearest centroids to the shapefile polygons - if method == 'nearest': - if self.master and info: - print("Nearest spatial joint") - # Get centroids - centroids_gdf = self.get_centroids_from_coordinates() - - # From geodetic coordinates (e.g. 4326) to meters (e.g. 4328) to use sjoin_nearest - # TODO: Check if the projection 4328 does not distort the coordinates too much - # https://gis.stackexchange.com/questions/372564/ - # userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas - ext_shp = ext_shp.to_crs('EPSG:4328') - centroids_gdf = centroids_gdf.to_crs('EPSG:4328') - - # Calculate spatial joint by distance - aux_grid = gpd.sjoin_nearest(centroids_gdf, ext_shp, distance_col='distance') - - # Get data from closest shapes to centroids - del aux_grid['geometry'], aux_grid['index_right'] - self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - - # Intersect the areas of the shapefile polygons, outside the shapefile there will be NaN - elif method == 'intersection': - if self.master and info: - print("Intersection spatial joint") - grid_shp['FID_grid'] = grid_shp.index - grid_shp = grid_shp.reset_index() - # Get intersected areas - # inp, res = shapefile.sindex.query_bulk(self.shapefile.geometry, predicate='intersects') - inp, res = grid_shp.sindex.query_bulk(ext_shp.geometry, predicate='intersects') - - if self.master and info: - print('Rank {0:03d}: {1} intersected areas found'.format(self.rank, len(inp))) - - # Calculate intersected areas and fractions - intersection = pd.DataFrame() - intersection['FID'] = np.array(grid_shp.loc[res, 'FID_grid'], dtype=np.uint32) - intersection['ext_shp_id'] = np.array(inp, dtype=np.uint32) - - # intersection['geometry_ext'] = ext_shp.loc[inp, 'geometry'].values - # intersection['geometry_grid'] = grid_shp.loc[res, 'geometry'].values - if True: - # No Warnings Zone - counts = intersection['FID'].value_counts() - warnings.filterwarnings('ignore') - - intersection.loc[:, 'weight'] = 1. - for i, row in intersection.iterrows(): - if isinstance(i, int) and i % 1000 == 0 and self.master and info: - print('\tRank {0:03d}: {1:.3f} %'.format(self.rank, i*100 / len(intersection))) - # Filter to do not calculate percentages over 100% grid cells spatial joint - if counts[row['FID']] > 1: - intersection.loc[i, 'weight'] = grid_shp.loc[res[i], 'geometry'].intersection( - ext_shp.loc[inp[i], 'geometry']).area / grid_shp.loc[res[i], 'geometry'].area - # intersection['intersect_area'] = intersection.apply( - # lambda x: x['geometry_grid'].intersection(x['geometry_ext']).area, axis=1) - intersection.drop(intersection[intersection['weight'] <= 0].index, inplace=True) - - warnings.filterwarnings('default') - - # Choose the biggest area from intersected areas with multiple options - intersection.sort_values('weight', ascending=False, inplace=True) - intersection = intersection.drop_duplicates(subset='FID', keep="first") - intersection = intersection.sort_values('FID').set_index('FID') - - for var_name in var_list: - self.shapefile.loc[intersection.index, var_name] = np.array( - ext_shp.loc[intersection['ext_shp_id'], var_name]) - - # Centroids that fall on the shapefile polygons, outside the shapefile there will be NaN - elif method == 'centroid': - - # Get centroids - centroids_gdf = self.get_centroids_from_coordinates() - - # Calculate spatial joint - aux_grid = gpd.sjoin(centroids_gdf, ext_shp, predicate='within') - - # Get data from shapes where there are centroids, rest will be NaN - del aux_grid['geometry'], aux_grid['index_right'] - self.shapefile.loc[aux_grid.index, aux_grid.columns] = aux_grid - - else: - accepted_values = ['nearest', 'intersection', 'centroid'] - raise NotImplementedError('{0} is not implemented. Choose from: {1}'.format(method, accepted_values)) - - return None - def get_centroids_from_coordinates(self): """ Get centroids from geographical coordinates. @@ -3307,6 +3186,24 @@ class Nes(object): self, dst_grid, weight_matrix_path=weight_matrix_path, kind=kind, n_neighbours=n_neighbours, info=info, to_providentia=to_providentia, only_create_wm=only_create_wm, wm=wm) + def spatial_join(self, ext_shp, method=None, var_list=None, info=False): + """ + Compute overlay intersection of two GeoPandasDataFrames. + + Parameters + ---------- + ext_shp : GeoPandasDataFrame or str + File or path from where the data will be obtained on the intersection. + method : str + Overlay method. Accepted values: ['nearest', 'intersection', 'centroid']. + var_list : List or None + Variables that will be included in the resulting shapefile. + info : bool + Indicates if you want to print the process info or no + """ + + return spatial_join(self, ext_shp=ext_shp, method=method, var_list=var_list, info=info) + def calculate_grid_area(self): """ Get coordinate bounds and call function to calculate the area of each cell of a grid. diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index 585acbe..a049b50 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys import numpy as np import pandas as pd from cfunits import Units @@ -168,6 +169,7 @@ class LCCNes(Nes): else: msg = 'There is no variable called Lambert_conformal, projection has not been defined.' warnings.warn(msg) + sys.stderr.flush() projection_data = None return projection_data diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index e6ccf18..d100e6f 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys import numpy as np import pandas as pd from cfunits import Units @@ -164,6 +165,7 @@ class MercatorNes(Nes): else: msg = 'There is no variable called mercator, projection has not been defined.' warnings.warn(msg) + sys.stderr.flush() projection_data = None return projection_data diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 3d55257..708f2b1 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -343,6 +343,7 @@ class PointsNes(Nes): msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) warnings.warn(msg) + sys.stderr.flush() try: var_dict['data'] = var_dict['data'].astype(var_dtype) except Exception as e: # TODO: Detect exception @@ -486,6 +487,7 @@ class PointsNes(Nes): msg = "WARNING!!! " msg += "Variable {0} was not loaded. It will not be written.".format(var_name) warnings.warn(msg) + sys.stderr.flush() return None diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 978f4d5..79dd468 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -478,6 +478,7 @@ class PointsNesGHOST(PointsNes): msg = 'WARNING!!! ' msg += 'Variable {0} was not loaded. It will not be written.'.format(var_name) warnings.warn(msg) + sys.stderr.flush() return None @@ -603,6 +604,7 @@ class PointsNesGHOST(PointsNes): msg += 'GHOST datasets cannot be written in parallel yet. ' msg += 'Changing to serial mode.' warnings.warn(msg) + sys.stderr.flush() super(PointsNesGHOST, self).to_netcdf(path, compression_level=compression_level, serial=True, info=info, chunking=chunking) diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 91229da..81a771b 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -514,6 +514,7 @@ class PointsNesProvidentia(PointsNes): msg = 'WARNING!!! ' msg += 'Variable {0} was not loaded. It will not be written.'.format(var_name) warnings.warn(msg) + sys.stderr.flush() return None @@ -605,6 +606,7 @@ class PointsNesProvidentia(PointsNes): msg += 'Providentia datasets cannot be written in parallel yet. ' msg += 'Changing to serial mode.' warnings.warn(msg) + sys.stderr.flush() super(PointsNesProvidentia, self).to_netcdf(path, compression_level=compression_level, serial=True, info=info, chunking=chunking) diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index ac66c76..7176da7 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import warnings +import sys import numpy as np import pandas as pd import math @@ -164,6 +165,7 @@ class RotatedNes(Nes): else: msg = 'There is no variable called rotated_pole, projection has not been defined.' warnings.warn(msg) + sys.stderr.flush() projection_data = None return projection_data diff --git a/nes/nes_formats/cams_ra_format.py b/nes/nes_formats/cams_ra_format.py index a286d5d..d5b8a6b 100644 --- a/nes/nes_formats/cams_ra_format.py +++ b/nes/nes_formats/cams_ra_format.py @@ -187,6 +187,7 @@ def create_variables(self, netcdf, i_lev): msg = 'WARNING!!! ' msg += 'Variable {0} was not loaded. It will not be written.'.format(var_name) warnings.warn(msg) + sys.stderr.flush() return None diff --git a/tests/2.1-test_spatial_join.py b/tests/2.1-test_spatial_join.py index ede7851..c377965 100644 --- a/tests/2.1-test_spatial_join.py +++ b/tests/2.1-test_spatial_join.py @@ -12,6 +12,7 @@ rank = comm.Get_rank() size = comm.Get_size() parallel_method = 'Y' +serial_write = False result_path = "Times_test_2.1_spatial_join_{0}_{1:03d}.csv".format(parallel_method, size) result = pd.DataFrame(index=['read', 'calculate', 'write'], @@ -20,11 +21,20 @@ result = pd.DataFrame(index=['read', 'calculate', 'write'], '2.1.5.Existing_file_intersection', '2.1.6.New_file_intersection']) # ===== PATH TO MASK ===== # -shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' +# Timezones +# shapefile_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/timezones_2021c/timezones_2021c.shp' +# shapefile_var_list = ['tzid'] +# str_len = 32 +# Country ISO codes +shapefile_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/gadm_country_mask/gadm_country_ISO3166.shp" +shapefile_var_list = ['ISO'] +str_len = 3 # Original path: /gpfs/scratch/bsc32/bsc32538/original_files/franco_interp.nc # Regular lat-lon grid from MONARCH original_path = '/gpfs/projects/bsc32/models/NES_tutorial_data/franco_interp.nc' +# CAMS_Global +# original_path = "/gpfs/projects/bsc32/models/NES_tutorial_data/nox_no_201505.nc" # ====================================================================================================================== # =================================== CENTROID EXISTING FILE =================================================== @@ -37,73 +47,27 @@ if rank == 0: # READ st_time = timeit.default_timer() nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() comm.Barrier() result.loc['read', test_name] = timeit.default_timer() - st_time # SPATIAL JOIN # Method can be centroid, nearest and intersection st_time = timeit.default_timer() -nessy.spatial_join(shapefile_path, method='centroid') +nessy.spatial_join(shapefile_path, method='centroid', var_list=shapefile_var_list, info=True) comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time -print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) - -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) -nessy.load() - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() - -# ====================================================================================================================== -# =================================== CENTROID FROM NEW FILE =================================================== -# ====================================================================================================================== - -test_name = '2.1.2.New_file_centroid' -if rank == 0: - print(test_name) +result.loc['calcul', test_name] = timeit.default_timer() - st_time -# DEFINE PROJECTION st_time = timeit.default_timer() -projection = 'regular' -lat_orig = 41.1 -lon_orig = 1.8 -inc_lat = 0.2 -inc_lon = 0.2 -n_lat = 100 -n_lon = 100 -# SPATIAL JOIN -# Method can be centroid, nearest and intersection -nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, - lat_orig=lat_orig, lon_orig=lon_orig, - inc_lat=inc_lat, inc_lon=inc_lon, - n_lat=n_lat, n_lon=n_lon) +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time -print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) - -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write) comm.Barrier() result.loc['write', test_name] = timeit.default_timer() - st_time @@ -116,6 +80,55 @@ if rank == 0: print(result.loc[:, test_name]) sys.stdout.flush() +# # ====================================================================================================================== +# # =================================== CENTROID FROM NEW FILE =================================================== +# # ====================================================================================================================== +# +# test_name = '2.1.2.New_file_centroid' +# if rank == 0: +# print(test_name) +# +# # DEFINE PROJECTION +# st_time = timeit.default_timer() +# projection = 'regular' +# lat_orig = 41.1 +# lon_orig = 1.8 +# inc_lat = 0.2 +# inc_lon = 0.2 +# n_lat = 100 +# n_lon = 100 +# +# # SPATIAL JOIN +# # Method can be centroid, nearest and intersection +# nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, +# lat_orig=lat_orig, lon_orig=lon_orig, +# inc_lat=inc_lat, inc_lon=inc_lon, +# n_lat=n_lat, n_lon=n_lon) +# comm.Barrier() +# result.loc['calculate', test_name] = timeit.default_timer() - st_time +# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) +# +# # ADD Var +# for var_name in shapefile_var_list: +# data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) +# nessy.variables[var_name] = {'data': data, 'dtype': str} +# nessy.set_strlen(str_len) +# +# # WRITE +# st_time = timeit.default_timer() +# nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +# comm.Barrier() +# result.loc['write', test_name] = timeit.default_timer() - st_time +# +# # REOPEN +# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +# nessy.load() +# +# comm.Barrier() +# if rank == 0: +# print(result.loc[:, test_name]) +# sys.stdout.flush() + # ====================================================================================================================== # =================================== NEAREST EXISTING FILE =================================================== # ====================================================================================================================== @@ -127,25 +140,28 @@ if rank == 0: # READ st_time = timeit.default_timer() nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() comm.Barrier() result.loc['read', test_name] = timeit.default_timer() - st_time # SPATIAL JOIN # Method can be centroid, nearest and intersection st_time = timeit.default_timer() -nessy.spatial_join(shapefile_path, method='nearest') +nessy.spatial_join(shapefile_path, method='nearest', var_list=shapefile_var_list, info=True) comm.Barrier() result.loc['calculate', test_name] = timeit.default_timer() - st_time print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) # WRITE st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) comm.Barrier() result.loc['write', test_name] = timeit.default_timer() - st_time @@ -158,53 +174,54 @@ if rank == 0: print(result.loc[:, test_name]) sys.stdout.flush() -# ====================================================================================================================== -# =================================== NEAREST FROM NEW FILE =================================================== -# ====================================================================================================================== - -test_name = '2.1.4.New_file_nearest' -if rank == 0: - print(test_name) - -# DEFINE PROJECTION -st_time = timeit.default_timer() -projection = 'regular' -lat_orig = 41.1 -lon_orig = 1.8 -inc_lat = 0.2 -inc_lon = 0.2 -n_lat = 100 -n_lon = 100 - -# SPATIAL JOIN -# Method can be centroid, nearest and intersection -nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, - lat_orig=lat_orig, lon_orig=lon_orig, - inc_lat=inc_lat, inc_lon=inc_lon, - n_lat=n_lat, n_lon=n_lon) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time -print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) - -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) -nessy.load() - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() +# # ====================================================================================================================== +# # =================================== NEAREST FROM NEW FILE =================================================== +# # ====================================================================================================================== +# +# test_name = '2.1.4.New_file_nearest' +# if rank == 0: +# print(test_name) +# +# # DEFINE PROJECTION +# st_time = timeit.default_timer() +# projection = 'regular' +# lat_orig = 41.1 +# lon_orig = 1.8 +# inc_lat = 0.2 +# inc_lon = 0.2 +# n_lat = 100 +# n_lon = 100 +# +# # SPATIAL JOIN +# # Method can be centroid, nearest and intersection +# nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, +# lat_orig=lat_orig, lon_orig=lon_orig, +# inc_lat=inc_lat, inc_lon=inc_lon, +# n_lat=n_lat, n_lon=n_lon) +# comm.Barrier() +# result.loc['calculate', test_name] = timeit.default_timer() - st_time +# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) +# +# # ADD Var +# for var_name in shapefile_var_list: +# data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) +# nessy.variables[var_name] = {'data': data, 'dtype': str} +# nessy.set_strlen(str_len) +# +# # WRITE +# st_time = timeit.default_timer() +# nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +# comm.Barrier() +# result.loc['write', test_name] = timeit.default_timer() - st_time +# +# # REOPEN +# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +# nessy.load() +# +# comm.Barrier() +# if rank == 0: +# print(result.loc[:, test_name]) +# sys.stdout.flush() # ====================================================================================================================== @@ -214,29 +231,33 @@ sys.stdout.flush() test_name = '2.1.5.Existing_file_intersection' if rank == 0: print(test_name) - + # READ st_time = timeit.default_timer() nessy = open_netcdf(original_path, parallel_method=parallel_method) +nessy.variables = {} +nessy.create_shapefile() comm.Barrier() result.loc['read', test_name] = timeit.default_timer() - st_time # SPATIAL JOIN # Method can be centroid, nearest and intersection st_time = timeit.default_timer() -nessy.spatial_join(shapefile_path, method='intersection') +nessy.spatial_join(shapefile_path, method='intersection', var_list=shapefile_var_list, info=True) comm.Barrier() result.loc['calculate', test_name] = timeit.default_timer() - st_time print('SPATIAL JOIN - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) # WRITE st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) +nessy.set_strlen(strlen=str_len) +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) comm.Barrier() result.loc['write', test_name] = timeit.default_timer() - st_time @@ -249,54 +270,53 @@ if rank == 0: print(result.loc[:, test_name]) sys.stdout.flush() - # ====================================================================================================================== # =================================== INTERSECTION FROM NEW FILE =================================================== # ====================================================================================================================== -test_name = '2.1.6.New_file_intersection' -if rank == 0: - print(test_name) - -# DEFINE PROJECTION -st_time = timeit.default_timer() -projection = 'regular' -lat_orig = 41.1 -lon_orig = 1.8 -inc_lat = 0.2 -inc_lon = 0.2 -n_lat = 100 -n_lon = 100 - -# SPATIAL JOIN -# Method can be centroid, nearest and intersection -nessy = from_shapefile(shapefile_path, method='intersection', projection=projection, - lat_orig=lat_orig, lon_orig=lon_orig, - inc_lat=inc_lat, inc_lon=inc_lon, - n_lat=n_lat, n_lon=n_lon) -comm.Barrier() -result.loc['calculate', test_name] = timeit.default_timer() - st_time -print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) - -# ADD TIMEZONES -timezones = nessy.shapefile['tzid'].values.reshape([nessy.lat['data'].shape[0], - nessy.lon['data'].shape[-1]]) -nessy.variables['tz'] = {'data': timezones,} - -# WRITE -st_time = timeit.default_timer() -nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), info=True) -comm.Barrier() -result.loc['write', test_name] = timeit.default_timer() - st_time - -# REOPEN -nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) -nessy.load() - -comm.Barrier() -if rank == 0: - print(result.loc[:, test_name]) -sys.stdout.flush() +# test_name = '2.1.6.New_file_intersection' +# if rank == 0: +# print(test_name) +# +# # DEFINE PROJECTION +# st_time = timeit.default_timer() +# projection = 'regular' +# lat_orig = 41.1 +# lon_orig = 1.8 +# inc_lat = 0.2 +# inc_lon = 0.2 +# n_lat = 100 +# n_lon = 100 +# +# # SPATIAL JOIN +# # Method can be centroid, nearest and intersection +# nessy = from_shapefile(shapefile_path, method='intersection', projection=projection, +# lat_orig=lat_orig, lon_orig=lon_orig, +# inc_lat=inc_lat, inc_lon=inc_lon, +# n_lat=n_lat, n_lon=n_lon) +# comm.Barrier() +# result.loc['calculate', test_name] = timeit.default_timer() - st_time +# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) +# +# # ADD Var +# for var_name in shapefile_var_list: +# data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) +# nessy.variables[var_name] = {'data': data, 'dtype': str} +# nessy.set_strlen(str_len) +# # WRITE +# st_time = timeit.default_timer() +# nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +# comm.Barrier() +# result.loc['write', test_name] = timeit.default_timer() - st_time +# +# # REOPEN +# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +# nessy.load() +# +# comm.Barrier() +# if rank == 0: +# print(result.loc[:, test_name]) +# sys.stdout.flush() if rank == 0: result.to_csv(result_path) -- GitLab From a23ab9c0d3537e9f3ff1e468385ae634262271b7 Mon Sep 17 00:00:00 2001 From: ctena Date: Fri, 24 Mar 2023 12:43:10 +0100 Subject: [PATCH 2/7] - spatial_join added rewrite to spatial join test --- tests/2.1-test_spatial_join.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/2.1-test_spatial_join.py b/tests/2.1-test_spatial_join.py index c377965..283cfb8 100644 --- a/tests/2.1-test_spatial_join.py +++ b/tests/2.1-test_spatial_join.py @@ -74,6 +74,7 @@ result.loc['write', test_name] = timeit.default_timer() - st_time # REOPEN nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) nessy.load() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), serial=serial_write) comm.Barrier() if rank == 0: -- GitLab From 41fc08890b4fadfb4edc3fe6a3d1fe265b62de01 Mon Sep 17 00:00:00 2001 From: Alba Vilanova Cortezon Date: Mon, 27 Mar 2023 16:17:28 +0200 Subject: [PATCH 3/7] Fix issue on 5D data with strlen and update shapefile functions --- nes/nc_projections/default_nes.py | 178 ++++++++++------ nes/nc_projections/lcc_nes.py | 62 +++--- nes/nc_projections/mercator_nes.py | 61 +++--- nes/nc_projections/points_nes.py | 11 +- nes/nc_projections/points_nes_ghost.py | 2 +- nes/nc_projections/rotated_nes.py | 57 ++--- tests/2.1-test_spatial_join.py | 284 +++++++++++++------------ 7 files changed, 370 insertions(+), 285 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 8bd50e5..d479a19 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -1887,15 +1887,33 @@ class Nes(object): self.read_axis_limits['z_min']:self.read_axis_limits['z_max'], self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - # elif len(var_dims) == 5: - # data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], - # :, - # self.read_axis_limits['z_min']:self.read_axis_limits['z_max'], - # self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - # self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + elif len(var_dims) == 5: + if 'strlen' in var_dims: + data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], + self.read_axis_limits['z_min']:self.read_axis_limits['z_max'], + self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], + :] + data_aux = np.empty(shape=(data.shape[0], data.shape[1], data.shape[2], data.shape[3]), dtype=np.object) + for time_n in range(data.shape[0]): + for lev_n in range(data.shape[1]): + for lat_n in range(data.shape[2]): + for lon_n in range(data.shape[3]): + data_aux[time_n, lev_n, lat_n, lon_n] = ''.join( + data[time_n, lev_n, lat_n, lon_n].tostring().decode('ascii').replace('\x00', '')) + data = data_aux + else: + # data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], + # :, + # self.read_axis_limits['z_min']:self.read_axis_limits['z_max'], + # self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + # self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + raise NotImplementedError('Error with {0}. Only can be read netCDF with 4 dimensions or less'.format( + var_name)) else: raise NotImplementedError('Error with {0}. Only can be read netCDF with 4 dimensions or less'.format( var_name)) + # # Missing to nan # try: # data[data.shapefile == True] = np.nan @@ -2284,6 +2302,7 @@ class Nes(object): """ for i, (var_name, var_dict) in enumerate(self.variables.items()): + if isinstance(var_dict['data'], int) and var_dict['data'] == 0: var_dims = ('time', 'lev',) + self._var_dim var_dtype = np.float32 @@ -2330,15 +2349,33 @@ class Nes(object): var_dims += ('strlen',) # Split strings into chars (S1) - data_aux = np.chararray(shape=(var_dict['data'].shape[0], - var_dict['data'].shape[1], - self.strlen)) - for lat_n in range(var_dict['data'].shape[0]): - for lon_n in range(var_dict['data'].shape[1]): - chars = [v for v in var_dict['data'][lat_n][lon_n]] - data = chars + ['']*(self.strlen-len(chars)) - for char_n, char in enumerate(data): - data_aux[lat_n, lon_n, char_n] = char.encode('ascii', 'ignore') + if len(var_dict['data'].shape) == 2: + data_aux = np.chararray(shape=(var_dict['data'].shape[0], + var_dict['data'].shape[1], + self.strlen)) + for lat_n in range(var_dict['data'].shape[0]): + for lon_n in range(var_dict['data'].shape[1]): + chars = [v for v in var_dict['data'][lat_n][lon_n]] + data = chars + ['']*(self.strlen-len(chars)) + for char_n, char in enumerate(data): + data_aux[lat_n, lon_n, char_n] = char.encode('ascii', 'ignore') + + elif len(var_dict['data'].shape) == 4: + data_aux = np.chararray(shape=(var_dict['data'].shape[0], + var_dict['data'].shape[1], + var_dict['data'].shape[2], + var_dict['data'].shape[3], + self.strlen)) + for time_n in range(var_dict['data'].shape[0]): + for lev_n in range(var_dict['data'].shape[1]): + for lat_n in range(var_dict['data'].shape[2]): + for lon_n in range(var_dict['data'].shape[3]): + chars = [v for v in var_dict['data'][time_n][lev_n][lat_n][lon_n]] + data = chars + ['']*(self.strlen-len(chars)) + for char_n, char in enumerate(data): + data_aux[time_n, lev_n, lat_n, lon_n, char_n] = char.encode('ascii', + 'ignore') + var_dict['data'] = data_aux var_dtype = 'S1' @@ -2377,6 +2414,7 @@ class Nes(object): for att_name, att_value in var_dict.items(): if att_name == 'data': + if att_value is not None: if self.info: print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) @@ -2385,6 +2423,17 @@ class Nes(object): self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = 0 + + elif len(att_value.shape) == 5: + if 'strlen' in var_dims: + var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], + self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], + self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + :] = att_value + else: + raise NotImplementedError('It is not possible to write 5D variables.') + elif len(att_value.shape) == 4: var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], @@ -2398,6 +2447,7 @@ class Nes(object): :] = att_value else: raise NotImplementedError('It is not possible to write 3D variables.') + if self.info: print("Rank {0:03d}: Var {1} data ({2}/{3})".format( self.rank, var_name, i + 1, len(self.variables))) @@ -2766,50 +2816,55 @@ class Nes(object): shapefile : GeoPandasDataFrame Shapefile dataframe. """ + + if self.shapefile is None: - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_shape = (self.lat_bnds['data'].shape[0], self.lon_bnds['data'].shape[0], 4) - lon_bnds_aux = np.empty(aux_shape) - lon_bnds_aux[:, :, 0] = self.lon_bnds['data'][np.newaxis, :, 0] - lon_bnds_aux[:, :, 1] = self.lon_bnds['data'][np.newaxis, :, 1] - lon_bnds_aux[:, :, 2] = self.lon_bnds['data'][np.newaxis, :, 1] - lon_bnds_aux[:, :, 3] = self.lon_bnds['data'][np.newaxis, :, 0] - - lon_bnds = lon_bnds_aux - del lon_bnds_aux - - lat_bnds_aux = np.empty(aux_shape) - lat_bnds_aux[:, :, 0] = self.lat_bnds['data'][:, np.newaxis, 0] - lat_bnds_aux[:, :, 1] = self.lat_bnds['data'][:, np.newaxis, 0] - lat_bnds_aux[:, :, 2] = self.lat_bnds['data'][:, np.newaxis, 1] - lat_bnds_aux[:, :, 3] = self.lat_bnds['data'][:, np.newaxis, 1] - - lat_bnds = lat_bnds_aux - del lat_bnds_aux - - aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2])) - aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2])) - - # Create dataframe cointaining all polygons - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - fids = np.arange(len(self._lat['data']) * len(self._lon['data'])) - fids = fids.reshape((len(self._lat['data']), len(self._lon['data']))) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf - + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_shape = (self.lat_bnds['data'].shape[0], self.lon_bnds['data'].shape[0], 4) + lon_bnds_aux = np.empty(aux_shape) + lon_bnds_aux[:, :, 0] = self.lon_bnds['data'][np.newaxis, :, 0] + lon_bnds_aux[:, :, 1] = self.lon_bnds['data'][np.newaxis, :, 1] + lon_bnds_aux[:, :, 2] = self.lon_bnds['data'][np.newaxis, :, 1] + lon_bnds_aux[:, :, 3] = self.lon_bnds['data'][np.newaxis, :, 0] + + lon_bnds = lon_bnds_aux + del lon_bnds_aux + + lat_bnds_aux = np.empty(aux_shape) + lat_bnds_aux[:, :, 0] = self.lat_bnds['data'][:, np.newaxis, 0] + lat_bnds_aux[:, :, 1] = self.lat_bnds['data'][:, np.newaxis, 0] + lat_bnds_aux[:, :, 2] = self.lat_bnds['data'][:, np.newaxis, 1] + lat_bnds_aux[:, :, 3] = self.lat_bnds['data'][:, np.newaxis, 1] + + lat_bnds = lat_bnds_aux + del lat_bnds_aux + + aux_b_lats = lat_bnds.reshape((lat_bnds.shape[0] * lat_bnds.shape[1], lat_bnds.shape[2])) + aux_b_lons = lon_bnds.reshape((lon_bnds.shape[0] * lon_bnds.shape[1], lon_bnds.shape[2])) + + # Create dataframe cointaining all polygons + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + fids = np.arange(len(self._lat['data']) * len(self._lon['data'])) + fids = fids.reshape((len(self._lat['data']), len(self._lon['data']))) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile + return gdf def write_shapefile(self, path): @@ -3224,8 +3279,11 @@ class Nes(object): Source projection Nes Object. """ - grid_area = cell_measures.calculate_grid_area(self) - + if 'cell_area' not in self.cell_measures.keys(): + grid_area = cell_measures.calculate_grid_area(self) + else: + grid_area = self.cell_measures['cell_area']['data'] + return grid_area @staticmethod diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index a049b50..8c9c436 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -467,35 +467,40 @@ class LCCNes(Nes): Shapefile dataframe. """ - # Get latitude and longitude cell boundaries - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], - self.lat_bnds['data'].shape[2])) - aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], - self.lon_bnds['data'].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) - fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], + self.lat_bnds['data'].shape[2])) + aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], + self.lon_bnds['data'].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + else: + gdf = self.shapefile + return gdf def get_centroids_from_coordinates(self): @@ -530,6 +535,7 @@ class LCCNes(Nes): """ Get coordinate bounds and call function to calculate the area of each cell of a grid. """ + grid_area = super(LCCNes, self).calculate_grid_area() self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], self.lon_bnds['data'].shape[1]])} diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index d100e6f..af1433b 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -443,35 +443,40 @@ class MercatorNes(Nes): Shapefile dataframe. """ - # Get latitude and longitude cell boundaries - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], - self.lat_bnds['data'].shape[2])) - aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], - self.lon_bnds['data'].shape[2])) - - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) - fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf + if self.shapefile is None: + + # Get latitude and longitude cell boundaries + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], + self.lat_bnds['data'].shape[2])) + aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], + self.lon_bnds['data'].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) + + # Create dataframe cointaining all polygons + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + else: + gdf = self.shapefile + return gdf def get_centroids_from_coordinates(self): diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 708f2b1..ab0ae1e 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -669,10 +669,15 @@ class PointsNes(Nes): Shapefile dataframe. """ - # Create dataframe cointaining all points - gdf = self.get_centroids_from_coordinates() - self.shapefile = gdf + if self.shapefile is None: + + # Create dataframe cointaining all points + gdf = self.get_centroids_from_coordinates() + self.shapefile = gdf + else: + gdf = self.shapefile + return gdf def get_centroids_from_coordinates(self): diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 79dd468..1f4f4b4 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -341,7 +341,7 @@ class PointsNesGHOST(PointsNes): else: var_dtype = var_dict['data'].dtype - # Get dimensions + # Get dimensions if len(var_dict['data'].shape) == 1: # Metadata var_dims = self._var_dim diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index 7176da7..9d58434 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -494,33 +494,38 @@ class RotatedNes(Nes): Shapefile dataframe. """ - if self._lat_bnds is None or self._lon_bnds is None: - self.create_spatial_bounds() - - # Reshape arrays to create geometry - aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], - self.lat_bnds['data'].shape[2])) - aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], - self.lon_bnds['data'].shape[2])) + if self.shapefile is None: + + if self._lat_bnds is None or self._lon_bnds is None: + self.create_spatial_bounds() + + # Reshape arrays to create geometry + aux_b_lats = self.lat_bnds['data'].reshape((self.lat_bnds['data'].shape[0] * self.lat_bnds['data'].shape[1], + self.lat_bnds['data'].shape[2])) + aux_b_lons = self.lon_bnds['data'].reshape((self.lon_bnds['data'].shape[0] * self.lon_bnds['data'].shape[1], + self.lon_bnds['data'].shape[2])) + + # Get polygons from bounds + geometry = [] + for i in range(aux_b_lons.shape[0]): + geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - # Get polygons from bounds - geometry = [] - for i in range(aux_b_lons.shape[0]): - geometry.append(Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), - (aux_b_lons[i, 1], aux_b_lats[i, 1]), - (aux_b_lons[i, 2], aux_b_lats[i, 2]), - (aux_b_lons[i, 3], aux_b_lats[i, 3]), - (aux_b_lons[i, 0], aux_b_lats[i, 0])])) - - # Create dataframe cointaining all polygons - fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) - fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) - fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), - geometry=geometry, - crs="EPSG:4326") - self.shapefile = gdf + # Create dataframe cointaining all polygons + fids = np.arange(self._lat['data'].shape[0] * self._lat['data'].shape[1]) + fids = fids.reshape((self._lat['data'].shape[0], self._lat['data'].shape[1])) + fids = fids[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + gdf = gpd.GeoDataFrame(index=pd.Index(name='FID', data=fids.ravel()), + geometry=geometry, + crs="EPSG:4326") + self.shapefile = gdf + + else: + gdf = self.shapefile return gdf diff --git a/tests/2.1-test_spatial_join.py b/tests/2.1-test_spatial_join.py index 283cfb8..dfac69b 100644 --- a/tests/2.1-test_spatial_join.py +++ b/tests/2.1-test_spatial_join.py @@ -74,61 +74,67 @@ result.loc['write', test_name] = timeit.default_timer() - st_time # REOPEN nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) nessy.load() + +# REWRITE nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), serial=serial_write) +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}_2.nc".format(size), parallel_method=parallel_method) +nessy.load() + comm.Barrier() if rank == 0: print(result.loc[:, test_name]) sys.stdout.flush() -# # ====================================================================================================================== -# # =================================== CENTROID FROM NEW FILE =================================================== -# # ====================================================================================================================== -# -# test_name = '2.1.2.New_file_centroid' -# if rank == 0: -# print(test_name) -# -# # DEFINE PROJECTION -# st_time = timeit.default_timer() -# projection = 'regular' -# lat_orig = 41.1 -# lon_orig = 1.8 -# inc_lat = 0.2 -# inc_lon = 0.2 -# n_lat = 100 -# n_lon = 100 -# -# # SPATIAL JOIN -# # Method can be centroid, nearest and intersection -# nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, -# lat_orig=lat_orig, lon_orig=lon_orig, -# inc_lat=inc_lat, inc_lon=inc_lon, -# n_lat=n_lat, n_lon=n_lon) -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# # ADD Var -# for var_name in shapefile_var_list: -# data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) -# nessy.variables[var_name] = {'data': data, 'dtype': str} -# nessy.set_strlen(str_len) -# -# # WRITE -# st_time = timeit.default_timer() -# nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) -# comm.Barrier() -# result.loc['write', test_name] = timeit.default_timer() - st_time -# -# # REOPEN -# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) -# nessy.load() -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() +# ====================================================================================================================== +# =================================== CENTROID FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.2.New_file_centroid' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='centroid', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() # ====================================================================================================================== # =================================== NEAREST EXISTING FILE =================================================== @@ -175,54 +181,54 @@ if rank == 0: print(result.loc[:, test_name]) sys.stdout.flush() -# # ====================================================================================================================== -# # =================================== NEAREST FROM NEW FILE =================================================== -# # ====================================================================================================================== -# -# test_name = '2.1.4.New_file_nearest' -# if rank == 0: -# print(test_name) -# -# # DEFINE PROJECTION -# st_time = timeit.default_timer() -# projection = 'regular' -# lat_orig = 41.1 -# lon_orig = 1.8 -# inc_lat = 0.2 -# inc_lon = 0.2 -# n_lat = 100 -# n_lon = 100 -# -# # SPATIAL JOIN -# # Method can be centroid, nearest and intersection -# nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, -# lat_orig=lat_orig, lon_orig=lon_orig, -# inc_lat=inc_lat, inc_lon=inc_lon, -# n_lat=n_lat, n_lon=n_lon) -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# # ADD Var -# for var_name in shapefile_var_list: -# data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) -# nessy.variables[var_name] = {'data': data, 'dtype': str} -# nessy.set_strlen(str_len) -# -# # WRITE -# st_time = timeit.default_timer() -# nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) -# comm.Barrier() -# result.loc['write', test_name] = timeit.default_timer() - st_time -# -# # REOPEN -# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) -# nessy.load() -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() +# ====================================================================================================================== +# =================================== NEAREST FROM NEW FILE =================================================== +# ====================================================================================================================== + +test_name = '2.1.4.New_file_nearest' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='nearest', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) + +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() # ====================================================================================================================== @@ -275,49 +281,49 @@ sys.stdout.flush() # =================================== INTERSECTION FROM NEW FILE =================================================== # ====================================================================================================================== -# test_name = '2.1.6.New_file_intersection' -# if rank == 0: -# print(test_name) -# -# # DEFINE PROJECTION -# st_time = timeit.default_timer() -# projection = 'regular' -# lat_orig = 41.1 -# lon_orig = 1.8 -# inc_lat = 0.2 -# inc_lon = 0.2 -# n_lat = 100 -# n_lon = 100 -# -# # SPATIAL JOIN -# # Method can be centroid, nearest and intersection -# nessy = from_shapefile(shapefile_path, method='intersection', projection=projection, -# lat_orig=lat_orig, lon_orig=lon_orig, -# inc_lat=inc_lat, inc_lon=inc_lon, -# n_lat=n_lat, n_lon=n_lon) -# comm.Barrier() -# result.loc['calculate', test_name] = timeit.default_timer() - st_time -# print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) -# -# # ADD Var -# for var_name in shapefile_var_list: -# data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) -# nessy.variables[var_name] = {'data': data, 'dtype': str} -# nessy.set_strlen(str_len) -# # WRITE -# st_time = timeit.default_timer() -# nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) -# comm.Barrier() -# result.loc['write', test_name] = timeit.default_timer() - st_time -# -# # REOPEN -# nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) -# nessy.load() -# -# comm.Barrier() -# if rank == 0: -# print(result.loc[:, test_name]) -# sys.stdout.flush() +test_name = '2.1.6.New_file_intersection' +if rank == 0: + print(test_name) + +# DEFINE PROJECTION +st_time = timeit.default_timer() +projection = 'regular' +lat_orig = 41.1 +lon_orig = 1.8 +inc_lat = 0.2 +inc_lon = 0.2 +n_lat = 100 +n_lon = 100 + +# SPATIAL JOIN +# Method can be centroid, nearest and intersection +nessy = from_shapefile(shapefile_path, method='intersection', projection=projection, + lat_orig=lat_orig, lon_orig=lon_orig, + inc_lat=inc_lat, inc_lon=inc_lon, + n_lat=n_lat, n_lon=n_lon) +comm.Barrier() +result.loc['calculate', test_name] = timeit.default_timer() - st_time +print('FROM SHAPEFILE - Rank {0:03d} - Shapefile: \n{1}'.format(rank, nessy.shapefile)) + +# ADD Var +for var_name in shapefile_var_list: + data = nessy.shapefile[var_name].values.reshape([nessy.lat['data'].shape[0], nessy.lon['data'].shape[-1]]) + nessy.variables[var_name] = {'data': data, 'dtype': str} +nessy.set_strlen(str_len) +# WRITE +st_time = timeit.default_timer() +nessy.to_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), serial=serial_write, info=True) +comm.Barrier() +result.loc['write', test_name] = timeit.default_timer() - st_time + +# REOPEN +nessy = open_netcdf(test_name.replace(' ', '_') + "_{0:03d}.nc".format(size), parallel_method=parallel_method) +nessy.load() + +comm.Barrier() +if rank == 0: + print(result.loc[:, test_name]) +sys.stdout.flush() if rank == 0: result.to_csv(result_path) -- GitLab From 9e8bca6ecf0f5e8851e4919657893ba41d979b15 Mon Sep 17 00:00:00 2001 From: Alba Vilanova Date: Tue, 28 Mar 2023 16:20:57 +0200 Subject: [PATCH 4/7] Fix mask error --- nes/nc_projections/default_nes.py | 1 - nes/nc_projections/points_nes.py | 8 +++----- nes/nc_projections/points_nes_ghost.py | 8 +++----- nes/nc_projections/points_nes_providentia.py | 8 +++----- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 88cb5f0..f50fc31 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -1854,7 +1854,6 @@ class Nes(object): data: np.array Portion of the variable data corresponding to the rank. """ - # from numpy.ma.core import MaskError nc_var = self.netcdf.variables[var_name] var_dims = nc_var.dimensions diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index ab0ae1e..563cb34 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -7,7 +7,6 @@ import pandas as pd from copy import deepcopy import geopandas as gpd from netCDF4 import date2num, stringtochar -from numpy.ma.core import MaskError from .default_nes import Nes @@ -311,10 +310,9 @@ class PointsNes(Nes): var_name)) # Missing to nan - try: - data[data.mask == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + if np.ma.is_masked(data): + # This operation is done because sometimes the missing value is lost during the calculation + data[data.mask] = np.nan return data diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 1f4f4b4..11c47ad 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -3,7 +3,6 @@ import sys import warnings import numpy as np -from numpy.ma.core import MaskError from netCDF4 import stringtochar, date2num from copy import deepcopy from .points_nes import PointsNes @@ -305,10 +304,9 @@ class PointsNesGHOST(PointsNes): var_name)) # Missing to nan - try: - data[data.mask == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + if np.ma.is_masked(data): + # This operation is done because sometimes the missing value is lost during the calculation + data[data.mask] = np.nan return data diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 81a771b..8241a81 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -4,7 +4,6 @@ import sys import warnings import numpy as np from copy import deepcopy -from numpy.ma.core import MaskError from netCDF4 import stringtochar from .points_nes import PointsNes @@ -341,10 +340,9 @@ class PointsNesProvidentia(PointsNes): var_name)) # Missing to nan - try: - data[data.mask == True] = np.nan - except (AttributeError, MaskError, ValueError): - pass + if np.ma.is_masked(data): + # This operation is done because sometimes the missing value is lost during the calculation + data[data.mask] = np.nan return data -- GitLab From 6241d8102881461fca55d55ab0de535f4cc81e42 Mon Sep 17 00:00:00 2001 From: Alba Vilanova Date: Tue, 28 Mar 2023 16:49:48 +0200 Subject: [PATCH 5/7] Create function to get strlen --- nes/create_nes.py | 2 +- nes/nc_projections/default_nes.py | 31 +++++++++++++++++--- nes/nc_projections/points_nes.py | 6 +--- nes/nc_projections/points_nes_providentia.py | 2 +- tests/2.1-test_spatial_join.py | 2 +- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/nes/create_nes.py b/nes/create_nes.py index da7d29c..10ee6de 100644 --- a/nes/create_nes.py +++ b/nes/create_nes.py @@ -9,7 +9,7 @@ from .nc_projections import * def create_nes(comm=None, info=False, projection=None, parallel_method='Y', balanced=False, - strlen=75, times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + times=None, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, **kwargs): """ Create a Nes class from scratch. diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index f50fc31..e5a72f9 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -86,7 +86,7 @@ class Nes(object): """ def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, strlen=75, **kwargs): + balanced=False, times=None, **kwargs): """ Initialize the Nes class @@ -190,6 +190,10 @@ class Nes(object): # Set NetCDF attributes self.global_attrs = self.__get_global_attributes(create_nes) + # Set string length + # 75 is the standard value used in GHOST data + self.strlen = 75 + else: if dataset is not None: @@ -242,12 +246,12 @@ class Nes(object): # Set NetCDF attributes self.global_attrs = self.__get_global_attributes() + # Get string length + self.strlen = self._get_strlen() + # Writing options self.zip_lvl = 0 - # Get string length - self.strlen = strlen - # Dimensions information self._var_dim = None self._lat_dim = None @@ -307,6 +311,23 @@ class Nes(object): return new + def _get_strlen(self, strlen=75): + """ + Get the strlen + + Parameters + ---------- + strlen : int + Max length of the string + """ + + if 'strlen' in self.netcdf.dimensions: + strlen = self.netcdf.dimensions['strlen'].size + else: + strlen = strlen + + return strlen + def set_strlen(self, strlen=75): """ Set the strlen @@ -316,7 +337,9 @@ class Nes(object): strlen : int Max length of the string """ + self.strlen = strlen + return None def __del__(self): diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 563cb34..756fe3c 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -30,7 +30,7 @@ class PointsNes(Nes): """ def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - times=None, strlen=75, **kwargs): + times=None, **kwargs): """ Initialize the PointsNes class. @@ -67,10 +67,6 @@ class PointsNes(Nes): # Dimensions screening self.lat = self._get_coordinate_values(self._lat, 'X') self.lon = self._get_coordinate_values(self._lon, 'X') - self.strlen = strlen - else: - # Dimensions screening - self.strlen = self._get_strlen() # Complete dimensions self._station = {'data': np.arange(len(self._lon['data']))} diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 8241a81..84c6be2 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -110,7 +110,7 @@ class PointsNesProvidentia(PointsNes): self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, '') # Set strlen to be None (avoid default strlen inherited from points) - self.strlen = None + self.set_strlen(None) @staticmethod def new(comm=None, path=None, info=False, dataset=None, xarray=False, create_nes=False, balanced=False, diff --git a/tests/2.1-test_spatial_join.py b/tests/2.1-test_spatial_join.py index dfac69b..98a482d 100644 --- a/tests/2.1-test_spatial_join.py +++ b/tests/2.1-test_spatial_join.py @@ -57,7 +57,7 @@ result.loc['read', test_name] = timeit.default_timer() - st_time st_time = timeit.default_timer() nessy.spatial_join(shapefile_path, method='centroid', var_list=shapefile_var_list, info=True) comm.Barrier() -result.loc['calcul', test_name] = timeit.default_timer() - st_time +result.loc['calculate', test_name] = timeit.default_timer() - st_time st_time = timeit.default_timer() -- GitLab From 0a42da98310d9fa94fc847c3b95b74acc9720901 Mon Sep 17 00:00:00 2001 From: Alba Vilanova Date: Tue, 28 Mar 2023 17:05:44 +0200 Subject: [PATCH 6/7] Remove cell measures code from proj files --- nes/nc_projections/default_nes.py | 2 ++ nes/nc_projections/latlon_nes.py | 11 ----------- nes/nc_projections/lcc_nes.py | 10 ---------- nes/nc_projections/mercator_nes.py | 11 ----------- nes/nc_projections/rotated_nes.py | 10 ---------- 5 files changed, 2 insertions(+), 42 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index e5a72f9..2677ec1 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -3302,6 +3302,8 @@ class Nes(object): if 'cell_area' not in self.cell_measures.keys(): grid_area = cell_measures.calculate_grid_area(self) + self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], + self.lon_bnds['data'].shape[1]])} else: grid_area = self.cell_measures['cell_area']['data'] diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index cfcb11d..b044d98 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -272,14 +272,3 @@ class LatLonNes(Nes): """ return super(LatLonNes, self).to_grib2(path, grib_keys, grib_template_path, lat_flip=lat_flip, info=info) - - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - - """ - grid_area = super().calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index 8c9c436..b5a679e 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -531,13 +531,3 @@ class LCCNes(Nes): return centroids_gdf - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - """ - - grid_area = super(LCCNes, self).calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index af1433b..b752e8d 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -506,14 +506,3 @@ class MercatorNes(Nes): crs="EPSG:4326") return centroids_gdf - - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - """ - - grid_area = super(MercatorNes, self).calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index 9d58434..66dc2b7 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -557,13 +557,3 @@ class RotatedNes(Nes): return centroids_gdf - def calculate_grid_area(self): - """ - Get coordinate bounds and call function to calculate the area of each cell of a grid. - """ - - grid_area = super(RotatedNes, self).calculate_grid_area() - self.cell_measures['cell_area'] = {'data': grid_area.reshape([self.lon_bnds['data'].shape[0], - self.lon_bnds['data'].shape[1]])} - - return None -- GitLab From ec2769d91f38f2164f512887ec0a9a9d16cc55ad Mon Sep 17 00:00:00 2001 From: ctena Date: Tue, 28 Mar 2023 17:55:57 +0200 Subject: [PATCH 7/7] from_shapefile function improved to use less amount of memory --- nes/create_nes.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/nes/create_nes.py b/nes/create_nes.py index 10ee6de..cfb0205 100644 --- a/nes/create_nes.py +++ b/nes/create_nes.py @@ -128,8 +128,7 @@ def from_shapefile(path, method=None, parallel_method='Y', **kwargs): 1. Create NES grid. 2. Create shapefile for grid. - 3. Read shapefile from mask. - 4. Spatial join to add shapefile variables to NES variables. + 3. Spatial join to add shapefile variables to NES variables. Parameters ---------- @@ -150,10 +149,7 @@ def from_shapefile(path, method=None, parallel_method='Y', **kwargs): # Create shapefile for grid nessy.create_shapefile() - # Read shapefile - shapefile = gpd.read_file(path) - # Make spatial join - nessy.spatial_join(shapefile, method=method) + nessy.spatial_join(path, method=method) return nessy -- GitLab