diff --git a/CHANGELOG.md b/CHANGELOG.md index c79027aa14aae72985ec10f62c7124cbe2ca493f..8561210f1d6cebc0da7853955423016879ff46aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### 1.1.1 * Release date: ??? * Changes and new features: + * Write 2D string data to save variables from shapefiles after doing a spatial join ([#49](https://earth.bsc.es/gitlab/es/NES/-/issues/49)) * Bugs fixing: * Bug on `cell_methods` serial write ([#53](https://earth.bsc.es/gitlab/es/NES/-/issues/53)) * Horizontal Interpolation Conservative: Improvement on memory usage when calculating the weight matrix ([#54](https://earth.bsc.es/gitlab/es/NES/-/issues/54)) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index da83b2b923929004314b5a81417bb33b94b2d4fb..7b0759831cd17644ac85f83e64e03354738acea3 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -88,7 +88,7 @@ class Nes(object): """ def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, **kwargs): + balanced=False, times=None, strlen=75, **kwargs): """ Initialize the Nes class @@ -246,6 +246,9 @@ class Nes(object): # Writing options self.zip_lvl = 0 + # Get string length + self.strlen = strlen + # Dimensions information self._var_dim = None self._lat_dim = None @@ -318,6 +321,7 @@ class Nes(object): del self.lat_bnds del self._lon_bnds del self.lon_bnds + del self.strlen del self.shapefile for cell_measure in self.cell_measures.keys(): if self.cell_measures[cell_measure]['data'] is not None: @@ -1425,6 +1429,7 @@ class Nes(object): """ units = self.__parse_time_unit(time.units) + if not hasattr(time, 'calendar'): calendar = 'standard' else: @@ -1479,7 +1484,6 @@ class Nes(object): if self.master: nc_var = self.netcdf.variables['time'] time_data, units, calendar = self.__parse_time(nc_var) - time = num2date(time_data, units, calendar=calendar) time = [aux.replace(second=0, microsecond=0) for aux in time] else: @@ -1797,10 +1801,21 @@ class Nes(object): self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) elif len(var_dims) == 3: - data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], - self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], - self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] - data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) + if 'strlen' in var_dims: + data = nc_var[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max'], + :] + data_aux = np.empty(shape=(data.shape[0], data.shape[1]), dtype=np.object) + for lat_n in range(data.shape[0]): + for lon_n in range(data.shape[1]): + data_aux[lat_n, lon_n] = ''.join( + data[lat_n, lon_n].tostring().decode('ascii').replace('\x00', '')) + data = data_aux.reshape(1, 1, data_aux.shape[-2], data_aux.shape[-1]) + else: + data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], + self.read_axis_limits['y_min']:self.read_axis_limits['y_max'], + self.read_axis_limits['x_min']:self.read_axis_limits['x_max']] + data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) elif len(var_dims) == 4: data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'], self.read_axis_limits['z_min']:self.read_axis_limits['z_max'], @@ -2077,6 +2092,9 @@ class Nes(object): netcdf.createDimension('lon', len(self._lon['data'])) netcdf.createDimension('lat', len(self._lat['data'])) + # Create string length dimension + netcdf.createDimension('strlen', self.strlen) + return None def _create_dimension_variables(self, netcdf): @@ -2197,12 +2215,63 @@ class Nes(object): for i, (var_name, var_dict) in enumerate(self.variables.items()): if var_dict['data'] is not None: + + # Get dimensions + if var_dict['data'].shape == 4: + var_dims = ('time', 'lev',) + self._var_dim + else: + var_dims = self._var_dim + + # Get data type + if 'dtype' in var_dict.keys(): + var_dtype = var_dict['dtype'] + if var_dtype != var_dict['data'].dtype: + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, + var_dict['data'].dtype) + warnings.warn(msg) + try: + var_dict['data'] = var_dict['data'].astype(var_dtype) + except Exception as e: # TODO: Detect exception + raise e("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict['data'].dtype + + # Transform objects into strings + if var_dtype == np.dtype(object): + var_dict['data'] = var_dict['data'].astype(str) + var_dtype = var_dict['data'].dtype + + # Convert list of strings to chars for parallelization + try: + unicode_type = len(max(var_dict['data'].flatten(), key=len)) + if ((var_dict['data'].dtype == np.dtype(' 0, complevel=self.zip_lvl) else: if self.balanced: @@ -2212,7 +2281,7 @@ class Nes(object): else: chunk_size = None chunk_size = self.comm.bcast(chunk_size, root=0) - var = netcdf.createVariable(var_name, var_dict['data'].dtype, ('time', 'lev',) + self._var_dim, + var = netcdf.createVariable(var_name, var_dtype, var_dims, zlib=self.zip_lvl > 0, complevel=self.zip_lvl, chunksizes=chunk_size) if self.info: @@ -2228,31 +2297,47 @@ class Nes(object): if att_name == 'data': if self.info: print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - try: - var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], - self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], - self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value - except ValueError: - var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], - 0, - self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value - # msg = "*WARNING* '{0}' variable is a 3D field. Setting it on first (0) layer.".format( - # var_name) - # warn(msg) - except IndexError: - raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + if len(att_value.shape) == 4: + try: var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], - self.write_axis_limits['x_min']:self.write_axis_limits['x_max']].shape, - att_value.shape)) + self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value + except ValueError: + var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], + 0, + self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value + # msg = "*WARNING* '{0}' variable is a 3D field. Setting it on first (0) layer.".format( + # var_name) + # warn(msg) + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'], + self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], + self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.write_axis_limits['x_min']:self.write_axis_limits['x_max']].shape, + att_value.shape)) + elif len(att_value.shape) == 3: + if 'strlen' in var_dims: + try: + var[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + :] = att_value + except IndexError: + raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format( + var[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'], + self.write_axis_limits['x_min']:self.write_axis_limits['x_max'], + :].shape, + att_value.shape)) + else: + raise NotImplementedError('It is not possible to write 3D variables.') if self.info: print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1, len(self.variables))) elif att_name not in ['chunk_size', 'var_dims', 'dimensions']: var.setncattr(att_name, att_value) + self._set_var_crs(var) if self.info: print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1, diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 9c3d859b48ad1c157095d42b570e9977ddd1f0e0..5a2edaa418dae585cdfaaffc6ab6a38f8cb848f3 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -369,7 +369,7 @@ class PointsNes(Nes): # Convert list of strings to chars for parallelization try: - unicode_type = len(max(var_dict['data'], key=len)) + unicode_type = len(max(var_dict['data'].flatten(), key=len)) if ((var_dict['data'].dtype == np.dtype('...\n", " \n", " \n", - " 22495\n", - " POLYGON ((18.00000 64.80000, 18.20000 64.80000...\n", - " Europe/Stockholm\n", + " 14995\n", + " POLYGON ((18.00000 54.80000, 18.20000 54.80000...\n", + " Europe/Warsaw\n", " \n", " \n", - " 22496\n", - " POLYGON ((18.20000 64.80000, 18.40000 64.80000...\n", - " Europe/Stockholm\n", + " 14996\n", + " POLYGON ((18.20000 54.80000, 18.40000 54.80000...\n", + " Europe/Warsaw\n", " \n", " \n", - " 22497\n", - " POLYGON ((18.40000 64.80000, 18.60000 64.80000...\n", - " Europe/Stockholm\n", + " 14997\n", + " POLYGON ((18.40000 54.80000, 18.60000 54.80000...\n", + " Europe/Warsaw\n", " \n", " \n", - " 22498\n", - " POLYGON ((18.60000 64.80000, 18.80000 64.80000...\n", - " Europe/Stockholm\n", + " 14998\n", + " POLYGON ((18.60000 54.80000, 18.80000 54.80000...\n", + " Europe/Warsaw\n", " \n", " \n", - " 22499\n", - " POLYGON ((18.80000 64.80000, 19.00000 64.80000...\n", - " Europe/Stockholm\n", + " 14999\n", + " POLYGON ((18.80000 54.80000, 19.00000 54.80000...\n", + " NaN\n", " \n", " \n", "\n", - "

22500 rows × 2 columns

\n", + "

15000 rows × 2 columns

\n", "" ], "text/plain": [ - " geometry tzid\n", - "FID \n", - "0 POLYGON ((-11.00000 35.00000, -10.80000 35.000... NaN\n", - "1 POLYGON ((-10.80000 35.00000, -10.60000 35.000... NaN\n", - "2 POLYGON ((-10.60000 35.00000, -10.40000 35.000... NaN\n", - "3 POLYGON ((-10.40000 35.00000, -10.20000 35.000... NaN\n", - "4 POLYGON ((-10.20000 35.00000, -10.00000 35.000... NaN\n", - "... ... ...\n", - "22495 POLYGON ((18.00000 64.80000, 18.20000 64.80000... Europe/Stockholm\n", - "22496 POLYGON ((18.20000 64.80000, 18.40000 64.80000... Europe/Stockholm\n", - "22497 POLYGON ((18.40000 64.80000, 18.60000 64.80000... Europe/Stockholm\n", - "22498 POLYGON ((18.60000 64.80000, 18.80000 64.80000... Europe/Stockholm\n", - "22499 POLYGON ((18.80000 64.80000, 19.00000 64.80000... Europe/Stockholm\n", + " geometry tzid\n", + "FID \n", + "0 POLYGON ((-11.00000 35.00000, -10.80000 35.000... NaN\n", + "1 POLYGON ((-10.80000 35.00000, -10.60000 35.000... NaN\n", + "2 POLYGON ((-10.60000 35.00000, -10.40000 35.000... NaN\n", + "3 POLYGON ((-10.40000 35.00000, -10.20000 35.000... NaN\n", + "4 POLYGON ((-10.20000 35.00000, -10.00000 35.000... NaN\n", + "... ... ...\n", + "14995 POLYGON ((18.00000 54.80000, 18.20000 54.80000... Europe/Warsaw\n", + "14996 POLYGON ((18.20000 54.80000, 18.40000 54.80000... Europe/Warsaw\n", + "14997 POLYGON ((18.40000 54.80000, 18.60000 54.80000... Europe/Warsaw\n", + "14998 POLYGON ((18.60000 54.80000, 18.80000 54.80000... Europe/Warsaw\n", + "14999 POLYGON ((18.80000 54.80000, 19.00000 54.80000... NaN\n", "\n", - "[22500 rows x 2 columns]" + "[15000 rows x 2 columns]" ] }, - "execution_count": 17, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -679,7 +1232,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 26, "metadata": {}, "outputs": [ {