diff --git a/CHANGELOG.md b/CHANGELOG.md
index c79027aa14aae72985ec10f62c7124cbe2ca493f..8561210f1d6cebc0da7853955423016879ff46aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
### 1.1.1
* Release date: ???
* Changes and new features:
+ * Write 2D string data so that variables obtained from shapefiles after a spatial join can be saved ([#49](https://earth.bsc.es/gitlab/es/NES/-/issues/49))
* Bugs fixing:
* Bug on `cell_methods` serial write ([#53](https://earth.bsc.es/gitlab/es/NES/-/issues/53))
* Horizontal Interpolation Conservative: Improvement on memory usage when calculating the weight matrix ([#54](https://earth.bsc.es/gitlab/es/NES/-/issues/54))
diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py
index da83b2b923929004314b5a81417bb33b94b2d4fb..7b0759831cd17644ac85f83e64e03354738acea3 100644
--- a/nes/nc_projections/default_nes.py
+++ b/nes/nc_projections/default_nes.py
@@ -88,7 +88,7 @@ class Nes(object):
"""
def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y',
avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False,
- balanced=False, times=None, **kwargs):
+ balanced=False, times=None, strlen=75, **kwargs):
"""
Initialize the Nes class
@@ -246,6 +246,9 @@ class Nes(object):
# Writing options
self.zip_lvl = 0
+ # String length, used as the size of the 'strlen' dimension when writing
+ self.strlen = strlen
+
# Dimensions information
self._var_dim = None
self._lat_dim = None
@@ -318,6 +321,7 @@ class Nes(object):
del self.lat_bnds
del self._lon_bnds
del self.lon_bnds
+ del self.strlen
del self.shapefile
for cell_measure in self.cell_measures.keys():
if self.cell_measures[cell_measure]['data'] is not None:
@@ -1425,6 +1429,7 @@ class Nes(object):
"""
units = self.__parse_time_unit(time.units)
+
if not hasattr(time, 'calendar'):
calendar = 'standard'
else:
@@ -1479,7 +1484,6 @@ class Nes(object):
if self.master:
nc_var = self.netcdf.variables['time']
time_data, units, calendar = self.__parse_time(nc_var)
-
time = num2date(time_data, units, calendar=calendar)
time = [aux.replace(second=0, microsecond=0) for aux in time]
else:
@@ -1797,10 +1801,21 @@ class Nes(object):
self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
data = data.reshape(1, 1, data.shape[-2], data.shape[-1])
elif len(var_dims) == 3:
- data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'],
- self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
- self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
- data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1])
+ if 'strlen' in var_dims:
+ data = nc_var[self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
+ self.read_axis_limits['x_min']:self.read_axis_limits['x_max'],
+ :]
+ data_aux = np.empty(shape=(data.shape[0], data.shape[1]), dtype=np.object)
+ for lat_n in range(data.shape[0]):
+ for lon_n in range(data.shape[1]):
+ data_aux[lat_n, lon_n] = ''.join(
+ data[lat_n, lon_n].tostring().decode('ascii').replace('\x00', ''))
+ data = data_aux.reshape(1, 1, data_aux.shape[-2], data_aux.shape[-1])
+ else:
+ data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'],
+ self.read_axis_limits['y_min']:self.read_axis_limits['y_max'],
+ self.read_axis_limits['x_min']:self.read_axis_limits['x_max']]
+ data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1])
elif len(var_dims) == 4:
data = nc_var[self.read_axis_limits['t_min']:self.read_axis_limits['t_max'],
self.read_axis_limits['z_min']:self.read_axis_limits['z_max'],
@@ -2077,6 +2092,9 @@ class Nes(object):
netcdf.createDimension('lon', len(self._lon['data']))
netcdf.createDimension('lat', len(self._lat['data']))
+ # Create string length dimension
+ netcdf.createDimension('strlen', self.strlen)
+
return None
def _create_dimension_variables(self, netcdf):
@@ -2197,12 +2215,63 @@ class Nes(object):
for i, (var_name, var_dict) in enumerate(self.variables.items()):
if var_dict['data'] is not None:
+
+ # Get dimensions
+ if var_dict['data'].shape == 4:
+ var_dims = ('time', 'lev',) + self._var_dim
+ else:
+ var_dims = self._var_dim
+
+ # Get data type
+ if 'dtype' in var_dict.keys():
+ var_dtype = var_dict['dtype']
+ if var_dtype != var_dict['data'].dtype:
+ msg = "WARNING!!! "
+ msg += "Different data types for variable {0}. ".format(var_name)
+ msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype,
+ var_dict['data'].dtype)
+ warnings.warn(msg)
+ try:
+ var_dict['data'] = var_dict['data'].astype(var_dtype)
+ except Exception as e: # TODO: Detect exception
+ raise e("It was not possible to cast the data to the input dtype.")
+ else:
+ var_dtype = var_dict['data'].dtype
+
+ # Transform objects into strings
+ if var_dtype == np.dtype(object):
+ var_dict['data'] = var_dict['data'].astype(str)
+ var_dtype = var_dict['data'].dtype
+
+ # Convert list of strings to chars for parallelization
+ try:
+ unicode_type = len(max(var_dict['data'].flatten(), key=len))
+ if ((var_dict['data'].dtype == np.dtype(' 0, complevel=self.zip_lvl)
else:
if self.balanced:
@@ -2212,7 +2281,7 @@ class Nes(object):
else:
chunk_size = None
chunk_size = self.comm.bcast(chunk_size, root=0)
- var = netcdf.createVariable(var_name, var_dict['data'].dtype, ('time', 'lev',) + self._var_dim,
+ var = netcdf.createVariable(var_name, var_dtype, var_dims,
zlib=self.zip_lvl > 0, complevel=self.zip_lvl,
chunksizes=chunk_size)
if self.info:
@@ -2228,31 +2297,47 @@ class Nes(object):
if att_name == 'data':
if self.info:
print("Rank {0:03d}: Filling {1})".format(self.rank, var_name))
- try:
- var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'],
- self.write_axis_limits['z_min']:self.write_axis_limits['z_max'],
- self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
- self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value
- except ValueError:
- var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'],
- 0,
- self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
- self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value
- # msg = "*WARNING* '{0}' variable is a 3D field. Setting it on first (0) layer.".format(
- # var_name)
- # warn(msg)
- except IndexError:
- raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format(
+ if len(att_value.shape) == 4:
+ try:
var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'],
self.write_axis_limits['z_min']:self.write_axis_limits['z_max'],
self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
- self.write_axis_limits['x_min']:self.write_axis_limits['x_max']].shape,
- att_value.shape))
+ self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value
+ except ValueError:
+ var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'],
+ 0,
+ self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
+ self.write_axis_limits['x_min']:self.write_axis_limits['x_max']] = att_value
+ # msg = "*WARNING* '{0}' variable is a 3D field. Setting it on first (0) layer.".format(
+ # var_name)
+ # warn(msg)
+ except IndexError:
+ raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format(
+ var[self.write_axis_limits['t_min']:self.write_axis_limits['t_max'],
+ self.write_axis_limits['z_min']:self.write_axis_limits['z_max'],
+ self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
+ self.write_axis_limits['x_min']:self.write_axis_limits['x_max']].shape,
+ att_value.shape))
+ elif len(att_value.shape) == 3:
+ if 'strlen' in var_dims:
+ try:
+ var[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
+ self.write_axis_limits['x_min']:self.write_axis_limits['x_max'],
+ :] = att_value
+ except IndexError:
+ raise IndexError("Different shapes. out_shape={0}, data_shp={1}".format(
+ var[self.write_axis_limits['y_min']:self.write_axis_limits['y_max'],
+ self.write_axis_limits['x_min']:self.write_axis_limits['x_max'],
+ :].shape,
+ att_value.shape))
+ else:
+ raise NotImplementedError('It is not possible to write 3D variables.')
if self.info:
print("Rank {0:03d}: Var {1} data ({2}/{3})".format(self.rank, var_name, i + 1,
len(self.variables)))
elif att_name not in ['chunk_size', 'var_dims', 'dimensions']:
var.setncattr(att_name, att_value)
+
self._set_var_crs(var)
if self.info:
print("Rank {0:03d}: Var {1} completed ({2}/{3})".format(self.rank, var_name, i + 1,
diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py
index 9c3d859b48ad1c157095d42b570e9977ddd1f0e0..5a2edaa418dae585cdfaaffc6ab6a38f8cb848f3 100644
--- a/nes/nc_projections/points_nes.py
+++ b/nes/nc_projections/points_nes.py
@@ -369,7 +369,7 @@ class PointsNes(Nes):
# Convert list of strings to chars for parallelization
try:
- unicode_type = len(max(var_dict['data'], key=len))
+ unicode_type = len(max(var_dict['data'].flatten(), key=len))
if ((var_dict['data'].dtype == np.dtype('...\n",
" \n",
" \n",
- " \n",
" 22495 \n",
- " POLYGON ((18.00000 64.80000, 18.20000 64.80000... \n",
- " Europe/Stockholm \n",
+ " 14995 \n",
+ " POLYGON ((18.00000 54.80000, 18.20000 54.80000... \n",
+ " Europe/Warsaw \n",
" \n",
- " \n",
" 22496 \n",
- " POLYGON ((18.20000 64.80000, 18.40000 64.80000... \n",
- " Europe/Stockholm \n",
+ " 14996 \n",
+ " POLYGON ((18.20000 54.80000, 18.40000 54.80000... \n",
+ " Europe/Warsaw \n",
" \n",
- " \n",
" 22497 \n",
- " POLYGON ((18.40000 64.80000, 18.60000 64.80000... \n",
- " Europe/Stockholm \n",
+ " 14997 \n",
+ " POLYGON ((18.40000 54.80000, 18.60000 54.80000... \n",
+ " Europe/Warsaw \n",
" \n",
- " \n",
" 22498 \n",
- " POLYGON ((18.60000 64.80000, 18.80000 64.80000... \n",
- " Europe/Stockholm \n",
+ " 14998 \n",
+ " POLYGON ((18.60000 54.80000, 18.80000 54.80000... \n",
+ " Europe/Warsaw \n",
" \n",
- " \n",
" \n",
"\n",
- "22499 \n",
- " POLYGON ((18.80000 64.80000, 19.00000 64.80000... \n",
- " Europe/Stockholm \n",
+ " 14999 \n",
+ " POLYGON ((18.80000 54.80000, 19.00000 54.80000... \n",
+ " NaN \n",
"
22500 rows × 2 columns
\n", + "15000 rows × 2 columns
\n", "" ], "text/plain": [ - " geometry tzid\n", - "FID \n", - "0 POLYGON ((-11.00000 35.00000, -10.80000 35.000... NaN\n", - "1 POLYGON ((-10.80000 35.00000, -10.60000 35.000... NaN\n", - "2 POLYGON ((-10.60000 35.00000, -10.40000 35.000... NaN\n", - "3 POLYGON ((-10.40000 35.00000, -10.20000 35.000... NaN\n", - "4 POLYGON ((-10.20000 35.00000, -10.00000 35.000... NaN\n", - "... ... ...\n", - "22495 POLYGON ((18.00000 64.80000, 18.20000 64.80000... Europe/Stockholm\n", - "22496 POLYGON ((18.20000 64.80000, 18.40000 64.80000... Europe/Stockholm\n", - "22497 POLYGON ((18.40000 64.80000, 18.60000 64.80000... Europe/Stockholm\n", - "22498 POLYGON ((18.60000 64.80000, 18.80000 64.80000... Europe/Stockholm\n", - "22499 POLYGON ((18.80000 64.80000, 19.00000 64.80000... Europe/Stockholm\n", + " geometry tzid\n", + "FID \n", + "0 POLYGON ((-11.00000 35.00000, -10.80000 35.000... NaN\n", + "1 POLYGON ((-10.80000 35.00000, -10.60000 35.000... NaN\n", + "2 POLYGON ((-10.60000 35.00000, -10.40000 35.000... NaN\n", + "3 POLYGON ((-10.40000 35.00000, -10.20000 35.000... NaN\n", + "4 POLYGON ((-10.20000 35.00000, -10.00000 35.000... NaN\n", + "... ... ...\n", + "14995 POLYGON ((18.00000 54.80000, 18.20000 54.80000... Europe/Warsaw\n", + "14996 POLYGON ((18.20000 54.80000, 18.40000 54.80000... Europe/Warsaw\n", + "14997 POLYGON ((18.40000 54.80000, 18.60000 54.80000... Europe/Warsaw\n", + "14998 POLYGON ((18.60000 54.80000, 18.80000 54.80000... Europe/Warsaw\n", + "14999 POLYGON ((18.80000 54.80000, 19.00000 54.80000... NaN\n", "\n", - "[22500 rows x 2 columns]" + "[15000 rows x 2 columns]" ] }, - "execution_count": 17, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -679,7 +1232,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 26, "metadata": {}, "outputs": [ {