From 6addd55e4fd2545513468f9032fb7cdfb6365c8b Mon Sep 17 00:00:00 2001 From: ctena Date: Mon, 13 Mar 2023 18:43:09 +0100 Subject: [PATCH 1/3] Improved concatenation function --- nes/load_nes.py | 63 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/nes/load_nes.py b/nes/load_nes.py index 5bfd333..8de2346 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -4,9 +4,12 @@ import os from mpi4py import MPI from netCDF4 import Dataset import warnings - +import numpy as np from .nc_projections import * +DIM_VAR_NAMES = ['lat', 'latitude', 'lat_bnds', 'lon', 'longitude', 'lon_bnds', 'time', 'time_bnds', 'lev', 'level', + 'cell_area', 'crs', 'rotated_pole', 'x', 'y', 'rlat', 'rlon', 'Lambert_conformal', 'mercator'] + def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, balanced=False): @@ -270,15 +273,53 @@ def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', nessy_first = nessy_list[0] for i, aux_nessy in enumerate(nessy_list[1:]): if isinstance(aux_nessy, str): - aux_nessy = open_netcdf(aux_nessy, - comm=comm, - parallel_method=parallel_method, - avoid_first_hours=avoid_first_hours, - avoid_last_hours=avoid_last_hours, - first_level=first_level, - last_level=last_level, - balanced=balanced - ) - nessy_first.concatenate(aux_nessy) + nc_add = Dataset(filename=aux_nessy, mode='r') + for var_name, var_info in nc_add.variables.items(): + if var_name not in DIM_VAR_NAMES: + nessy_first.variables[var_name] = {} + var_dims = var_info.dimensions + # Read data in 4 dimensions + if len(var_dims) < 2: + data = var_info[:] + elif len(var_dims) == 2: + data = var_info[nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'], + nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max']] + data = data.reshape(1, 1, data.shape[-2], data.shape[-1]) + elif 
len(var_dims) == 3: + if 'strlen' in var_dims: + data = var_info[nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'], + nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max'], + :] + data_aux = np.empty(shape=(data.shape[0], data.shape[1]), dtype=np.object) + for lat_n in range(data.shape[0]): + for lon_n in range(data.shape[1]): + data_aux[lat_n, lon_n] = ''.join( + data[lat_n, lon_n].tostring().decode('ascii').replace('\x00', '')) + data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1])) + else: + data = var_info[nessy_first.read_axis_limits['t_min']:nessy_first.read_axis_limits['t_max'], + nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'], + nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max']] + data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1]) + elif len(var_dims) == 4: + data = var_info[nessy_first.read_axis_limits['t_min']:nessy_first.read_axis_limits['t_max'], + nessy_first.read_axis_limits['z_min']:nessy_first.read_axis_limits['z_max'], + nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'], + nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max']] + else: + raise TypeError("{} data shape is not accepted".format(var_dims)) + + nessy_first.variables[var_name]['data'] = data + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ['missing_value', '_FillValue']: + value = getattr(var_info, attrname) + if value in ['unitless', '-']: + value = '' + nessy_first.variables[var_name][attrname] = value + nc_add.close() + + else: + nessy_first.concatenate(aux_nessy) return nessy_first -- GitLab From 74ac5b0cf2cdaf59d61b4cda25c06a6178c96056 Mon Sep 17 00:00:00 2001 From: ctena Date: Mon, 13 Mar 2023 18:44:47 +0100 Subject: [PATCH 2/3] Write: Corrected str write bug & capability to write all 0 data only with an integer provided --- nes/nc_projections/default_nes.py | 
126 ++++++++++++++++-------------- 1 file changed, 68 insertions(+), 58 deletions(-) diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 2d05637..9585c79 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -2238,66 +2238,68 @@ class Nes(object): for i, (var_name, var_dict) in enumerate(self.variables.items()): if var_dict['data'] is not None: - - # Get dimensions - if len(var_dict['data'].shape) == 4: + if isinstance(var_dict['data'], int) and var_dict['data'] == 0: var_dims = ('time', 'lev',) + self._var_dim + var_dtype = np.float32 else: - var_dims = self._var_dim - - # Get data type - if 'dtype' in var_dict.keys(): - var_dtype = var_dict['dtype'] - if var_dtype != var_dict['data'].dtype: - msg = "WARNING!!! " - msg += "Different data types for variable {0}. ".format(var_name) - msg += "Input dtype={0}. Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) - warnings.warn(msg) + # Get dimensions + if len(var_dict['data'].shape) == 4: + var_dims = ('time', 'lev',) + self._var_dim + else: + var_dims = self._var_dim + + # Get data type + if 'dtype' in var_dict.keys(): + var_dtype = var_dict['dtype'] + if var_dtype != var_dict['data'].dtype: + msg = "WARNING!!! " + msg += "Different data types for variable {0}. ".format(var_name) + msg += "Input dtype={0}. 
Data dtype={1}.".format(var_dtype, var_dict['data'].dtype) + warnings.warn(msg) + try: + var_dict['data'] = var_dict['data'].astype(var_dtype) + except Exception as e: # TODO: Detect exception + print(e) + raise TypeError("It was not possible to cast the data to the input dtype.") + else: + var_dtype = var_dict['data'].dtype + # Transform objects into strings + if var_dtype == np.dtype(object): + var_dict['data'] = var_dict['data'].astype(str) + var_dtype = var_dict['data'].dtype + + # Convert list of strings to chars for parallelization + if not np.issubdtype(var_dict['data'].dtype, np.number): try: - var_dict['data'] = var_dict['data'].astype(var_dtype) - except Exception as e: # TODO: Detect exception + # Get unicode + unicode_type = len(max(var_dict['data'].flatten(), key=len)) + + if ((var_dict['data'].dtype == np.dtype(' Date: Tue, 14 Mar 2023 12:02:39 +0100 Subject: [PATCH 3/3] Correct str write bug in points --- nes/nc_projections/points_nes.py | 2 +- nes/nc_projections/points_nes_ghost.py | 2 +- nes/nc_projections/points_nes_providentia.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index 318302e..f542bac 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -368,7 +368,7 @@ class PointsNes(Nes): var_dims = ('time',) + self._var_dim # Convert list of strings to chars for parallelization - if (var_dict['data'].dtype != float) and (var_dict['data'].dtype != int): + if not np.issubdtype(var_dict['data'].dtype, np.number): try: # Get unicode unicode_type = len(max(var_dict['data'].flatten(), key=len)) diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 695ac4b..936cbca 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -350,7 +350,7 @@ class PointsNesGHOST(PointsNes): var_dims = self._var_dim + ('time',) # Convert list of strings to chars 
for parallelization - if (var_dict['data'].dtype != float) and (var_dict['data'].dtype != int): + if not np.issubdtype(var_dict['data'].dtype, np.number): try: # Get unicode unicode_type = len(max(var_dict['data'].flatten(), key=len)) diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 3e45308..937d7e6 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -386,7 +386,7 @@ class PointsNesProvidentia(PointsNes): var_dims = self._var_dim + ('time',) # Convert list of strings to chars for parallelization - if (var_dict['data'].dtype != float) and (var_dict['data'].dtype != int): + if not np.issubdtype(var_dict['data'].dtype, np.number): try: # Get unicode unicode_type = len(max(var_dict['data'].flatten(), key=len)) -- GitLab