From 513cf4e4bd7cfe02b2dded0e9ebb84669d135edd Mon Sep 17 00:00:00 2001 From: ctena Date: Wed, 3 Jan 2024 15:41:56 +0100 Subject: [PATCH 1/4] - Removed xarray - Starting open_raster() --- nes/__init__.py | 1 + nes/create_nes.py | 12 +- nes/load_nes.py | 65 ++-- nes/methods/vertical_interpolation.py | 1 - nes/nc_projections/__init__.py | 1 + nes/nc_projections/default_nes.py | 293 +++++++------------ nes/nc_projections/latlon_nes.py | 12 +- nes/nc_projections/lcc_nes.py | 12 +- nes/nc_projections/mercator_nes.py | 12 +- nes/nc_projections/points_nes.py | 12 +- nes/nc_projections/points_nes_ghost.py | 12 +- nes/nc_projections/points_nes_providentia.py | 15 +- nes/nc_projections/raster_nes.py | 13 + nes/nc_projections/rotated_nes.py | 12 +- nes/nc_projections/rotated_nested_nes.py | 7 +- requirements.txt | 4 +- 16 files changed, 214 insertions(+), 270 deletions(-) create mode 100644 nes/nc_projections/raster_nes.py diff --git a/nes/__init__.py b/nes/__init__.py index 884723b..86a702f 100644 --- a/nes/__init__.py +++ b/nes/__init__.py @@ -2,6 +2,7 @@ __date__ = "2023-06-22" __version__ = "1.1.3" from .load_nes import open_netcdf, concatenate_netcdfs +from .load_nes import open_raster from .create_nes import create_nes, from_shapefile from .nc_projections import * from .methods.cell_measures import calculate_geometry_area diff --git a/nes/create_nes.py b/nes/create_nes.py index 7444bb5..98f81f0 100644 --- a/nes/create_nes.py +++ b/nes/create_nes.py @@ -92,32 +92,32 @@ def create_nes(comm=None, info=False, projection=None, parallel_method='Y', bala parallel_method = 'X' elif parallel_method == 'T': raise NotImplementedError("Parallel method T not implemented yet") - nessy = PointsNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, + nessy = PointsNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection in ['regular', 'global']: - nessy = LatLonNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, + nessy = LatLonNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'rotated': - nessy = RotatedNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, + nessy = RotatedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'rotated-nested': - nessy = RotatedNestedNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, + nessy = RotatedNestedNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'lcc': - nessy = LCCNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, + nessy = LCCNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, 
avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) elif projection == 'mercator': - nessy = MercatorNes(comm=comm, dataset=None, xarray=False, info=info, parallel_method=parallel_method, + nessy = MercatorNes(comm=comm, dataset=None, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, balanced=balanced, create_nes=True, times=times, **kwargs) diff --git a/nes/load_nes.py b/nes/load_nes.py index dd08add..0fb271e 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -12,7 +12,7 @@ DIM_VAR_NAMES = ['lat', 'latitude', 'lat_bnds', 'lon', 'longitude', 'lon_bnds', 'cell_area', 'crs', 'rotated_pole', 'x', 'y', 'rlat', 'rlon', 'Lambert_conformal', 'mercator'] -def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, +def open_netcdf(path, comm=None, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, balanced=False): """ Open a netCDF file. @@ -23,8 +23,6 @@ def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', Path to the NetCDF file to read. comm : MPI.COMM MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. - xarray : bool - (Not working) Indicates if you want to use xarray. Default: False. info : bool Indicates if you want to print (stdout) the reading/writing steps. avoid_first_hours : int @@ -54,19 +52,17 @@ def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', if not os.path.exists(path): raise FileNotFoundError(path) - if xarray: - dataset = None - else: - dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) - # Parallel is not needed for reading - # if comm.Get_size() == 1: - # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) - # else: - # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=True, comm=comm, info=MPI.Info()) + + dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) + # Parallel is not needed for reading + # if comm.Get_size() == 1: + # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) + # else: + # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=True, comm=comm, info=MPI.Info()) if __is_rotated(dataset): # Rotated grids - nessy = RotatedNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, + nessy = RotatedNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_points(dataset): @@ -76,35 +72,35 @@ def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', parallel_method = 'X' if __is_points_ghost(dataset): # Points - GHOST - nessy = PointsNesGHOST(comm=comm, dataset=dataset, xarray=xarray, info=info, + nessy = PointsNesGHOST(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_points_providentia(dataset): # Points - Providentia - nessy = PointsNesProvidentia(comm=comm, dataset=dataset, xarray=xarray, info=info, + nessy = PointsNesProvidentia(comm=comm, dataset=dataset, info=info, 
parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) else: # Points - non-GHOST - nessy = PointsNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, + nessy = PointsNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_lcc(dataset): # Lambert conformal conic grids - nessy = LCCNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, + nessy = LCCNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) elif __is_mercator(dataset): # Mercator grids - nessy = MercatorNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, + nessy = MercatorNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) else: # Regular grids - nessy = LatLonNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method, + nessy = LatLonNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) @@ -326,3 +322,34 @@ def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', nessy_first.concatenate(aux_nessy) return nessy_first + + +def open_raster(path, comm=None, info=False): + """ + Open a Raster (TIF) file + + Parameters + ---------- + path : str + Path to the Raster (TIFF) file to read. + comm : MPI.COMM + MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. + info : bool + Indicates if you want to print (stdout) the reading/writing steps. 
+ + Returns + ------- + RasterNes + Raster NES object + """ + if comm is None: + comm = MPI.COMM_WORLD + else: + comm = comm + + if not os.path.exists(path): + raise FileNotFoundError(path) + + nessy = RasterNes(path=path, comm=comm, info=info) + + return nessy diff --git a/nes/methods/vertical_interpolation.py b/nes/methods/vertical_interpolation.py index 5a980eb..995207b 100644 --- a/nes/methods/vertical_interpolation.py +++ b/nes/methods/vertical_interpolation.py @@ -194,7 +194,6 @@ def interpolate_vertical(self, new_levels, new_src_vertical=None, kind='linear', # Remove original file information self.__ini_path = None - self.dataset = None self.netcdf = None return self diff --git a/nes/nc_projections/__init__.py b/nes/nc_projections/__init__.py index d4c4b9f..f5c287b 100644 --- a/nes/nc_projections/__init__.py +++ b/nes/nc_projections/__init__.py @@ -7,3 +7,4 @@ from .points_nes_ghost import PointsNesGHOST from .points_nes_providentia import PointsNesProvidentia from .lcc_nes import LCCNes from .mercator_nes import MercatorNes +from .raster_nes import RasterNes diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 55cd1a4..12efaff 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -5,7 +5,6 @@ import gc import warnings import numpy as np import pandas as pd -from xarray import open_dataset from netCDF4 import Dataset, num2date, date2num, stringtochar from mpi4py import MPI from cfunits import Units @@ -35,8 +34,6 @@ class Nes(object): Size of the communicator. info : bool Indicates if you want to print reading/writing info. - is_xarray : bool - (Not working) Indicates if you want to use xarray as default. __ini_path : str Path to the original file to read when open_netcdf is called. hours_start : int @@ -91,7 +88,7 @@ class Nes(object): Dictionary with the projection information. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -105,10 +102,8 @@ class Nes(object): Path to the NetCDF to initialize the object. info: bool Indicates if you want to get reading/writing info. - dataset: Dataset + dataset: Dataset or None NetCDF4-python Dataset to initialize the class. - xarray: bool - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default over Y axis accepted values: ['X', 'Y', 'T']. 
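A minimal usage sketch for the `open_raster()` entry point introduced earlier in this patch. The file path is a placeholder, and since `RasterNes` is only a skeleton at this point in the series, the example shows nothing beyond the call signature the patch actually adds:

```python
from mpi4py import MPI
from nes import open_raster

# open_raster() mirrors open_netcdf(): it validates the path, falls back to
# MPI.COMM_WORLD when no communicator is given, and returns a RasterNes object.
raster = open_raster('/path/to/landuse.tif', comm=MPI.COMM_WORLD, info=True)
```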
@@ -140,7 +135,6 @@ class Nes(object): # General info self.info = info - self.is_xarray = xarray self.__ini_path = path self.shapefile = None @@ -171,7 +165,6 @@ class Nes(object): if create_nes: self.netcdf = None - self.dataset = None # Set string length self.strlen = None @@ -208,21 +201,10 @@ class Nes(object): self.global_attrs = self.__get_global_attributes(create_nes) else: - if dataset is not None: - if self.is_xarray: - self.dataset = dataset - self.netcdf = None - else: - self.dataset = None - self.netcdf = dataset + self.netcdf = dataset elif self.__ini_path is not None: - if self.is_xarray: - self.dataset = self.__open_dataset() - self.netcdf = None - else: - self.dataset = None - self.netcdf = self.__open_netcdf4() + self.open() # Get string length self.strlen = self._get_strlen() @@ -286,7 +268,7 @@ class Nes(object): self.first_level = None @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -302,8 +284,6 @@ class Nes(object): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default over Y axis accepted values: ['X', 'Y', 'T']. @@ -324,7 +304,7 @@ class Nes(object): List of times to substitute the current ones while creation. """ - new = Nes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, parallel_method=parallel_method, + new = Nes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) @@ -745,16 +725,12 @@ class Nes(object): if isinstance(var_list, str): var_list = [var_list] - if self.is_xarray: - self.dataset = self.dataset.drop_vars(var_list) - self.variables = self._get_lazy_variables() - else: - if self.variables is not None: - for var_name in var_list: - if var_name in self.variables: - if 'data' in self.variables[var_name].keys(): - del self.variables[var_name]['data'] - del self.variables[var_name] + if self.variables is not None: + for var_name in var_list: + if var_name in self.variables: + if 'data' in self.variables[var_name].keys(): + del self.variables[var_name]['data'] + del self.variables[var_name] gc.collect() return None @@ -1457,37 +1433,10 @@ class Nes(object): Open the NetCDF. """ - if self.is_xarray: - self.dataset = self.__open_dataset() - self.netcdf = None - else: - self.dataset = None - self.netcdf = self.__open_netcdf4() + self.netcdf = self.__open_netcdf4() return None - def __open_dataset(self): - """ - Open the NetCDF with xarray. - - Returns - ------- - dataset : xr.Dataset - Open dataset. - """ - - if self.master: - warnings.filterwarnings('ignore') # Disabling warnings while reading MONARCH original file - dataset = open_dataset(self.__ini_path, decode_coords='all') - warnings.filterwarnings('default') # Re-activating warnings - else: - dataset = None - - dataset = self.comm.bcast(dataset, root=0) - self.dataset = dataset - - return dataset - def __open_netcdf4(self, mode='r'): """ Open the NetCDF with netcdf4-python. 
@@ -1660,23 +1609,20 @@ class Nes(object): List of times (datetime.datetime) of the NetCDF data. """ - if self.is_xarray: - time = self.variables['time'] + if self.master: + nc_var = self.netcdf.variables['time'] + time_data, units, calendar = self.__parse_time(nc_var) + # Extracting time resolution depending on the units + self._time_resolution = self.__get_time_resolution_from_units(units) + # Checking if it is a climatology dataset + if hasattr(nc_var, 'climatology'): + self._climatology = True + self._climatology_var_name = nc_var.climatology + time = num2date(time_data, units, calendar=calendar) + time = [aux.replace(second=0, microsecond=0) for aux in time] else: - if self.master: - nc_var = self.netcdf.variables['time'] - time_data, units, calendar = self.__parse_time(nc_var) - # Extracting time resolution depending on the units - self._time_resolution = self.__get_time_resolution_from_units(units) - # Checking if it is a climatology dataset - if hasattr(nc_var, 'climatology'): - self._climatology = True - self._climatology_var_name = nc_var.climatology - time = num2date(time_data, units, calendar=calendar) - time = [aux.replace(second=0, microsecond=0) for aux in time] - else: - time = None - time = self.comm.bcast(time, root=0) + time = None + time = self.comm.bcast(time, root=0) self.free_vars('time') return time @@ -1736,28 +1682,24 @@ class Nes(object): Longitude bounds of the NetCDF data. """ - if self.is_xarray: - lat_bnds = self.variables['lat_bnds'] - lon_bnds = self.variables['lon_bnds'] - else: - if self.master: - if not create_nes: - if 'lat_bnds' in self.netcdf.variables.keys(): - lat_bnds = {'data': self._unmask_array(self.netcdf.variables['lat_bnds'][:])} - else: - lat_bnds = None - if 'lon_bnds' in self.netcdf.variables.keys(): - lon_bnds = {'data': self._unmask_array(self.netcdf.variables['lon_bnds'][:])} - else: - lon_bnds = None + if self.master: + if not create_nes: + if 'lat_bnds' in self.netcdf.variables.keys(): + lat_bnds = {'data': self._unmask_array(self.netcdf.variables['lat_bnds'][:])} else: lat_bnds = None + if 'lon_bnds' in self.netcdf.variables.keys(): + lon_bnds = {'data': self._unmask_array(self.netcdf.variables['lon_bnds'][:])} + else: lon_bnds = None else: lat_bnds = None lon_bnds = None - lat_bnds = self.comm.bcast(lat_bnds, root=0) - lon_bnds = self.comm.bcast(lon_bnds, root=0) + else: + lat_bnds = None + lon_bnds = None + lat_bnds = self.comm.bcast(lat_bnds, root=0) + lon_bnds = self.comm.bcast(lon_bnds, root=0) self.free_vars(['lat_bnds', 'lon_bnds']) @@ -1812,12 +1754,10 @@ class Nes(object): try: dimension_name = set(possible_names).intersection(set(self.variables.keys())).pop() - if self.is_xarray: - nc_var = self.dataset[dimension_name] - else: - nc_var = self.variables[dimension_name].copy() - nc_var['data'] = self.netcdf.variables[dimension_name][:] - if hasattr(nc_var, 'units'): + + nc_var = self.variables[dimension_name].copy() + nc_var['data'] = self.netcdf.variables[dimension_name][:] + if hasattr(nc_var, 'units'): if nc_var['units'] in ['unitless', '-']: nc_var['units'] = '' self.free_vars(dimension_name) @@ -1938,32 +1878,29 @@ class Nes(object): ...} """ - if self.is_xarray: - variables = self.dataset.variables + if self.master: + variables = {} + # Initialise data + for var_name, var_info in self.netcdf.variables.items(): + variables[var_name] = {} + variables[var_name]['data'] = None + variables[var_name]['dimensions'] = var_info.dimensions + variables[var_name]['dtype'] = var_info.dtype + if variables[var_name]['dtype'] in [str, 
np.object]: + if self.strlen is None: + self.set_strlen() + variables[var_name]['dtype'] = str + + # Avoid some attributes + for attrname in var_info.ncattrs(): + if attrname not in ['missing_value', '_FillValue']: + value = getattr(var_info, attrname) + if value in ['unitless', '-']: + value = '' + variables[var_name][attrname] = value else: - if self.master: - variables = {} - # Initialise data - for var_name, var_info in self.netcdf.variables.items(): - variables[var_name] = {} - variables[var_name]['data'] = None - variables[var_name]['dimensions'] = var_info.dimensions - variables[var_name]['dtype'] = var_info.dtype - if variables[var_name]['dtype'] in [str, np.object]: - if self.strlen is None: - self.set_strlen() - variables[var_name]['dtype'] = str - - # Avoid some attributes - for attrname in var_info.ncattrs(): - if attrname not in ['missing_value', '_FillValue']: - value = getattr(var_info, attrname) - if value in ['unitless', '-']: - value = '' - variables[var_name][attrname] = value - else: - variables = None - variables = self.comm.bcast(variables, root=0) + variables = None + variables = self.comm.bcast(variables, root=0) return variables @@ -2057,7 +1994,7 @@ class Nes(object): List (or single string) of the variables to be loaded. """ - if (self.__ini_path is None) and (self.dataset is None) and (self.netcdf is None): + if (self.__ini_path is None) and (self.netcdf is None): raise RuntimeError('Only data from existing files can be loaded.') if self.netcdf is None: @@ -2154,7 +2091,6 @@ class Nes(object): if isinstance(aux_nessy, str): aux_nessy = self.new(path=aux_nessy, comm=self.comm, parallel_method=self.parallel_method, - xarray=self.is_xarray, avoid_first_hours=self.hours_start, avoid_last_hours=self.hours_end, first_level=self.first_level, last_level=self.last_level) new = True @@ -2192,12 +2128,10 @@ class Nes(object): """ gl_attrs = {} - if self.is_xarray: - gl_attrs = self.dataset.attrs - else: - if not create_nes: - for attrname in self.netcdf.ncattrs(): - gl_attrs[attrname] = getattr(self.netcdf, attrname) + + if not create_nes: + for attrname in self.netcdf.ncattrs(): + gl_attrs[attrname] = getattr(self.netcdf, attrname) return gl_attrs @@ -2891,7 +2825,6 @@ class Nes(object): def __to_netcdf_cams_ra(self, path): return to_netcdf_cams_ra(self, path) - def to_netcdf(self, path, compression_level=0, serial=False, info=False, chunking=False, type='NES', keep_open=False): """ @@ -2919,56 +2852,54 @@ class Nes(object): self.info = info self.serial_nc = None self.zip_lvl = compression_level - if self.is_xarray: - raise NotImplementedError("Writing with xarray not implemented") - else: - # if serial: - if serial and self.size > 1: - try: - data = self._gather_data(self.variables) - except KeyError: - data = self.__gather_data_py_object(self.variables) - try: - c_measures = self._gather_data(self.cell_measures) - except KeyError: - c_measures = self.__gather_data_py_object(self.cell_measures) - if self.master: - new_nc = self.copy(copy_vars=False) - new_nc.set_communicator(MPI.COMM_SELF) - new_nc.variables = data - new_nc.cell_measures = c_measures - if type in ['NES', 'DEFAULT']: - new_nc.__to_netcdf_py(path, keep_open=keep_open) - elif type == 'CAMS_RA': - new_nc.__to_netcdf_cams_ra(path) - elif type == 'MONARCH': - to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open) - elif type == 'CMAQ': - to_netcdf_cmaq(new_nc, path, keep_open=keep_open) - elif type == 'WRF_CHEM': - to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open) - else: - msg = "Unknown 
NetCDF type '{0}'. ".format(nc_type) - msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" - raise ValueError(msg) - self.serial_nc = new_nc - else: - self.serial_nc = True - else: + + # if serial: + if serial and self.size > 1: + try: + data = self._gather_data(self.variables) + except KeyError: + data = self.__gather_data_py_object(self.variables) + try: + c_measures = self._gather_data(self.cell_measures) + except KeyError: + c_measures = self.__gather_data_py_object(self.cell_measures) + if self.master: + new_nc = self.copy(copy_vars=False) + new_nc.set_communicator(MPI.COMM_SELF) + new_nc.variables = data + new_nc.cell_measures = c_measures if type in ['NES', 'DEFAULT']: - self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open) - elif nc_type == 'CAMS_RA': - self.__to_netcdf_cams_ra(path) - elif nc_type == 'MONARCH': - to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open) - elif nc_type == 'CMAQ': - to_netcdf_cmaq(self, path, keep_open=keep_open) - elif nc_type == 'WRF_CHEM': - to_netcdf_wrf_chem(self, path, keep_open=keep_open) + new_nc.__to_netcdf_py(path, keep_open=keep_open) + elif type == 'CAMS_RA': + new_nc.__to_netcdf_cams_ra(path) + elif type == 'MONARCH': + to_netcdf_monarch(new_nc, path, chunking=chunking, keep_open=keep_open) + elif type == 'CMAQ': + to_netcdf_cmaq(new_nc, path, keep_open=keep_open) + elif type == 'WRF_CHEM': + to_netcdf_wrf_chem(new_nc, path, keep_open=keep_open) else: msg = "Unknown NetCDF type '{0}'. ".format(nc_type) msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" raise ValueError(msg) + self.serial_nc = new_nc + else: + self.serial_nc = True + else: + if type in ['NES', 'DEFAULT']: + self.__to_netcdf_py(path, chunking=chunking, keep_open=keep_open) + elif nc_type == 'CAMS_RA': + self.__to_netcdf_cams_ra(path) + elif nc_type == 'MONARCH': + to_netcdf_monarch(self, path, chunking=chunking, keep_open=keep_open) + elif nc_type == 'CMAQ': + to_netcdf_cmaq(self, path, keep_open=keep_open) + elif nc_type == 'WRF_CHEM': + to_netcdf_wrf_chem(self, path, keep_open=keep_open) + else: + msg = "Unknown NetCDF type '{0}'. ".format(nc_type) + msg += "Use CAMS_RA, MONARCH or NES (or DEFAULT)" + raise ValueError(msg) self.info = old_info diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index 4b38959..05917fc 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -20,7 +20,7 @@ class LatLonNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('lon',) for a regular latitude-longitude projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -36,8 +36,6 @@ class LatLonNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. 
@@ -59,7 +57,7 @@ class LatLonNes(Nes): """ super(LatLonNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - xarray=xarray, parallel_method=parallel_method, balanced=balanced, + parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -79,7 +77,7 @@ class LatLonNes(Nes): self.free_vars('crs') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -95,8 +93,6 @@ class LatLonNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -117,7 +113,7 @@ class LatLonNes(Nes): List of times to substitute the current ones while creation. """ - new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = LatLonNes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/lcc_nes.py b/nes/nc_projections/lcc_nes.py index 0f7ff21..f697891 100644 --- a/nes/nc_projections/lcc_nes.py +++ b/nes/nc_projections/lcc_nes.py @@ -35,7 +35,7 @@ class LCCNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('y', 'x') for a LCC projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -51,8 +51,6 @@ class LCCNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -74,7 +72,7 @@ class LCCNes(Nes): """ super(LCCNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - xarray=xarray, parallel_method=parallel_method, balanced=balanced, + parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -102,7 +100,7 @@ class LCCNes(Nes): self.free_vars('crs') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -118,8 +116,6 @@ class LCCNes(Nes): Indicates if you want to get reading/writing info. 
dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -140,7 +136,7 @@ class LCCNes(Nes): List of times to substitute the current ones while creation. """ - new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = LCCNes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/mercator_nes.py b/nes/nc_projections/mercator_nes.py index 01b3819..52b8ab9 100644 --- a/nes/nc_projections/mercator_nes.py +++ b/nes/nc_projections/mercator_nes.py @@ -35,7 +35,7 @@ class MercatorNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('y', 'x') for a Mercator projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -51,8 +51,6 @@ class MercatorNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -75,7 +73,7 @@ class MercatorNes(Nes): """ super(MercatorNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - xarray=xarray, parallel_method=parallel_method, balanced=balanced, + parallel_method=parallel_method, balanced=balanced, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -103,7 +101,7 @@ class MercatorNes(Nes): self.free_vars('crs') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -119,8 +117,6 @@ class MercatorNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -141,7 +137,7 @@ class MercatorNes(Nes): List of times to substitute the current ones while creation. 
""" - new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = MercatorNes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index ee54184..bca62c0 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -28,7 +28,7 @@ class PointsNes(Nes): Tuple with the name of the dimensions of the station values. ('station',) for a points grid. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -44,8 +44,6 @@ class PointsNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset, None NetCDF4-python Dataset to initialize the class. - xarray: bool - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. accepted values: ['X', 'T']. @@ -69,7 +67,7 @@ class PointsNes(Nes): """ super(PointsNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - xarray=xarray, parallel_method=parallel_method, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -93,7 +91,7 @@ class PointsNes(Nes): self._lon_dim = ('station',) @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -109,8 +107,6 @@ class PointsNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. accepted values: ['X', 'T']. @@ -131,7 +127,7 @@ class PointsNes(Nes): List of times to substitute the current ones while creation. """ - new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = PointsNes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 09c5a44..2acdd81 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -25,7 +25,7 @@ class PointsNesGHOST(PointsNes): Data flags (given by data provider) dictionary with the portion of 'data' corresponding to the rank values. 
""" - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -41,8 +41,6 @@ class PointsNesGHOST(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -64,7 +62,7 @@ class PointsNesGHOST(PointsNes): """ super(PointsNesGHOST, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - xarray=xarray, parallel_method=parallel_method, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -78,7 +76,7 @@ class PointsNesGHOST(PointsNes): self.qa = self._get_coordinate_values(self._qa, 'X') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -94,8 +92,6 @@ class PointsNesGHOST(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -116,7 +112,7 @@ class PointsNesGHOST(PointsNes): List of times to substitute the current ones while creation. """ - new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = PointsNesGHOST(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index 533e4a8..b157cb3 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -34,9 +34,10 @@ class PointsNesProvidentia(PointsNes): grid_edge_lat : dict Grid edge latitudes dictionary with the portion of 'data' corresponding to the rank values. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, - balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, + balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, + grid_edge_lat=None, **kwargs): """ Initialize the PointsNesProvidentia class @@ -51,8 +52,6 @@ class PointsNesProvidentia(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. 
- xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -82,7 +81,7 @@ class PointsNesProvidentia(PointsNes): """ super(PointsNesProvidentia, self).__init__(comm=comm, path=path, info=info, dataset=dataset, - xarray=xarray, parallel_method=parallel_method, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, @@ -108,7 +107,7 @@ class PointsNesProvidentia(PointsNes): self.grid_edge_lat = self._get_coordinate_values(self._grid_edge_lat, '') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='X', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='X', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, model_centre_lon=None, model_centre_lat=None, grid_edge_lon=None, grid_edge_lat=None, @@ -126,8 +125,6 @@ class PointsNesProvidentia(PointsNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'X'. Accepted values: ['X']. @@ -156,7 +153,7 @@ class PointsNesProvidentia(PointsNes): Grid edge latitudes dictionary with the portion of 'data' corresponding to the rank values. """ - new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = PointsNesProvidentia(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, diff --git a/nes/nc_projections/raster_nes.py b/nes/nc_projections/raster_nes.py new file mode 100644 index 0000000..51d5e7d --- /dev/null +++ b/nes/nc_projections/raster_nes.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python + +import numpy as np +from pyproj import Proj +from .default_nes import Nes + + +class RasterNes(Nes): + + def __init__(self, comm=None, path=None, info=False, **kwargs): + super().__init__(comm=comm, path=path, info=info, dataset=None, parallel_method='Y', + balanced=False, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, + create_nes=False, times=None, **kwargs) diff --git a/nes/nc_projections/rotated_nes.py b/nes/nc_projections/rotated_nes.py index b771985..125736d 100644 --- a/nes/nc_projections/rotated_nes.py +++ b/nes/nc_projections/rotated_nes.py @@ -36,7 +36,7 @@ class RotatedNes(Nes): Tuple with the name of the dimensions of the Longitude values. ('rlat', 'rlon') for a rotated projection. """ - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -52,8 +52,6 @@ class RotatedNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. 
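The new `raster_nes.py` above is still a bare skeleton (the class only forwards fixed arguments to `Nes.__init__`), but `rasterio` is added to the requirements in this same patch, which hints at how the grid could eventually be derived from a GeoTIFF. The following is a purely illustrative sketch of reading a raster's geotransform with rasterio; none of these names exist in the patch, and it assumes a north-up raster (zero rotation terms in the affine transform):

```python
import numpy as np
import rasterio

# Hypothetical sketch, not part of the patch: derive cell-centre coordinates
# and the first band from a GeoTIFF, as a future RasterNes might need.
with rasterio.open('/path/to/landuse.tif') as src:   # placeholder path
    transform = src.transform                        # affine.Affine of the raster
    crs = src.crs                                    # coordinate reference system
    rows = np.arange(src.height)
    cols = np.arange(src.width)
    # Cell centres in the raster's own CRS (valid when transform.b == transform.d == 0)
    x_coords = transform.c + (cols + 0.5) * transform.a
    y_coords = transform.f + (rows + 0.5) * transform.e
    band1 = src.read(1)                              # first band as a 2D numpy array
```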
parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -76,7 +74,7 @@ class RotatedNes(Nes): super(RotatedNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, balanced=balanced, - xarray=xarray, parallel_method=parallel_method, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) @@ -102,7 +100,7 @@ class RotatedNes(Nes): self._lon_dim = ('rlat', 'rlon') @staticmethod - def new(comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def new(comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -118,8 +116,6 @@ class RotatedNes(Nes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. @@ -136,7 +132,7 @@ class RotatedNes(Nes): List of times to substitute the current ones while creation. """ - new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, xarray=xarray, + new = RotatedNes(comm=comm, path=path, info=info, dataset=dataset, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, balanced=balanced, times=times, **kwargs) diff --git a/nes/nc_projections/rotated_nested_nes.py b/nes/nc_projections/rotated_nested_nes.py index e56f427..cafc607 100644 --- a/nes/nc_projections/rotated_nested_nes.py +++ b/nes/nc_projections/rotated_nested_nes.py @@ -4,9 +4,10 @@ import numpy as np from netCDF4 import Dataset from .rotated_nes import RotatedNes + class RotatedNestedNes(RotatedNes): - def __init__(self, comm=None, path=None, info=False, dataset=None, xarray=False, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, dataset=None, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, balanced=False, times=None, **kwargs): """ @@ -22,8 +23,6 @@ class RotatedNestedNes(RotatedNes): Indicates if you want to get reading/writing info. dataset: Dataset NetCDF4-python Dataset to initialize the class. - xarray: bool: - (Not working) Indicates if you want to use xarray as default. parallel_method : str Indicates the parallelization method that you want. Default: 'Y'. Accepted values: ['X', 'Y', 'T']. 
@@ -46,7 +45,7 @@ class RotatedNestedNes(RotatedNes): super(RotatedNestedNes, self).__init__(comm=comm, path=path, info=info, dataset=dataset, balanced=balanced, - xarray=xarray, parallel_method=parallel_method, + parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, first_level=first_level, last_level=last_level, create_nes=create_nes, times=times, **kwargs) diff --git a/requirements.txt b/requirements.txt index a40856e..02666b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,5 @@ scipy>=1.7.3 filelock>=3.9.0 eccodes-python~=0.9.5 cfunits>=3.3.5 -xarray>=0.20.2 -mpi4py>=3.1.4 \ No newline at end of file +mpi4py>=3.1.4 +rasterio>=1.1.3 -- GitLab From 58d649a6d5cfe9d29f6ad2a303a7cf70bbe9e7cd Mon Sep 17 00:00:00 2001 From: ctena Date: Wed, 3 Jan 2024 15:43:47 +0100 Subject: [PATCH 2/4] self.netcdf to self.dataset --- nes/methods/horizontal_interpolation.py | 2 +- nes/methods/vertical_interpolation.py | 2 +- nes/nc_projections/default_nes.py | 62 ++++++++++---------- nes/nc_projections/points_nes.py | 2 +- nes/nc_projections/points_nes_ghost.py | 2 +- nes/nc_projections/points_nes_providentia.py | 2 +- nes/nes_formats/cmaq_format.py | 2 +- nes/nes_formats/monarch_format.py | 2 +- nes/nes_formats/wrf_chem_format.py | 2 +- 9 files changed, 38 insertions(+), 40 deletions(-) diff --git a/nes/methods/horizontal_interpolation.py b/nes/methods/horizontal_interpolation.py index 82d3141..3690843 100644 --- a/nes/methods/horizontal_interpolation.py +++ b/nes/methods/horizontal_interpolation.py @@ -83,7 +83,7 @@ def interpolate_horizontal(self, dst_grid, weight_matrix_path=None, kind='Neares # Remove original file information final_dst.__ini_path = None - final_dst.netcdf = None + final_dst.dataset = None final_dst.dataset = None # Return final_dst diff --git a/nes/methods/vertical_interpolation.py b/nes/methods/vertical_interpolation.py index 995207b..94ffd51 100644 --- a/nes/methods/vertical_interpolation.py +++ b/nes/methods/vertical_interpolation.py @@ -194,6 +194,6 @@ def interpolate_vertical(self, new_levels, new_src_vertical=None, kind='linear', # Remove original file information self.__ini_path = None - self.netcdf = None + self.dataset = None return self diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 12efaff..151f319 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -40,8 +40,6 @@ class Nes(object): Number of hours to avoid from the first original values. hours_end : int Number of hours to avoid from the last original values. - dataset : xr.Dataset - (not working) xArray Dataset. netcdf : Dataset netcdf4-python Dataset. 
variables : dict @@ -164,7 +162,7 @@ class Nes(object): # NetCDF object if create_nes: - self.netcdf = None + self.dataset = None # Set string length self.strlen = None @@ -202,7 +200,7 @@ class Nes(object): else: if dataset is not None: - self.netcdf = dataset + self.dataset = dataset elif self.__ini_path is not None: self.open() @@ -320,8 +318,8 @@ class Nes(object): Max length of the string data """ - if 'strlen' in self.netcdf.dimensions: - strlen = self.netcdf.dimensions['strlen'].size + if 'strlen' in self.dataset.dimensions: + strlen = self.dataset.dimensions['strlen'].size else: return None @@ -455,7 +453,7 @@ class Nes(object): """ nessy = deepcopy(self) - nessy.netcdf = None + nessy.dataset = None if copy_vars: nessy.set_communicator(self.comm) nessy.variables = deepcopy(self.variables) @@ -1433,7 +1431,7 @@ class Nes(object): Open the NetCDF. """ - self.netcdf = self.__open_netcdf4() + self.dataset = self.__open_netcdf4() return None @@ -1457,7 +1455,7 @@ class Nes(object): else: netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=True, comm=self.comm, info=MPI.Info()) - self.netcdf = netcdf + self.dataset = netcdf return netcdf @@ -1469,9 +1467,9 @@ class Nes(object): if self.master: self.serial_nc.close() self.serial_nc = None - if (hasattr(self, 'netcdf')) and (self.netcdf is not None): - self.netcdf.close() - self.netcdf = None + if (hasattr(self, 'netcdf')) and (self.dataset is not None): + self.dataset.close() + self.dataset = None return None @@ -1610,7 +1608,7 @@ class Nes(object): """ if self.master: - nc_var = self.netcdf.variables['time'] + nc_var = self.dataset.variables['time'] time_data, units, calendar = self.__parse_time(nc_var) # Extracting time resolution depending on the units self._time_resolution = self.__get_time_resolution_from_units(units) @@ -1644,12 +1642,12 @@ class Nes(object): if self.master: if not create_nes: - if 'time_bnds' in self.netcdf.variables.keys() or self._climatology: - time = self.netcdf.variables['time'] + if 'time_bnds' in self.dataset.variables.keys() or self._climatology: + time = self.dataset.variables['time'] if self._climatology: - nc_var = self.netcdf.variables[self._climatology_var_name] + nc_var = self.dataset.variables[self._climatology_var_name] else: - nc_var = self.netcdf.variables['time_bnds'] + nc_var = self.dataset.variables['time_bnds'] time_bnds = num2date(nc_var[:], self.__parse_time_unit(time.units), calendar=time.calendar).tolist() else: @@ -1684,12 +1682,12 @@ class Nes(object): if self.master: if not create_nes: - if 'lat_bnds' in self.netcdf.variables.keys(): - lat_bnds = {'data': self._unmask_array(self.netcdf.variables['lat_bnds'][:])} + if 'lat_bnds' in self.dataset.variables.keys(): + lat_bnds = {'data': self._unmask_array(self.dataset.variables['lat_bnds'][:])} else: lat_bnds = None - if 'lon_bnds' in self.netcdf.variables.keys(): - lon_bnds = {'data': self._unmask_array(self.netcdf.variables['lon_bnds'][:])} + if 'lon_bnds' in self.dataset.variables.keys(): + lon_bnds = {'data': self._unmask_array(self.dataset.variables['lon_bnds'][:])} else: lon_bnds = None else: @@ -1723,9 +1721,9 @@ class Nes(object): c_measures = {} if self.master: if not create_nes: - if 'cell_area' in self.netcdf.variables.keys(): + if 'cell_area' in self.dataset.variables.keys(): c_measures['cell_area'] = {} - c_measures['cell_area']['data'] = self._unmask_array(self.netcdf.variables['cell_area'][:]) + c_measures['cell_area']['data'] = self._unmask_array(self.dataset.variables['cell_area'][:]) c_measures = 
self.comm.bcast(c_measures, root=0) self.free_vars(['cell_area']) @@ -1756,7 +1754,7 @@ class Nes(object): dimension_name = set(possible_names).intersection(set(self.variables.keys())).pop() nc_var = self.variables[dimension_name].copy() - nc_var['data'] = self.netcdf.variables[dimension_name][:] + nc_var['data'] = self.dataset.variables[dimension_name][:] if hasattr(nc_var, 'units'): if nc_var['units'] in ['unitless', '-']: nc_var['units'] = '' @@ -1881,7 +1879,7 @@ class Nes(object): if self.master: variables = {} # Initialise data - for var_name, var_info in self.netcdf.variables.items(): + for var_name, var_info in self.dataset.variables.items(): variables[var_name] = {} variables[var_name]['data'] = None variables[var_name]['dimensions'] = var_info.dimensions @@ -1919,7 +1917,7 @@ class Nes(object): Portion of the variable data corresponding to the rank. """ - nc_var = self.netcdf.variables[var_name] + nc_var = self.dataset.variables[var_name] var_dims = nc_var.dimensions # Read data in 4 dimensions @@ -1994,10 +1992,10 @@ class Nes(object): List (or single string) of the variables to be loaded. """ - if (self.__ini_path is None) and (self.netcdf is None): + if (self.__ini_path is None) and (self.dataset is None): raise RuntimeError('Only data from existing files can be loaded.') - if self.netcdf is None: + if self.dataset is None: self.__open_dataset() close = True else: @@ -2130,8 +2128,8 @@ class Nes(object): gl_attrs = {} if not create_nes: - for attrname in self.netcdf.ncattrs(): - gl_attrs[attrname] = getattr(self.netcdf, attrname) + for attrname in self.dataset.ncattrs(): + gl_attrs[attrname] = getattr(self.dataset, attrname) return gl_attrs @@ -2705,7 +2703,7 @@ class Nes(object): if att_value is not None: if self.info: print("Rank {0:03d}: Filling {1})".format(self.rank, var_name)) - var = self.netcdf.variables[var_name] + var = self.dataset.variables[var_name] if isinstance(att_value, int) and att_value == 0: var[i_time, self.write_axis_limits['z_min']:self.write_axis_limits['z_max'], @@ -2816,7 +2814,7 @@ class Nes(object): netcdf.setncattr('Conventions', 'CF-1.7') if keep_open: - self.netcdf = netcdf + self.dataset = netcdf else: netcdf.close() diff --git a/nes/nc_projections/points_nes.py b/nes/nc_projections/points_nes.py index bca62c0..9df46e2 100644 --- a/nes/nc_projections/points_nes.py +++ b/nes/nc_projections/points_nes.py @@ -310,7 +310,7 @@ class PointsNes(Nes): Portion of the variable data corresponding to the rank. """ - nc_var = self.netcdf.variables[var_name] + nc_var = self.dataset.variables[var_name] var_dims = nc_var.dimensions # Read data in 1 or 2 dimensions diff --git a/nes/nc_projections/points_nes_ghost.py b/nes/nc_projections/points_nes_ghost.py index 2acdd81..b528edf 100644 --- a/nes/nc_projections/points_nes_ghost.py +++ b/nes/nc_projections/points_nes_ghost.py @@ -279,7 +279,7 @@ class PointsNesGHOST(PointsNes): Portion of the variable data corresponding to the rank. """ - nc_var = self.netcdf.variables[var_name] + nc_var = self.dataset.variables[var_name] var_dims = nc_var.dimensions # Read data in 1 or 2 dimensions diff --git a/nes/nc_projections/points_nes_providentia.py b/nes/nc_projections/points_nes_providentia.py index b157cb3..4a72bc0 100644 --- a/nes/nc_projections/points_nes_providentia.py +++ b/nes/nc_projections/points_nes_providentia.py @@ -316,7 +316,7 @@ class PointsNesProvidentia(PointsNes): Portion of the variable data corresponding to the rank. 
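The second commit in the series ("self.netcdf to self.dataset") is a mechanical rename of the internal handle to the open netCDF4 object. Code that reaches into that handle directly should use `.dataset` after this series; a short sketch with a placeholder file name (touching the internal handle is normally unnecessary, this only illustrates the rename):

```python
from nes import open_netcdf

nessy = open_netcdf('/path/to/model_output.nc')
# Underlying netCDF4.Dataset, previously exposed as nessy.netcdf
print(nessy.dataset.ncattrs())
```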
""" - nc_var = self.netcdf.variables[var_name] + nc_var = self.dataset.variables[var_name] var_dims = nc_var.dimensions # Read data in 1, 2 or 3 dimensions diff --git a/nes/nes_formats/cmaq_format.py b/nes/nes_formats/cmaq_format.py index f27bc77..b8b344d 100644 --- a/nes/nes_formats/cmaq_format.py +++ b/nes/nes_formats/cmaq_format.py @@ -59,7 +59,7 @@ def to_netcdf_cmaq(self, path, chunking=False, keep_open=False): # Close NetCDF if keep_open: - self.netcdf = netcdf + self.dataset = netcdf else: netcdf.close() diff --git a/nes/nes_formats/monarch_format.py b/nes/nes_formats/monarch_format.py index 0087472..e660638 100644 --- a/nes/nes_formats/monarch_format.py +++ b/nes/nes_formats/monarch_format.py @@ -73,7 +73,7 @@ def to_netcdf_monarch(self, path, chunking=False, keep_open=False): netcdf.setncattr('Conventions', 'CF-1.7') if keep_open: - self.netcdf = netcdf + self.dataset = netcdf else: netcdf.close() diff --git a/nes/nes_formats/wrf_chem_format.py b/nes/nes_formats/wrf_chem_format.py index 77bf937..1dffc8d 100644 --- a/nes/nes_formats/wrf_chem_format.py +++ b/nes/nes_formats/wrf_chem_format.py @@ -67,7 +67,7 @@ def to_netcdf_wrf_chem(self, path, chunking=False, keep_open=False): # Close NetCDF if keep_open: - self.netcdf = netcdf + self.dataset = netcdf else: netcdf.close() -- GitLab From 1a998c0e598c6a799294a0b676d595cfaae95471 Mon Sep 17 00:00:00 2001 From: ctena Date: Fri, 12 Jan 2024 13:38:35 +0100 Subject: [PATCH 3/4] Working on Raster implementation --- nes/load_nes.py | 329 +++++++++++++++++++----------- nes/nc_projections/default_nes.py | 69 ++++--- nes/nc_projections/latlon_nes.py | 52 ++--- nes/nc_projections/raster_nes.py | 270 +++++++++++++++++++++++- 4 files changed, 546 insertions(+), 174 deletions(-) diff --git a/nes/load_nes.py b/nes/load_nes.py index 0fb271e..2c37de2 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -15,56 +15,82 @@ DIM_VAR_NAMES = ['lat', 'latitude', 'lat_bnds', 'lon', 'longitude', 'lon_bnds', def open_netcdf(path, comm=None, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, balanced=False): """ - Open a netCDF file. + Open a NetCDF file and return a Nes object with lazily loaded variables. Parameters ---------- path : str Path to the NetCDF file to read. - comm : MPI.COMM - MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. - info : bool - Indicates if you want to print (stdout) the reading/writing steps. - avoid_first_hours : int - Number of hours to remove from first time steps. - avoid_last_hours : int - Number of hours to remove from last time steps. - parallel_method : str - Indicates the parallelization method that you want. Default: 'Y'. + comm : MPI.COMM, optional + MPI communicator to use for parallel I/O. Default: MPI.COMM_WORLD. + info : bool, optional + Indicates whether to print reading/writing steps to stdout. + avoid_first_hours : int, optional + Number of hours to remove from the beginning of time steps. + avoid_last_hours : int, optional + Number of hours to remove from the end of time steps. + parallel_method : str, optional + Parallelization method. Default: 'Y'. Accepted values: ['X', 'Y', 'T'] - balanced : bool - Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode. - first_level : int + balanced : bool, optional + Indicates if you want balanced parallelization. Note: Balanced datasets cannot be written in chunking mode. + first_level : int, optional Index of the first level to use. 
- last_level : int, None + last_level : int or None, optional Index of the last level to use. None if it is the last. Returns ------- Nes - Nes object. Variables read in lazy mode (only metadata). - """ + Nes object containing lazily loaded variables with metadata. - if comm is None: - comm = MPI.COMM_WORLD - else: - comm = comm + Raises + ------ + FileNotFoundError + If the specified NetCDF file at `path` does not exist. + + Notes + ----- + This function supports parallel I/O using MPI. The `comm` parameter allows you to specify an MPI communicator for coordinating processes. + If `comm` is not provided, the default MPI communicator `MPI.COMM_WORLD` is used. + The `info` parameter controls whether to print reading/writing steps to the standard output. + + Examples + -------- + Open a NetCDF file using the default communicator: + + ```python + nes_data = open_netcdf('/path/to/data.nc', info=True, parallel_method='Y') + ``` + + Open a NetCDF file with a custom MPI communicator: + + ```python + from mpi4py import MPI + + comm = MPI.COMM_WORLD + nes_data = open_netcdf('/path/to/data.nc', comm=comm, info=True, parallel_method='Y') + ``` + """ + # Set default communicator if not provided + comm = comm or MPI.COMM_WORLD + # Validate file existence if not os.path.exists(path): - raise FileNotFoundError(path) + raise FileNotFoundError(f"The specified NetCDF file at '{path}' does not exist.") + # Open NetCDF dataset dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) - # Parallel is not needed for reading - # if comm.Get_size() == 1: - # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False) - # else: - # dataset = Dataset(path, format="NETCDF4", mode='r', parallel=True, comm=comm, info=MPI.Info()) + # Determine grid type and create Nes object if __is_rotated(dataset): # Rotated grids nessy = RotatedNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) elif __is_points(dataset): if parallel_method == 'Y': warnings.warn("Parallel method cannot be 'Y' to create points NES.
Setting it to 'X'") @@ -75,188 +101,228 @@ def open_netcdf(path, comm=None, info=False, parallel_method='Y', avoid_first_ho nessy = PointsNesGHOST(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) elif __is_points_providentia(dataset): # Points - Providentia nessy = PointsNesProvidentia(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, - balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, + balanced=balanced) else: # Points - non-GHOST nessy = PointsNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) elif __is_lcc(dataset): # Lambert conformal conic grids nessy = LCCNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) elif __is_mercator(dataset): # Mercator grids nessy = MercatorNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) else: # Regular grids nessy = LatLonNes(comm=comm, dataset=dataset, info=info, parallel_method=parallel_method, avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours, - first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,) + first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced) return nessy def __is_rotated(dataset): """ - Check if the netCDF is in rotated pole projection or not. + Check if the NetCDF dataset uses a rotated pole projection. Parameters ---------- - dataset : Dataset - netcdf4-python open dataset object. + dataset : netCDF4.Dataset + An open NetCDF dataset. Returns ------- - value : bool - Indicated if the netCDF is a rotated one. + bool + True if the dataset uses a rotated pole projection, False otherwise. + + Notes + ----- + This function checks for the presence of either a 'rotated_pole' variable or + dimensions 'rlat' and 'rlon' in the NetCDF dataset to determine if it is in a + rotated pole projection. """ + rotated_pole_variable = 'rotated_pole' in dataset.variables + rlat_rlon_dimensions = 'rlat' in dataset.dimensions and 'rlon' in dataset.dimensions - if 'rotated_pole' in dataset.variables.keys(): - return True - elif ('rlat' in dataset.dimensions) and ('rlon' in dataset.dimensions): - return True - else: - return False + return rotated_pole_variable or rlat_rlon_dimensions def __is_points(dataset): """ - Check if the netCDF is a points dataset in non-GHOST format or not. 
+ Check if the NetCDF dataset represents a non-GHOST points dataset. Parameters ---------- - dataset : Dataset - netcdf4-python open dataset object. + dataset : netCDF4.Dataset + An open NetCDF dataset. Returns ------- - value : bool - Indicated if the netCDF is a points non-GHOST one. - """ + bool + True if the dataset represents a non-GHOST points dataset, False otherwise. - if 'station' in dataset.dimensions: - return True - else: - return False + Notes + ----- + This function checks for the presence of a 'station' dimension in the NetCDF dataset + to determine if it represents a points dataset in non-GHOST format. + """ + return 'station' in dataset.dimensions def __is_points_ghost(dataset): """ - Check if the netCDF is a points dataset in GHOST format or not. + Check if the NetCDF dataset represents a GHOST points dataset. Parameters ---------- - dataset : Dataset - netcdf4-python open dataset object. + dataset : netCDF4.Dataset + An open NetCDF dataset. Returns ------- - value : bool - Indicated if the netCDF is a points GHOST one. - """ + bool + True if the dataset represents a GHOST points dataset, False otherwise. - if 'N_flag_codes' in dataset.dimensions and 'N_qa_codes' in dataset.dimensions: - return True - else: - return False + Notes + ----- + This function checks for the presence of 'N_flag_codes' and 'N_qa_codes' dimensions + in the NetCDF dataset to determine if it represents a points dataset in GHOST format. + + """ + return 'N_flag_codes' in dataset.dimensions and 'N_qa_codes' in dataset.dimensions def __is_points_providentia(dataset): """ - Check if the netCDF is a points dataset in Providentia format or not. + Check if the NetCDF dataset represents a Providentia points dataset. Parameters ---------- - dataset : Dataset - netcdf4-python open dataset object. + dataset : netCDF4.Dataset + An open NetCDF dataset. Returns ------- - value : bool - Indicated if the netCDF is a points Providentia one. + bool + True if the dataset represents a Providentia points dataset, False otherwise. + + Notes + ----- + This function checks for the presence of 'grid_edge', 'model_latitude', and 'model_longitude' + dimensions in the NetCDF dataset to determine if it represents a points dataset in Providentia format. + """ - - if (('grid_edge' in dataset.dimensions) and ('model_latitude' in dataset.dimensions) - and ('model_longitude' in dataset.dimensions)): - return True - else: - return False + return ('grid_edge' in dataset.dimensions and 'model_latitude' in dataset.dimensions and + 'model_longitude' in dataset.dimensions) def __is_lcc(dataset): """ - Check if the netCDF is in Lambert Conformal Conic (LCC) projection or not. + Check if the NetCDF dataset is in Lambert Conformal Conic (LCC) projection. Parameters ---------- - dataset : Dataset - netcdf4-python open dataset object. + dataset : netCDF4.Dataset + An open NetCDF dataset. Returns ------- - value : bool - Indicated if the netCDF is a LCC one. - """ + bool + True if the dataset is in Lambert Conformal Conic (LCC) projection, False otherwise. - if 'Lambert_Conformal' in dataset.variables.keys() or 'Lambert_conformal' in dataset.variables.keys(): - return True - else: - return False + Notes + ----- + This function checks for the presence of 'Lambert_Conformal' or 'Lambert_conformal' + variables in the NetCDF dataset to determine if it is in Lambert Conformal Conic projection. 
+ """ + return 'Lambert_Conformal' in dataset.variables or 'Lambert_conformal' in dataset.variables def __is_mercator(dataset): """ - Check if the netCDF is in Mercator projection or not. + Check if the NetCDF dataset is in Mercator projection. Parameters ---------- - dataset : Dataset - netcdf4-python open dataset object. + dataset : netCDF4.Dataset + An open NetCDF dataset. Returns ------- - value : bool - Indicated if the netCDF is a Mercator one. - """ + bool + True if the dataset is in Mercator projection, False otherwise. - if 'mercator' in dataset.variables.keys(): - return True - else: - return False + Notes + ----- + This function checks for the presence of 'mercator' variable in the NetCDF dataset + to determine if it is in Mercator projection. + + """ + return 'mercator' in dataset.variables def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, balanced=False): """ - Concatenate variables form different sources. + Concatenate variables from different sources. Parameters ---------- - nessy_list : list - List of Nes objects or list of paths to concatenate. - comm : MPI.Communicator - MPI Communicator. + nessy_list : list of Nes or str + List of Nes objects or paths to NetCDF files for concatenation. + comm : MPI.Communicator, optional + MPI Communicator. Default: None. + info : bool, optional + Indicates whether to print reading/writing steps to stdout. Default: False. + parallel_method : str, optional + Parallelization method. Default: 'Y'. Accepted values: ['X', 'Y', 'T'] + avoid_first_hours : int, optional + Number of hours to remove from the beginning of time steps. Default: 0. + avoid_last_hours : int, optional + Number of hours to remove from the end of time steps. Default: 0. + first_level : int, optional + Index of the first level to use. Default: 0. + last_level : int or None, optional + Index of the last level to use. None if it is the last. Default: None. + balanced : bool, optional + Indicates if balanced parallelization is desired. Note: Balanced datasets cannot be written in chunking mode. + Default: False. Returns ------- Nes - Nes object with all the variables. + Nes object with all the concatenated variables. + + Raises + ------ + TypeError + If the input is not a list. + FileNotFoundError + If a provided path to a NetCDF file does not exist. + ValueError + If the input list is empty. 
""" if not isinstance(nessy_list, list): - raise AttributeError("You must pass a list of NES objects or paths.") + raise TypeError("You must pass a list of NES objects or paths.") + + if not nessy_list: + raise ValueError("Input list is empty.") if isinstance(nessy_list[0], str): + if not os.path.exists(nessy_list[0]): + raise FileNotFoundError(nessy_list[0]) nessy_first = open_netcdf(nessy_list[0], comm=comm, parallel_method=parallel_method, @@ -270,8 +336,10 @@ def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', nessy_first.load() else: nessy_first = nessy_list[0] - for i, aux_nessy in enumerate(nessy_list[1:]): + for aux_nessy in nessy_list[1:]: if isinstance(aux_nessy, str): + if not os.path.exists(aux_nessy): + raise FileNotFoundError(aux_nessy) nc_add = Dataset(filename=aux_nessy, mode='r') for var_name, var_info in nc_add.variables.items(): if var_name not in DIM_VAR_NAMES: @@ -324,32 +392,59 @@ def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', return nessy_first -def open_raster(path, comm=None, info=False): +def open_raster(path, comm=None, info=False, parallel_method='Y'): """ - Open a Raster (TIF) file + Open a Raster (TIFF) file and return a Raster NES object. + + This function reads a Raster (TIFF) file, initializes a Raster NES object, and returns it. Parameters ---------- path : str - Path to the Raster (TIFF) file to read. - comm : MPI.COMM - MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD. - info : bool - Indicates if you want to print (stdout) the reading/writing steps. + The path to the Raster (TIFF) file to be read. + + comm : MPI.COMM, optional + MPI communicator to use in the Raster NES object. Default: MPI.COMM_WORLD. + + info : bool, optional + Indicates whether to print the reading/writing steps to stdout. + + parallel_method : str, optional + Specifies the parallelization method to be used in the Raster NES object. Default: 'Y'. + Accepted values: ['Y', 'X']. Returns ------- RasterNes - Raster NES object + The initialized Raster NES object. + + Raises + ------ + FileNotFoundError + If the specified Raster (TIFF) file does not exist. + + RuntimeError + If an unsupported parallelization method ('T') is specified. + + Notes + ----- + - If the MPI communicator (`comm`) is not provided, it defaults to MPI.COMM_WORLD. + - The Raster NES object is created using the RasterNes class with the specified parameters. + + Example + ------- + >>> raster_path = 'path/to/your/raster.tif' + >>> raster_nes_obj = open_raster(raster_path, comm=comm, info=True, parallel_method='Y') """ if comm is None: comm = MPI.COMM_WORLD - else: - comm = comm if not os.path.exists(path): - raise FileNotFoundError(path) + raise FileNotFoundError(f"The specified Raster (TIFF) file does not exist: {path}") + + if parallel_method == 'T': + raise RuntimeError("Parallel method 'T' is not accepted for opening raster files.") - nessy = RasterNes(path=path, comm=comm, info=info) + nessy = RasterNes(path=path, comm=comm, info=info, parallel_method=parallel_method) return nessy diff --git a/nes/nc_projections/default_nes.py b/nes/nc_projections/default_nes.py index 151f319..fde8952 100644 --- a/nes/nc_projections/default_nes.py +++ b/nes/nc_projections/default_nes.py @@ -25,7 +25,7 @@ class Nes(object): Attributes ---------- - comm : MPI.Communicator. + comm : MPI.Communicator rank : int MPI rank. master : bool @@ -40,7 +40,7 @@ class Nes(object): Number of hours to avoid from the first original values. 
hours_end : int Number of hours to avoid from the last original values. - netcdf : Dataset + dataset : Dataset netcdf4-python Dataset. variables : dict Variables information. @@ -123,10 +123,7 @@ class Nes(object): """ # MPI Initialization - if comm is None: - self.comm = MPI.COMM_WORLD - else: - self.comm = comm + self.comm = comm or MPI.COMM_WORLD self.rank = self.comm.Get_rank() self.master = self.rank == 0 self.size = self.comm.Get_size() @@ -176,8 +173,8 @@ class Nes(object): # Complete dimensions self._time = times - self._time_bnds = self.__get_time_bnds(create_nes) - self._lat_bnds, self._lon_bnds = self.__get_coordinates_bnds(create_nes) + self._time_bnds = self._get_time_bnds(create_nes) + self._lat_bnds, self._lon_bnds = self._get_coordinates_bnds(create_nes) self._lev = {'data': np.array([0]), 'units': '', 'positive': 'up'} @@ -193,15 +190,15 @@ class Nes(object): self.lat_bnds, self.lon_bnds = self._lat_bnds, self._lon_bnds # Cell measures screening - self.cell_measures = self.__get_cell_measures(create_nes) + self.cell_measures = self._get_cell_measures(create_nes) # Set NetCDF attributes - self.global_attrs = self.__get_global_attributes(create_nes) + self.global_attrs = self._get_global_attributes(create_nes) else: if dataset is not None: self.dataset = dataset - elif self.__ini_path is not None: + elif self.get_ini_path() is not None: self.open() # Get string length @@ -214,15 +211,15 @@ class Nes(object): self._get_projection() # Complete dimensions - self._time = self.__get_time() - self._time_bnds = self.__get_time_bnds() + self._time = self._get_time() + self._time_bnds = self._get_time_bnds() self._lev = self._get_coordinate_dimension(['lev', 'level', 'lm', 'plev']) self._lat = self._get_coordinate_dimension(['lat', 'latitude', 'latitudes']) self._lon = self._get_coordinate_dimension(['lon', 'longitude', 'longitudes']) - self._lat_bnds, self._lon_bnds = self.__get_coordinates_bnds() + self._lat_bnds, self._lon_bnds = self._get_coordinates_bnds() # Complete cell measures - self._cell_measures = self.__get_cell_measures() + self._cell_measures = self._get_cell_measures() # Set axis limits for parallel reading self.read_axis_limits = self.get_read_axis_limits() @@ -243,7 +240,7 @@ class Nes(object): self.write_axis_limits = self.get_write_axis_limits() # Set NetCDF attributes - self.global_attrs = self.__get_global_attributes() + self.global_attrs = self._get_global_attributes() # Writing options self.zip_lvl = 0 @@ -341,6 +338,9 @@ class Nes(object): return None + def get_ini_path(self): + return self.__ini_path + def __del__(self): """ To delete the Nes object and close all the open datasets. @@ -1451,9 +1451,9 @@ class Nes(object): """ if self.size == 1: - netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=False) + netcdf = Dataset(self.get_ini_path(), format="NETCDF4", mode=mode, parallel=False) else: - netcdf = Dataset(self.__ini_path, format="NETCDF4", mode=mode, parallel=True, comm=self.comm, + netcdf = Dataset(self.get_ini_path(), format="NETCDF4", mode=mode, parallel=True, comm=self.comm, info=MPI.Info()) self.dataset = netcdf @@ -1597,14 +1597,19 @@ class Nes(object): resolution = 'hours' return resolution - def __get_time(self): + def _get_time(self): """ Get the NetCDF file time values. Returns ------- - time : List - List of times (datetime.datetime) of the NetCDF data. + List[datetime.datetime] + A list of datetime objects corresponding to the time values in the NetCDF data. 
+ + Notes + ----- + This method is intended for internal use; the single leading underscore marks it as private. + """ if self.master: @@ -1625,19 +1630,20 @@ class Nes(object): return time - def __get_time_bnds(self, create_nes=False): + def _get_time_bnds(self, create_nes=False): """ Get the NetCDF time bounds values. Parameters ---------- - create_nes : bool - Indicates if you want to create the object from scratch (True) or through an existing file. + create_nes : bool, optional + If True, indicates that the method will create the object from scratch. + If False (default), the method will use an existing file to obtain time bounds. Returns ------- - time_bnds : List - List of time bounds (datetime) of the NetCDF data. + List[datetime] + A list of datetime objects representing the time bounds in the NetCDF data. """ if self.master: @@ -1663,7 +1669,7 @@ class Nes(object): return time_bnds - def __get_coordinates_bnds(self, create_nes=False): + def _get_coordinates_bnds(self, create_nes=False): """ Get the NetCDF coordinates bounds values. @@ -1703,7 +1709,7 @@ class Nes(object): return lat_bnds, lon_bnds - def __get_cell_measures(self, create_nes=False): + def _get_cell_measures(self, create_nes=False): """ Get the NetCDF cell measures values. @@ -1992,11 +1998,11 @@ class Nes(object): List (or single string) of the variables to be loaded. """ - if (self.__ini_path is None) and (self.dataset is None): + if (self.get_ini_path() is None) and (self.dataset is None): raise RuntimeError('Only data from existing files can be loaded.') if self.dataset is None: - self.__open_dataset() + self.open() close = True else: close = False @@ -2110,7 +2116,7 @@ class Nes(object): return new_vars_added - def __get_global_attributes(self, create_nes=False): + def _get_global_attributes(self, create_nes=False): """ Read the netcdf global attributes.
@@ -2487,7 +2493,6 @@ class Nes(object): return None - def _create_cell_measures(self, netcdf): # CELL AREA diff --git a/nes/nc_projections/latlon_nes.py b/nes/nc_projections/latlon_nes.py index 05917fc..42e0bf3 100644 --- a/nes/nc_projections/latlon_nes.py +++ b/nes/nc_projections/latlon_nes.py @@ -223,30 +223,36 @@ class LatLonNes(Nes): centre_lon : dict Dictionary with data of centre longitudes in 1D """ + if 'lat' in kwargs.keys() and 'lon' in kwargs.keys(): + # Calculate centre latitudes + centre_lat = kwargs['lat'] - # Get grid resolution - inc_lat = np.float64(self.projection_data['inc_lat']) - inc_lon = np.float64(self.projection_data['inc_lon']) - - # Get coordinates origen - lat_orig = np.float64(self.projection_data['lat_orig']) - lon_orig = np.float64(self.projection_data['lon_orig']) - - # Get number of coordinates - n_lat = int(self.projection_data['n_lat']) - n_lon = int(self.projection_data['n_lon']) - - # Calculate centre latitudes - lat_c_orig = lat_orig + (inc_lat / 2) - centre_lat = np.linspace(lat_c_orig, - lat_c_orig + (inc_lat * (n_lat - 1)), - n_lat, dtype=np.float64) - - # Calculate centre longitudes - lon_c_orig = lon_orig + (inc_lon / 2) - centre_lon = np.linspace(lon_c_orig, - lon_c_orig + (inc_lon * (n_lon - 1)), - n_lon, dtype=np.float64) + # Calculate centre longitudes + centre_lon = kwargs['lon'] + else: + # Get grid resolution + inc_lat = np.float64(self.projection_data['inc_lat']) + inc_lon = np.float64(self.projection_data['inc_lon']) + + # Get coordinates origen + lat_orig = np.float64(self.projection_data['lat_orig']) + lon_orig = np.float64(self.projection_data['lon_orig']) + + # Get number of coordinates + n_lat = int(self.projection_data['n_lat']) + n_lon = int(self.projection_data['n_lon']) + + # Calculate centre latitudes + lat_c_orig = lat_orig + (inc_lat / 2) + centre_lat = np.linspace(lat_c_orig, + lat_c_orig + (inc_lat * (n_lat - 1)), + n_lat, dtype=np.float64) + + # Calculate centre longitudes + lon_c_orig = lon_orig + (inc_lon / 2) + centre_lon = np.linspace(lon_c_orig, + lon_c_orig + (inc_lon * (n_lon - 1)), + n_lon, dtype=np.float64) return {'data': centre_lat}, {'data': centre_lon} diff --git a/nes/nc_projections/raster_nes.py b/nes/nc_projections/raster_nes.py index 51d5e7d..b23e18b 100644 --- a/nes/nc_projections/raster_nes.py +++ b/nes/nc_projections/raster_nes.py @@ -3,11 +3,277 @@ import numpy as np from pyproj import Proj from .default_nes import Nes +from .latlon_nes import LatLonNes +import rasterio as rio +import datetime +from rasterio.windows import from_bounds +from pyproj import Proj, transform +from rasterio.features import shapes class RasterNes(Nes): + """ + Attributes + ---------- + dataset : rasterio.io.DatasetReader + """ - def __init__(self, comm=None, path=None, info=False, **kwargs): - super().__init__(comm=comm, path=path, info=info, dataset=None, parallel_method='Y', + def __init__(self, comm=None, path=None, info=False, parallel_method='Y', **kwargs): + self.window = None + + super().__init__(comm=comm, path=path, info=info, dataset=None, parallel_method=parallel_method, balanced=False, avoid_first_hours=0, avoid_last_hours=0, first_level=0, last_level=None, create_nes=False, times=None, **kwargs) + + def open(self): + """ + Open the NetCDF. 
+ """ + + self.dataset = self.__open_raster() + + return None + + def __open_raster(self): + """ + + Returns + ------- + rasterio.io.DatasetReader + """ + raster = rio.open(self.get_ini_path(), 'r') + self.dataset = raster + + return raster + + def _get_strlen(self): + """ + Get the strlen + + Returns + ------- + int + Max length of the string data + """ + return None + + def _get_lazy_variables(self): + """ + Get all the variables' information. + + Returns + ------- + variables : dict + Dictionary with the variable name as key and another dictionary as value. + De value dictionary will have the 'data' key with None as value and all the variable attributes as the + other keys. + e.g. + {'var_name_1': {'data': None, 'attr_1': value_1_1, 'attr_2': value_1_2, ...}, + 'var_name_2': {'data': None, 'attr_1': value_2_1, 'attr_2': value_2_2, ...}, + ...} + """ + + if self.master: + variables = {} + # Initialise data + for band, units in enumerate(self.dataset.units): + var_name = "band_{0}".format(band + 1) + variables[var_name] = {} + variables[var_name]['data'] = None + variables[var_name]['dimensions'] = ('lat', 'lon') + variables[var_name]['dtype'] = self.dataset.dtypes[band] + if units is None: + units = '-' + variables[var_name]['units'] = units + + else: + variables = None + variables = self.comm.bcast(variables, root=0) + + return variables + + def _get_time(self): + """ + It is assumed that the Raster has no time information. + Setting time to 01/01/2000. + + Returns + ------- + List[datetime.datetime] + A list of datetime objects corresponding to the time values in the Raster data. + """ + time = [datetime.datetime(year=2000, month=1, day=1, hour=0, minute=0)] + + return time + + def _get_time_bnds(self, create_nes=False): + """ + Get the Raster time bounds values. + + Parameters + ---------- + create_nes : bool, optional + If True, indicates that the method will create the object from scratch. + If False (default), the method will use an existing file to obtain time bounds. + + Returns + ------- + None + """ + return None + + def _get_coordinates_bnds(self, create_nes=False): + return None, None + + def _get_cell_measures(self, create_nes=False): + """ + Get the Raster cell measures values. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + dict + Dictionary of cell measures of the Raster data. + """ + + return {} + + def _get_global_attributes(self, create_nes=False): + """ + Read the netcdf global attributes. + + Parameters + ---------- + create_nes : bool + Indicates if you want to create the object from scratch (True) or through an existing file. + + Returns + ------- + gl_attrs : dict + Dictionary with the netCDF global attributes. + """ + + gl_attrs = self.dataset.meta + + # Removing None + for key, value in gl_attrs.items(): + if value is None: + gl_attrs[key] = '-' + gl_attrs[key] = '-' + + return gl_attrs + + def sel(self, hours_start=None, time_min=None, hours_end=None, time_max=None, lev_min=None, lev_max=None, + lat_min=None, lat_max=None, lon_min=None, lon_max=None): + """ + Select a slice of time, lev, lat or lon given a minimum and maximum limits. + """ + if (hours_start is not None or time_min is not None or hours_end is not None or time_max is not None or + lev_min is not None or lev_max is not None): + raise TypeError("Raster only can be selected by lat lon values.") + + # Rasterio reads from top to bottom, contrary to NetCDF. 
Thus, the latitudes and data need to be flipped. + # Calculate the inverse rank to ensure the first rank loads the bottom part of the raster instead of the top + inverse_rank = self.size - self.rank - 1 + # Define the geographic coordinates of the entire dataset + left, bottom, right, top = (lon_min, lat_min, lon_max, lat_max) + + # Create a window using the bounds for the entire dataset + full_dataset_window = from_bounds(left, bottom, right, top, self.dataset.transform) + + # Calculate the width and height of each sub-window + if self.parallel_method == 'Y': + sub_window_width = full_dataset_window.width + sub_window_height = full_dataset_window.height // self.size + + start_col = full_dataset_window.col_off + start_row = (inverse_rank * sub_window_height) + full_dataset_window.row_off + + self.read_axis_limits = {'x_min': 0, 'x_max': None, + 'y_min': int(self.rank * sub_window_height), + 'y_max': int((self.rank + 1) * sub_window_height), + 'z_min': 0, 'z_max': None, 't_min': 0, 't_max': None} + self.write_axis_limits = self.read_axis_limits.copy() + elif self.parallel_method == 'X': + sub_window_width = full_dataset_window.width // self.size + sub_window_height = full_dataset_window.height + + start_col = (inverse_rank * sub_window_width) + full_dataset_window.col_off + start_row = full_dataset_window.row_off + + self.read_axis_limits = {'x_min': int(self.rank * sub_window_width), + 'x_max': int((self.rank + 1) * sub_window_width), + 'y_min': 0, 'y_max': None, + 'z_min': 0, 'z_max': None, 't_min': 0, 't_max': None} + self.write_axis_limits = self.read_axis_limits.copy() + else: + raise RuntimeError("Parallel method only accepted X or Y") + + # Create a window for each MPI process + subset_window = rio.windows.Window(start_col, start_row, sub_window_width, sub_window_height) + full_transform = self.dataset.window_transform(full_dataset_window) + + # Generate arrays of latitude and longitude values using np.linspace + subset_y = np.linspace(full_transform.f, + full_transform.f + full_transform.e * int(full_dataset_window.height), + int(full_dataset_window.height)) + subset_x = np.linspace(full_transform.c, + full_transform.c + full_transform.a * int(full_dataset_window.width), + int(full_dataset_window.width)) + + subset_y = np.flip(subset_y, axis=0) + + print(full_transform) + print(self.dataset.transform) + # Set the window and coordinate arrays as attributes + self.window = subset_window + print(self.dataset.crs.to_string()) + print(self.dataset.crs.to_epsg()) + if self.dataset.crs.to_string() == 'EPSG:4326': + self._lat = {'data': subset_y} + self._lon = {'data': subset_x} + + self.lat = self._get_coordinate_values(self._lat, 'Y') + self.lon = self._get_coordinate_values(self._lon, 'X') + + self._var_dim = ('lat', 'lon') + self._lat_dim = ('lat',) + self._lon_dim = ('lon',) + else: + raise RuntimeError("Only EPSG:4326 supported for this raster") + + return subset_window + + def _read_variable(self, var_name): + """ + Read a portion of the variable data based on the current rank. + + Parameters + ---------- + var_name : str + Name of the variable to read. + + Returns + ------- + np.array + A portion of the variable data corresponding to the current rank. 
+ """ + var_data = self.dataset.read(int(var_name[-1]), window=self.window) + + # Flip data + var_data = np.flipud(var_data) + # 2D to 4D + var_data = np.expand_dims(var_data, axis=(0, 1)) + + return var_data + + def to_latlon_nes(self): + return LatLonNes( + comm=self.comm, path=self.get_ini_path(), info=self.info, dataset=None, + parallel_method=self.parallel_method, create_nes=True, balanced=self.balanced, times=self.time, + levels=self.lev, lat=self.lat, lat_bnds=self.lat_bnds, lon=self.lon, lon_bnds=self.lon_bnds, + vars=self.variables) -- GitLab From 36e4b2ea80b3a319dfd2d09df3618747e69d980b Mon Sep 17 00:00:00 2001 From: ctena Date: Wed, 20 Mar 2024 12:00:36 +0100 Subject: [PATCH 4/4] Added some chenges that comes from the raster implementation but there are not related with the raster. Those are code improvements as removing xarray dependence. --- nes/load_nes.py | 1 + nes/nc_projections/raster_nes.py | 1 + 2 files changed, 2 insertions(+) diff --git a/nes/load_nes.py b/nes/load_nes.py index 2c37de2..72eb61d 100644 --- a/nes/load_nes.py +++ b/nes/load_nes.py @@ -436,6 +436,7 @@ def open_raster(path, comm=None, info=False, parallel_method='Y'): >>> raster_path = 'path/to/your/raster.tif' >>> raster_nes_obj = open_raster(raster_path, comm=comm, info=True, parallel_method='Y') """ + raise NotImplementedError('Raster NES object') if comm is None: comm = MPI.COMM_WORLD diff --git a/nes/nc_projections/raster_nes.py b/nes/nc_projections/raster_nes.py index b23e18b..79515f3 100644 --- a/nes/nc_projections/raster_nes.py +++ b/nes/nc_projections/raster_nes.py @@ -19,6 +19,7 @@ class RasterNes(Nes): """ def __init__(self, comm=None, path=None, info=False, parallel_method='Y', **kwargs): + raise NotImplementedError("RasterNes is not implemented yet") self.window = None super().__init__(comm=comm, path=path, info=info, dataset=None, parallel_method=parallel_method, -- GitLab