#!/usr/bin/env python
import os
import sys
from mpi4py import MPI
from netCDF4 import Dataset
import warnings
import numpy as np
from .nc_projections import *
DIM_VAR_NAMES = ['lat', 'latitude', 'lat_bnds', 'lon', 'longitude', 'lon_bnds', 'time', 'time_bnds', 'lev', 'level',
'cell_area', 'crs', 'rotated_pole', 'x', 'y', 'rlat', 'rlon', 'Lambert_conformal', 'mercator']
[docs]
def open_netcdf(path, comm=None, xarray=False, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0,
first_level=0, last_level=None, balanced=False):
"""
Open a netCDF file.
Parameters
----------
path : str
Path to the NetCDF file to read.
comm : MPI.COMM
MPI communicator to use in that netCDF. Default: MPI.COMM_WORLD.
xarray : bool
(Not working) Indicates if you want to use xarray. Default: False.
info : bool
Indicates if you want to print (stdout) the reading/writing steps.
avoid_first_hours : int
Number of hours to remove from first time steps.
avoid_last_hours : int
Number of hours to remove from last time steps.
parallel_method : str
Indicates the parallelization method that you want. Default: 'Y'.
Accepted values: ['X', 'Y', 'T']
balanced : bool
Indicates if you want a balanced parallelization or not. Balanced dataset cannot be written in chunking mode.
first_level : int
Index of the first level to use.
last_level : int, None
Index of the last level to use. None if it is the last.
Returns
-------
Nes
Nes object. Variables read in lazy mode (only metadata).
"""
if comm is None:
comm = MPI.COMM_WORLD
else:
comm = comm
if not os.path.exists(path):
raise FileNotFoundError(path)
if xarray:
dataset = None
else:
# dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False)
if comm.Get_size() == 1:
dataset = Dataset(path, format="NETCDF4", mode='r', parallel=False)
else:
dataset = Dataset(path, format="NETCDF4", mode='r', parallel=True, comm=comm, info=MPI.Info())
if __is_rotated(dataset):
# Rotated grids
nessy = RotatedNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
elif __is_points(dataset):
if parallel_method == 'Y':
warnings.warn("Parallel method cannot be 'Y' to create points NES. Setting it to 'X'")
sys.stderr.flush()
parallel_method = 'X'
if __is_points_ghost(dataset):
# Points - GHOST
nessy = PointsNesGHOST(comm=comm, dataset=dataset, xarray=xarray, info=info,
parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
elif __is_points_providentia(dataset):
# Points - Providentia
nessy = PointsNesProvidentia(comm=comm, dataset=dataset, xarray=xarray, info=info,
parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False,
balanced=balanced,)
else:
# Points - non-GHOST
nessy = PointsNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
elif __is_lcc(dataset):
# Lambert conformal conic grids
nessy = LCCNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
elif __is_mercator(dataset):
# Mercator grids
nessy = MercatorNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
else:
# Regular grids
nessy = LatLonNes(comm=comm, dataset=dataset, xarray=xarray, info=info, parallel_method=parallel_method,
avoid_first_hours=avoid_first_hours, avoid_last_hours=avoid_last_hours,
first_level=first_level, last_level=last_level, create_nes=False, balanced=balanced,)
return nessy
def __is_rotated(dataset):
"""
Check if the netCDF is in rotated pole projection or not.
Parameters
----------
dataset : Dataset
netcdf4-python open dataset object.
Returns
-------
value : bool
Indicated if the netCDF is a rotated one.
"""
if 'rotated_pole' in dataset.variables.keys():
return True
elif ('rlat' in dataset.dimensions) and ('rlon' in dataset.dimensions):
return True
else:
return False
def __is_points(dataset):
"""
Check if the netCDF is a points dataset in non-GHOST format or not.
Parameters
----------
dataset : Dataset
netcdf4-python open dataset object.
Returns
-------
value : bool
Indicated if the netCDF is a points non-GHOST one.
"""
if 'station' in dataset.dimensions:
return True
else:
return False
def __is_points_ghost(dataset):
"""
Check if the netCDF is a points dataset in GHOST format or not.
Parameters
----------
dataset : Dataset
netcdf4-python open dataset object.
Returns
-------
value : bool
Indicated if the netCDF is a points GHOST one.
"""
if 'N_flag_codes' in dataset.dimensions and 'N_qa_codes' in dataset.dimensions:
return True
else:
return False
def __is_points_providentia(dataset):
"""
Check if the netCDF is a points dataset in Providentia format or not.
Parameters
----------
dataset : Dataset
netcdf4-python open dataset object.
Returns
-------
value : bool
Indicated if the netCDF is a points Providentia one.
"""
if (('grid_edge' in dataset.dimensions) and ('model_latitude' in dataset.dimensions)
and ('model_longitude' in dataset.dimensions)):
return True
else:
return False
def __is_lcc(dataset):
"""
Check if the netCDF is in Lambert Conformal Conic (LCC) projection or not.
Parameters
----------
dataset : Dataset
netcdf4-python open dataset object.
Returns
-------
value : bool
Indicated if the netCDF is a LCC one.
"""
if 'Lambert_Conformal' in dataset.variables.keys() or 'Lambert_conformal' in dataset.variables.keys():
return True
else:
return False
def __is_mercator(dataset):
"""
Check if the netCDF is in Mercator projection or not.
Parameters
----------
dataset : Dataset
netcdf4-python open dataset object.
Returns
-------
value : bool
Indicated if the netCDF is a Mercator one.
"""
if 'mercator' in dataset.variables.keys():
return True
else:
return False
[docs]
def concatenate_netcdfs(nessy_list, comm=None, info=False, parallel_method='Y', avoid_first_hours=0, avoid_last_hours=0,
first_level=0, last_level=None, balanced=False):
"""
Concatenate variables form different sources.
Parameters
----------
nessy_list : list
List of Nes objects or list of paths to concatenate.
comm : MPI.Communicator
MPI Communicator.
Returns
-------
Nes
Nes object with all the variables.
"""
if not isinstance(nessy_list, list):
raise AttributeError("You must pass a list of NES objects or paths.")
if isinstance(nessy_list[0], str):
nessy_first = open_netcdf(nessy_list[0],
comm=comm,
parallel_method=parallel_method,
info=info,
avoid_first_hours=avoid_first_hours,
avoid_last_hours=avoid_last_hours,
first_level=first_level,
last_level=last_level,
balanced=balanced
)
nessy_first.load()
else:
nessy_first = nessy_list[0]
for i, aux_nessy in enumerate(nessy_list[1:]):
if isinstance(aux_nessy, str):
nc_add = Dataset(filename=aux_nessy, mode='r')
for var_name, var_info in nc_add.variables.items():
if var_name not in DIM_VAR_NAMES:
nessy_first.variables[var_name] = {}
var_dims = var_info.dimensions
# Read data in 4 dimensions
if len(var_dims) < 2:
data = var_info[:]
elif len(var_dims) == 2:
data = var_info[nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'],
nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max']]
data = data.reshape(1, 1, data.shape[-2], data.shape[-1])
elif len(var_dims) == 3:
if 'strlen' in var_dims:
data = var_info[nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'],
nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max'],
:]
data_aux = np.empty(shape=(data.shape[0], data.shape[1]), dtype=np.object)
for lat_n in range(data.shape[0]):
for lon_n in range(data.shape[1]):
data_aux[lat_n, lon_n] = ''.join(
data[lat_n, lon_n].tostring().decode('ascii').replace('\x00', ''))
data = data_aux.reshape((1, 1, data_aux.shape[-2], data_aux.shape[-1]))
else:
data = var_info[nessy_first.read_axis_limits['t_min']:nessy_first.read_axis_limits['t_max'],
nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'],
nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max']]
data = data.reshape(data.shape[-3], 1, data.shape[-2], data.shape[-1])
elif len(var_dims) == 4:
data = var_info[nessy_first.read_axis_limits['t_min']:nessy_first.read_axis_limits['t_max'],
nessy_first.read_axis_limits['z_min']:nessy_first.read_axis_limits['z_max'],
nessy_first.read_axis_limits['y_min']:nessy_first.read_axis_limits['y_max'],
nessy_first.read_axis_limits['x_min']:nessy_first.read_axis_limits['x_max']]
else:
raise TypeError("{} data shape is nto accepted".format(var_dims))
nessy_first.variables[var_name]['data'] = data
# Avoid some attributes
for attrname in var_info.ncattrs():
if attrname not in ['missing_value', '_FillValue']:
value = getattr(var_info, attrname)
if value in ['unitless', '-']:
value = ''
nessy_first.variables[var_name][attrname] = value
nc_add.close()
else:
nessy_first.concatenate(aux_nessy)
return nessy_first