CST_SaveExp.R 20.8 KB
Newer Older
#'Save CSTools objects of class 's2dv_cube' containing experiments or observed 
#'data in NetCDF format
nperez's avatar
nperez committed
#'
#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es}
#'
Filippo Calì Quaglia's avatar
Filippo Calì Quaglia committed
#'@description This function allows to divide and save a object of class 
#''s2dv_cube' into a NetCDF file, allowing to reload the saved data using 
#'\code{CST_Load} function.
nperez's avatar
nperez committed
#'
#'@param data An object of class \code{s2dv_cube}.
#'@param destination A character string containing the directory name in which 
#'  to save the data. NetCDF file for each starting date are saved into the 
#'  folder tree: \cr
#'  destination/experiment/variable/. By default the function 
#'  creates and saves the data into the folder "CST_Data" in the working 
#'  directory.
#'@param extra_string A character string to be include as part of the file name, 
#'  for instance, to identify member or realization. It would be added to the 
#'  file name between underscore characters.
nperez's avatar
nperez committed
#'
#'@seealso \code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}}
nperez's avatar
nperez committed
#'
#'@import ncdf4
nperez's avatar
nperez committed
#'@importFrom s2dv Reorder InsertDim
#'@import multiApply
nperez's avatar
nperez committed
#'
#'@examples
#'\dontrun{
#'library(CSTools)
#'data <- lonlat_temp$exp
#'destination <- "./path2/"
nperez's avatar
nperez committed
#'CST_SaveExp(data = data, destination = destination)
#'}
#'
#'@export
CST_SaveExp <- function(data, destination = "./CST_Data", unique_file = TRUE) {
  # Check 's2dv_cube'
nperez's avatar
nperez committed
  if (!inherits(data, 's2dv_cube')) {
    stop("Parameter 'data' must be of the class 's2dv_cube', ",
         "as output by CSTools::CST_Load.")
  }
  # Check object structure
  if (!all(c('data', 'coords', 'attrs') %in% names(data))) {
    stop("Parameter 'data' must have 'data', 'coords' and 'attrs' elements ",
         "within the 's2dv_cube' structure.")
  }
  # sdate_name
  if (!any(names(data$coords) %in% .KnownStartDateNames())) {
    stop("Start date dimension name do not match with any of the ", 
         "accepted names by the package.")
  }
  sdate_name <- names(data$coords)[[which(names(data$coords) %in% .KnownStartDateNames())]]
  if (length(sdate_name) > 1) {
    warning("Found more than one start date dimension. Only the first one ", 
            "will be used.")
    sdate_name <- sdate_name[1]
  }
  sdates <- data$coords[[sdate_name]]
  # var_name
  if (!is.character(data$attrs$Variable$varName)) {
    stop("Element 'varName' mustbe a character string.")
  }
  var_name <- data$attrs$Variable$varName
  # metadata
  if (!inherits(data$attrs$Variable$metadata, 'list')) {
    stop("Element $attrs$Variable$metadata must be a list.")
  }
  if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) {
    warning("Metadata is not found for any coordinate.")
  } else if (!any(names(data$attrs$Variable$metadata) %in% 
                  data$attrs$Variable$varName)) {
    warning("Metadata is not found for any variable.")
  }
  # Coordinate attributes
  if (!any(names(data$coords) %in% .KnownLonNames()) | 
      !any(names(data$coords) %in% .KnownLatNames())) {
    stop("Spatial coordinate names do not match any of the names accepted by ",
         "the package.")
  }
  lon_name <- names(data$coords)[[which(names(data$coords) %in% .KnownLonNames())]]
  lat_name <- names(data$coords)[[which(names(data$coords) %in% .KnownLatNames())]]
  # Dates
  time_values <- data$attrs$Dates
  if (is.null(dim(time_values))) {
    stop("Dates element in '$data$attrs$Dates' must have time dimensios.")
  }

  SaveExp(data = data$data,
            destination = destination, 
            lon = data$coords[[lon_name]],
            lat = data$coords[[lat_name]],
            startdates = data$coords[[sdate_name]],
            Dates = data$attrs$Dates, 
            Dataset = data$attrs$Datasets, 
            var_name = data$attrs$Variable$varName,
            metadata = data$attrs$Variable$metadata,
            extra_string = extra_string, 
            unique_file = unique_file)
}
#'Save an experiment in a format compatible with CST_Load
#'@description This function is created for compatibility with CST_Load/Load for 
#'saving post-processed datasets such as those calibrated of downscaled with 
#'CSTools functions
#'
#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es}
#'
#'@param data An multi-dimensional array with named dimensions (longitude, 
#'  latitude, time, member, sdate).
#'@param lon Vector of logitud corresponding to the longitudinal dimension in 
#'  data.
#'@param lat Vector of latitud corresponding to the latitudinal dimension in 
#'  data.
#'@param Dataset A vector of character string indicating the names of the 
#'  datasets.
#'@param var_name A character string indicating the name of the variable to be 
#'  saved.
#'@param units A character string indicating the units of the variable.
#'@param startdates A vector of dates indicating the initialization date of each 
#'  simulations.
#'@param Dates A matrix of dates with two dimension 'time' and 'sdate'.
#'@param cdo_grid_name A character string indicating the name of the grid e.g.: 
#'  'r360x181'
#'@param projection A character string indicating the projection name.
#'@param destination A character string indicating the path where to store the 
#'  NetCDF files.
#'@param extra_string A character string to be include as part of the file name, 
#'  for instance, to identify member or realization.
#'@return The function creates as many files as sdates per dataset. Each file 
#'could contain multiple members. It would be added to the file name between 
#'underscore characters. The path will be created with the name of the variable 
#'and each Datasets.
nperez's avatar
nperez committed
#'@importFrom s2dv Reorder InsertDim
#'@import multiApply
#'
#'@examples
#'\dontrun{
#'data <- lonlat_temp$exp$data
#'lon <- lonlat_temp$exp$lon
#'lat <- lonlat_temp$exp$lat
#'Dataset <- 'XXX'
#'var_name <- 'tas'
#'units <- 'k'
#'startdates <- lapply(1:length(lonlat_temp$exp$Datasets),
#'                         lonlat_temp$exp$Datasets[[x]]$InitializationDates[[1]]})[[1]]
#'Dates <- lonlat_temp$exp$Dates$start
#'dim(Dates) <- c(time = length(Dates)/length(startdates), sdate = length(startdates))
#'cdo_grid_name = attr(lonlat_temp$exp$lon, 'cdo_grid_name')
#'projection = attr(lonlat_temp$exp$lon, 'projection')
#'destination = './path/'
#'SaveExp(data, lon, lat, Dataset, var_name, units, startdates, Dates,
#'                    cdo_grid_name, projection, destination)
SaveExp <- function(data, destination = NULL, lon = NULL, lat = NULL, 
                      startdates = NULL, Dates = NULL, Dataset = NULL, var_name = NULL, 
                      extra_string = NULL, 
                      metadata = NULL, unique_file = FALSE) {
  ## Initial input parameter checks:
  # data
  if (is.null(data)) {
    stop("Parameter 'data' cannot be NULL.")
  }
  dimnames <- names(dim(data))
  if (is.null(dimnames)) {
    stop("Parameter 'data' must have named dimensions.")
  }
  # destination
  if (!is.character(destination) & length(destination) > 1) {
    stop("Parameter 'destination' must be a character string of one element ",
         "indicating the name of the file (including the folder if needed) ",
         "where the data will be saved.")
  }
  # longitude and latitude
  if (is.null(lon) | is.null(lat)) {
    stop("Parameters 'lon' and 'lat' cannot be NULL.")
  }
  # startdates
  if (is.null(startdates)) {
    stop("Parameter 'startdates' cannot be NULL.")
  }
  # Dates
  if (is.null(Dates)) {
    stop("Parameter 'Dates' cannot be NULL.")
  }
  # var_name
  if (is.null(var_name)) {
    warning("Parameter 'var_name' is NULL. It will be assigned to 'X'.")
    var_name <- 'X'
  } else if (length(var_name) > 1) {
    multiple_vars <- TRUE
  } else {
    multiple_vars <- FALSE
  }
  if (!all(sapply(var_name, is.character))) {
    stop("Parameter 'var_name' must be a character string with the ",
         "variable names.")
  }
nperez's avatar
nperez committed

  ## Dimensions checks:
  # Spatial coordinates
  if (!any(dimnames %in% .KnownLonNames()) | 
      !any(dimnames %in% .KnownLatNames())) {
    stop("Spatial coordinate names do not match any of the names accepted by ",
         "the package.")
  }
  lon_name <- dimnames[[which(dimnames %in% .KnownLonNames())]]
  lat_name <- dimnames[[which(dimnames %in% .KnownLatNames())]]
  if (length(lon_name) > 1) {
    warning("Found more than one longitudinal dimension. Only the first one ", 
            "will be used.")
    lon_name <- lon_name[1]
  }
  if (length(lat_name) > 1) {
    warning("Found more than one latitudinal dimension. Only the first one ", 
            "will be used.")
    lat_name <- lat_name[1]
  }
  # ftime_name
  if (!any(dimnames %in% .KnownForecastTimeNames())) {
    stop("Forecast time dimension name do not match with any of the ", 
         "accepted names by the package.")
  }
  ftime_name <- dimnames[[which(dimnames %in% .KnownForecastTimeNames())]]
  if (length(ftime_name) > 1) {
    warning("Found more than one forecast time dimension. Only the first one ", 
            "will be used.")
    ftime_name <- ftime_name[1]
  }
  # sdate_name
  if (!any(dimnames %in% .KnownStartDateNames())) {
    stop("Start date dimension name do not match with any of the ", 
         "accepted names by the package.")
  }
  sdate_name <- dimnames[[which(dimnames %in% .KnownStartDateNames())]]
  if (length(sdate_name) == 0) {
    stop("Start date dimension name do not match with any of the ", 
         "accepted names by the package. The accepted start date names ", 
         "are: 'sdate', 'sdates', 'syear', 'sweek' and 'sday'.")
  } else if (length(sdate_name) > 1) {
    warning("Found more than one start date dimension. Only the first one ", 
            "will be used.")
    sdate_name <- sdate_name[1]
  }
  sdate_pos <- which(dimnames == sdate_name)
  # memb_dim
  if (!any(dimnames %in% .KnownMemberNames())) {
    stop("Member dimension name do not match with any of the ", 
         "accepted names by the package.")
  }
  memb_name <- dimnames[[which(dimnames %in% .KnownMemberNames())]]
  if (length(memb_name) > 1) {
    warning("Found more than one member dimension. Only the first one ", 
            "will be used.")
    memb_name <- memb_name[1]
  }
  if (any(dimnames == 'level')) {
    stop("Ask for saving 3Dim fields to the mantainer.")
nperez's avatar
nperez committed
  }
  if (!is.null(extra_string)) {
    if (!is.character(extra_string)) {
      stop("Parameter 'extra_string' must be a character string.")
    }
  }
  # dataset
  dataset_pos <- which(dimnames %in% c('dataset', 'dat'))
  dat_dim_name <- names(dim(data))[dataset_pos]
nperez's avatar
nperez committed
  dims <- dim(data)
nperez's avatar
nperez committed
  if (length(dataset_pos) == 0) {
    warning("Dataset dimension is not found in 'data'. ",
nperez's avatar
nperez committed
            "All data is stored in the same 'dataset' folder.")
    data <- InsertDim(data, posdim = 1, lendim = 1, name = "dataset")
    dimnames <- names(dim(data))
    dataset_pos <- 1
nperez's avatar
nperez committed
  } else if (length(dataset_pos) > 1) {
    stop("There is more than one 'dataset' dimension in data.")
nperez's avatar
nperez committed
  }
nperez's avatar
nperez committed
  n_datasets <- dim(data)[dataset_pos] # number of folder by dataset
  # dataset names
  if (is.null(Dataset)) {
    warning("Parameter 'Dataset' is NULL. Files will be saved with a ", 
            "directory name of 'XXX'.")
    Dataset <- rep('XXX', n_datasets )
  }
nperez's avatar
nperez committed
  datasets <- Dataset
nperez's avatar
nperez committed
  if (n_datasets > length(datasets)) {
    warning("Dimension 'dataset' in 'data' is greater than those listed in ",
            "element 'Datasets' and the first element will be reused.")
nperez's avatar
nperez committed
    datasets <- c(datasets, rep(datasets[1], n_datasets - length(datasets)))
  } else if (n_datasets < length(datasets)) {
    warning("Dimension 'dataset' in 'data' is smaller than those listed in ",
            "element 'Datasets' and only the firsts elements will be used.")
    datasets <- datasets[1:n_datasets]
  # From Load
  if (inherits(datasets, 'list')) {
    datasets <- names(datasets)
nperez's avatar
nperez committed
  }
  # var
  var_dim <- NULL
  if (any(c('var', 'vars', 'variable', 'variables') %in% dimnames)) {
    var_dim <- which(dimnames %in% c('var', 'vars', 'variable', 'variables'))
    var_dim_name <- names(dim(data))[var_dim]
    if (length(var_dim) > 1) {
      warning("There is more than one 'variable' dimension in data.")
      var_dim <- var_dim[1]
    }
    if (dims[var_dim] == 1) {
      data <- adrop(data, drop = var_dim)
      dimnames <- names(dim(data))
      var_dim <- NULL
      num_vars <- 1
    } else {
      num_vars <- dim(data)[var_dim]
    }
  } else {
    num_vars <- 1
    var_dim_name <- NULL
nperez's avatar
nperez committed
  }
  known_dim_names <- c("var", "vars", "dat", "dataset", "nlevel", "levels", "level", 
                       .KnownLatNames(), .KnownLonNames(), .KnownForecastTimeNames(), 
                       .KnownStartDateNames())
  dim_names <- names(dim(data))
  if (any(dim_names != c(dat_dim_name, var_dim_name, sdate_name, lon_name, 
                         lat_name, memb_name, ftime_name))) {
    data <- Reorder(data, c(dat_dim_name, var_dim_name, sdate_name, lon_name, 
                            lat_name, memb_name, ftime_name))
  }
  # Dimensions definition
  defined_dims <- NULL
  filedims <- names(dim(data))[which(!names(dim(data)) %in% c(dat_dim_name, var_dim_name, 
                                                              sdate_name, ftime_name))]
  for (i_coord in filedims) {
    if (i_coord %in% names(metadata)) {
      dim_info <- list()
      if ('variables' %in% names(attributes(metadata[[i_coord]]))) {
        i_coord_info <- attributes(metadata[[i_coord]])[['variables']][[i_coord]]$dim[[1]]
      } else if (inherits(metadata[[i_coord]], 'list')) {
        i_coord_info <- metadata[[i_coord]]
      } else if (!is.null(attributes(metadata[[i_coord]]))) {
        i_coord_info <- attributes(metadata[[i_coord]])
      } else {
        stop("Metadata is not correct.")
      }
      # name
      dim_info[['name']] <- i_coord
      # len
      if ('len' %in% names(i_coord_info)) {
        if (i_coord_info[['len']] != dim(data)[i_coord]) {
          dim_info[['original_len']] <- i_coord_info[['len']]
        }   
      }
      dim_info[['len']] <- as.numeric(dim(data)[i_coord])
      # unlim
      if (!('unlim' %in% names(i_coord_info))) {
        dim_info[['unlim']] <- ifelse(dim_info[['name']] == 'time', TRUE, FALSE)
      } else {
        dim_info[['unlim']] <- i_coord_info[['unlim']][1]
      }
      # units
      if (!('units' %in% names(i_coord_info))) {
        dim_info[['units']] <- ''
      } else {
        dim_info[['units']] <- i_coord_info[['units']][1]
      }
      # vals
      # Change this part if there are coords element!!
      if (i_coord == lon_name) {
        dim_info[['vals']] <- as.vector(lon)
      } else if (i_coord == lat_name) {
        dim_info[['vals']] <- as.vector(lat)
      } else {
        dim_info[['vals']] <- 1:dim(data)[i_coord]
      }
      # create dimvar
      if (!('create_dimvar' %in% names(i_coord_info))) {
        dim_info[['create_dimvar']] <- TRUE
      } else {
        if (is.logical(i_coord_info[['create_dimvar']])) {
          dim_info[['create_dimvar']] <- i_coord_info[['create_dimvar']][1]
        }
      }
      # calendar
      if (!('calendar' %in% names(i_coord_info))) {
        dim_info[['calendar']] <- NA
      } else {
        dim_info[['calendar']] <- i_coord_info[['calendar']][1]
      }
      # longname
      if (!('longname' %in% names(i_coord_info))) {
        dim_info[['longname']] <- dim_info[['name']]
      } else {
        dim_info[['longname']] <- i_coord_info[['longname']][1]
      }
      new_dim <- list(ncdim_def(dim_info[['name']], dim_info[['units']], 
                                vals = dim_info[['vals']], dim_info[['unlim']], 
                                dim_info[['create_dimvar']], 
                                dim_info[['calendar']], 
                                dim_info[['longname']]))
      names(new_dim) <- dim_info[['name']]
      defined_dims <- c(defined_dims, new_dim)
nperez's avatar
nperez committed
    } else {
      new_dim <- list(ncdim_def(name = i_coord, units = "adim",
                            vals = 1 : dim(data)[i_coord],
                            longname = i_coord, create_dimvar = TRUE))
      names(new_dim) <- i_coord
      defined_dims <- c(defined_dims, new_dim)
  defined_vars <- list()
  if (!unique_file) {
    for (i in 1:n_datasets) {
      path <- file.path(destination, datasets[i], var_name)
      for (j in 1:num_vars) {
        dir.create(path[j], recursive = TRUE)
        startdate <- gsub("-", "", startdates)
        dim(startdate) <- c(length(startdate))
        names(dim(startdate)) <- sdate_name
        Apply(data = list(data, startdate, Dates),
              target_dims = list(c(memb_name, ftime_name, lat_name, lon_name), 
                                 NULL, ftime_name),
              fun = .saveExp, ftime_name = ftime_name, dims_var = defined_dims, 
              var_name = var_name[j], 
              metadata_var = metadata[[var_name[j]]], 
              destination = path[j], extra_string = extra_string)
      }
    }
  } else {
    # dataset definition
    new_dim <- list(ncdim_def(name = dat_dim_name, units = "adim",
                              vals = 1 : dim(data)[dat_dim_name],
                              longname = 'dataset', create_dimvar = TRUE))
    names(new_dim) <- dat_dim_name
    defined_dims <- c(new_dim, defined_dims)
    # sdate definition
    sdates <- Subset(Dates, along = ftime_name, 1, drop = 'selected')
    differ <- as.numeric((sdates - sdates[1])/3600)
    new_dim <- list(ncdim_def(name = sdate_name, units = paste('hours since', sdates[1]),
                              vals = differ,
                              longname = sdate_name, create_dimvar = TRUE))
    names(new_dim) <- sdate_name
    defined_dims <- c(defined_dims, new_dim)
    # ftime definition
    ftime_dates <- Subset(Dates, along = sdate_name, 1, drop = 'selected')
    differ <- as.numeric((ftime_dates - ftime_dates[1])/3600)
    # ERROR HERE: which values we take?
    dim_time <- list(ncdim_def(name = ftime_name, units = paste('hours since', Dates[1]),
                     vals = differ, calendar = 'proleptic_gregorian',
                     longname = ftime_name, unlim = TRUE))
    names(dim_time) <- ftime_name
    defined_dims <- c(defined_dims, dim_time)
nperez's avatar
nperez committed

    # var definition
    defined_vars <- list()
    for (j in 1:num_vars) {
      var_info <- metadata[[var_name[j]]][!sapply(metadata[[var_name[j]]], inherits, 'list')]
      if (is.null(var_info[['prec']])) {
        new_var  <- list(ncvar_def(name = var_name[j],
                        units = var_info[['units']],
                        dim = defined_dims, 
                        missval = var_info[['missval']], 
                        longname = var_info[['longname']]))
      } else {
        new_var  <- list(ncvar_def(name = var_name[j],
                    units = var_info[['units']],
                    dim = defined_dims, 
                    missval = var_info[['missval']], 
                    prec = var_info[['prec']], 
                    longname = var_info[['long_name']]))
      }
nperez's avatar
nperez committed

      names(new_var) <- var_name[j]
      defined_vars <- c(defined_vars, new_var)
    }
    if (is.null(extra_string)) {
      file_name <- paste0(var_name[j], "_", 'test', ".nc")
    } else {
      file_name <- paste0(var_name[j], "_", extra_string, "_", 'test', ".nc")
    }
    full_filename <- file.path(destination, file_name)
    file_nc <- nc_create(full_filename, defined_vars)
    if (is.null(var_dim_name)) {
      ncvar_put(file_nc, var_name, vals = data)
    } else {
      for (j in 1:num_vars) {
        ncvar_put(file_nc, defined_vars[[j]]$name, 
                  vals = Subset(data, var_dim_name, j, drop = 'selected'))
      }
    }
    nc_close(file_nc)
nperez's avatar
nperez committed
  }
}
.saveExp <- function(data, sdate, dates, ftime_name, var_name, units, dims_var, 
                     metadata_var, destination, extra_string = NULL) {
  var_info <- metadata_var[!sapply(metadata_var, inherits, 'list')]
  differ <- as.numeric((dates - dates[1])/3600)
  dim_time <- list(ncdim_def(name = ftime_name, units = paste('hours since', Dates[1]),
                        vals = differ, calendar = 'proleptic_gregorian',
                        longname = ftime_name, unlim = TRUE))
  names(dim_time) <- ftime_name
  dims_var <- c(dims_var, dim_time)
  # prec
  if (is.null(var_info[['prec']])) {
    datanc  <- ncvar_def(name = var_name,
                         units = var_info[['units']],
                          dim = dims_var, 
                          missval = var_info[['missval']], 
                          longname = var_info[['longname']])
  } else {
  datanc  <- ncvar_def(name = var_name,
                       units = var_info[['units']],
                       dim = dims_var, 
                       missval = var_info[['missval']], 
                       prec = var_info[['prec']], 
                       longname = var_info[['long_name']])
  }

  if (is.null(extra_string)) {
    file_name <- paste0(var_name, "_", sdate, ".nc")
  } else {
    file_name <- paste0(var_name, "_", extra_string, "_", sdate, ".nc")
  }
  full_filename <- file.path(destination, file_name)
  file_nc <- nc_create(full_filename, datanc)
  ncvar_put(file_nc, datanc, data)
  nc_close(file_nc)