From c7403141e889d4a1c485489a7d40166b815d6ddb Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 28 Nov 2023 18:38:09 +0100 Subject: [PATCH 01/16] Add the development with easyNCDF into CST_SaveExp and changed name of the function --- DESCRIPTION | 2 +- NAMESPACE | 3 + R/CST_SaveCube.R | 729 ++++++++++++++++++++++++++++++++++++++++++++ man/CST_SaveCube.Rd | 114 +++++++ man/SaveCube.Rd | 143 +++++++++ 5 files changed, 990 insertions(+), 1 deletion(-) create mode 100644 R/CST_SaveCube.R create mode 100644 man/CST_SaveCube.Rd create mode 100644 man/SaveCube.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3af5dcb1..4e41770d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,5 +90,5 @@ VignetteBuilder: knitr License: GPL-3 Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 012f76cf..35bd3c4f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(CST_RFTemp) export(CST_RFWeights) export(CST_RainFARM) export(CST_RegimesAssign) +export(CST_SaveCube) export(CST_SaveExp) export(CST_SplitDim) export(CST_Start) @@ -61,6 +62,7 @@ export(RFTemp) export(RF_Weights) export(RainFARM) export(RegimesAssign) +export(SaveCube) export(SaveExp) export(SplitDim) export(WeatherRegime) @@ -69,6 +71,7 @@ export(s2dv_cube) export(training_analogs) import(RColorBrewer) import(abind) +import(easyNCDF) import(ggplot2) import(lubridate) import(multiApply) diff --git a/R/CST_SaveCube.R b/R/CST_SaveCube.R new file mode 100644 index 00000000..908148fc --- /dev/null +++ b/R/CST_SaveCube.R @@ -0,0 +1,729 @@ +#'Save objects of class 's2dv_cube' to data in NetCDF format +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#' +#'@description This function allows to divide and save a object of class +#''s2dv_cube' into a NetCDF file, allowing to reload the saved data using +#'\code{Start} function from StartR package. If the original 's2dv_cube' object +#'has been created from \code{CST_Load()}, then it can be reloaded with +#'\code{Load()}. +#' +#'@param data An object of class \code{s2dv_cube}. +#'@param destination A character string containing the directory name in which +#' to save the data. NetCDF file for each starting date are saved into the +#' folder tree: \cr +#' destination/Dataset/variable/. By default the function +#' creates and saves the data into the working directory. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no +#' start date dimension. +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension. By default, it is set to 'time'. It can be NULL if there is no +#' forecast time dimension. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' By default, it is set to 'dataset'. It can be NULL if there is no dataset +#' dimension. +#'@param var_dim A character string indicating the name of variable dimension. +#' By default, it is set to 'var'. It can be NULL if there is no variable +#' dimension. +#'@param memb_dim A character string indicating the name of the member dimension. +#' By default, it is set to 'member'. It can be NULL if there is no member +#' dimension. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files. It must be a vector of the same +#' length as the start date dimension of data. 
It must be a vector of class +#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. +#' If it is NULL, the coordinate corresponding the the start date dimension or +#' the first Date of each time step will be used as the name of the files. +#' It is NULL by default. +#'@param drop_dims A vector of character strings indicating the dimension names +#' of length 1 that need to be dropped in order that they don't appear in the +#' netCDF file. It is NULL by default (optional). +#'@param single_file A logical value indicating if all object is saved in a +#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, +#' the array is separated for Datasets, variable and start date. It is FALSE +#' by default. +#'@param extra_string A character string to be include as part of the file name, +#' for instance, to identify member or realization. It would be added to the +#' file name between underscore characters. +#' +#'@return Multiple or single NetCDF files containing the data array.\cr +#'\item{\code{single_file = TRUE}}{ +#' All data is saved in a single file located in the specified destination +#' path with the following name: +#' ___.nc. Multiple +#' variables are saved separately in the same file. The forecast time units +#' is extracted from the frequency of the time steps (hours, days, months). +#' The first value of forecast time is 1. If no frequency is found, the units +#' will be 'hours since' each start date and the time steps are assumed to be +#' equally spaced. +#'} +#'\item{\code{single_file = FALSE}}{ +#' The data array is subset and stored into multiple files. Each file +#' contains the data subset for each start date, variable and dataset. Files +#' with different variables and Datasets are stored in separated directories +#' within the following directory tree: destination/Dataset/variable/. +#' The name of each file will be: +#' __.nc. +#'} +#' +#'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and +#'\code{\link{s2dv_cube}} +#' +#'@examples +#'\dontrun{ +#'data <- lonlat_temp_st$exp +#'destination <- "./" +#'CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', +#' var_dim = 'var', dat_dim = 'dataset') +#'} +#' +#'@export +CST_SaveCube <- function(data, destination = "./", sdate_dim = 'sdate', + ftime_dim = 'time', dat_dim = 'dataset', + var_dim = 'var', memb_dim = 'member', + startdates = NULL, drop_dims = NULL, + single_file = FALSE, extra_string = NULL) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Check object structure + if (!all(c('data', 'attrs') %in% names(data))) { + stop("Parameter 'data' must have at least 'data' and 'attrs' elements ", + "within the 's2dv_cube' structure.") + } + if (!inherits(data$attrs, 'list')) { + stop("Level 'attrs' must be a list with at least 'Dates' element.") + } + if (!all(c('coords') %in% names(data))) { + warning("Element 'coords' not found. 
No coordinates will be used.") + } + # metadata + if (is.null(data$attrs$Variable$metadata)) { + warning("No metadata found in element Variable from attrs.") + } else { + if (!inherits(data$attrs$Variable$metadata, 'list')) { + stop("Element metadata from Variable element in attrs must be a list.") + } + if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { + warning("Metadata is not found for any coordinate.") + } else if (!any(names(data$attrs$Variable$metadata) %in% + data$attrs$Variable$varName)) { + warning("Metadata is not found for any variable.") + } + } + # Dates + if (is.null(data$attrs$Dates)) { + stop("Element 'Dates' from 'attrs' level cannot be NULL.") + } + if (is.null(dim(data$attrs$Dates))) { + stop("Element 'Dates' from 'attrs' level must have time dimensions.") + } + # sdate_dim + if (!is.null(sdate_dim)) { + if (!is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (length(sdate_dim) > 1) { + warning("Parameter 'sdate_dim' has length greater than 1 and ", + "only the first element will be used.") + sdate_dim <- sdate_dim[1] + } + } else if (length(dim(data$attrs$Dates)) == 1) { + sdate_dim <- 'sdate' + dim(data$data) <- c(sdate = 1, dim(data$data)) + data$dims <- dim(data$data) + dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) + data$coords[[sdate_dim]] <- data$attrs$Dates[1] + } + # startdates + if (is.null(startdates)) { + startdates <- data$coords[[sdate_dim]] + } else { + if (!is.character(startdates)) { + warning(paste0("Parameter 'startdates' is not a character string, ", + "it will not be used.")) + startdates <- data$coords[[sdate_dim]] + } + if (!is.null(sdate_dim)) { + if (dim(data$data)[sdate_dim] != length(startdates)) { + warning(paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", sdate_dim,"', it will not be used.")) + startdates <- data$coords[[sdate_dim]] + } + } + } + + SaveCube(data = data$data, + destination = destination, + Dates = data$attrs$Dates, + coords = data$coords, + varname = data$attrs$Variable$varName, + metadata = data$attrs$Variable$metadata, + Datasets = data$attrs$Datasets, + startdates = startdates, + dat_dim = dat_dim, sdate_dim = sdate_dim, + ftime_dim = ftime_dim, var_dim = var_dim, + memb_dim = memb_dim, + drop_dims = drop_dims, + extra_string = extra_string, + single_file = single_file, + global_attrs = global_attrs) +} +#'Save a multidimensional array with metadata to data in NetCDF format +#'@description This function allows to save a data array with metadata into a +#'NetCDF file, allowing to reload the saved data using \code{Start} function +#'from StartR package. If the original 's2dv_cube' object has been created from +#'\code{CST_Load()}, then it can be reloaded with \code{Load()}. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#' +#'@param data A multi-dimensional array with named dimensions. +#'@param destination A character string indicating the path where to store the +#' NetCDF files. +#'@param Dates A named array of dates with the corresponding sdate and forecast +#' time dimension. If there is no sdate_dim, you can set it to NULL. +#' It must have ftime_dim dimension. +#'@param coords A named list with elements of the coordinates corresponding to +#' the dimensions of the data parameter. The names and length of each element +#' must correspond to the names of the dimensions. 
If any coordinate is not +#' provided, it is set as an index vector with the values from 1 to the length +#' of the corresponding dimension. +#'@param varname A character string indicating the name of the variable to be +#' saved. +#'@param metadata A named list where each element is a variable containing the +#' corresponding information. The information must be contained in a list of +#' lists for each variable. +#'@param Datasets A vector of character string indicating the names of the +#' datasets. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files. It must be a vector of the same +#' length as the start date dimension of data. It must be a vector of class +#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. +#' If it is NULL, the first Date of each time step will be used as the name of +#' the files. It is NULL by default. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no +#' start date dimension. +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension. By default, it is set to 'time'. It can be NULL if there is no +#' forecast time dimension. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' By default, it is set to 'dataset'. It can be NULL if there is no dataset +#' dimension. +#'@param var_dim A character string indicating the name of variable dimension. +#' By default, it is set to 'var'. It can be NULL if there is no variable +#' dimension. +#'@param memb_dim A character string indicating the name of the member dimension. +#' By default, it is set to 'member'. It can be NULL if there is no member +#' dimension. +#'@param drop_dims A vector of character strings indicating the dimension names +#' of length 1 that need to be dropped in order that they don't appear in the +#' netCDF file. It is NULL by default (optional). +#'@param single_file A logical value indicating if all object is saved in a +#' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, +#' the array is separated for Datasets, variable and start date. It is FALSE +#' by default (optional). +#'@param extra_string A character string to be include as part of the file name, +#' for instance, to identify member or realization. It would be added to the +#' file name between underscore characters (optional). +#'@param global_attrs A list with elements containing the global attributes +#' to be saved in the NetCDF. +#' +#'@return Multiple or single NetCDF files containing the data array.\cr +#'\item{\code{single_file = TRUE}}{ +#' All data is saved in a single file located in the specified destination +#' path with the following name: +#' ___.nc. Multiple +#' variables are saved separately in the same file. The forecast time units +#' is extracted from the frequency of the time steps (hours, days, months). +#' The first value of forecast time is 1. If no frequency is found, the units +#' will be 'hours since' each start date and the time steps are assumed to be +#' equally spaced. +#'} +#'\item{\code{single_file = FALSE}}{ +#' The data array is subset and stored into multiple files. Each file +#' contains the data subset for each start date, variable and dataset. Files +#' with different variables and Datasets are stored in separated directories +#' within the following directory tree: destination/Dataset/variable/. 
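+#' For example, with a hypothetical dataset named 'EXP1' and the variable
+#' 'tas', the files would be stored under destination/EXP1/tas/.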
+#' The name of each file will be: +#' __.nc. +#'} +#' +#'@examples +#'\dontrun{ +#'data <- lonlat_temp_st$exp$data +#'lon <- lonlat_temp_st$exp$coords$lon +#'lat <- lonlat_temp_st$exp$coords$lat +#'coords <- list(lon = lon, lat = lat) +#'Datasets <- lonlat_temp_st$exp$attrs$Datasets +#'varname <- 'tas' +#'Dates <- lonlat_temp_st$exp$attrs$Dates +#'destination = './' +#'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata +#'SaveExp(data = data, destination = destination, coords = coords, +#' Datasets = Datasets, varname = varname, Dates = Dates, +#' metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', +#' var_dim = 'var', dat_dim = 'dataset') +#'} +#' +#'@import easyNCDF +#'@importFrom s2dv Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +SaveCube <- function(data, destination = "./", Dates = NULL, coords = NULL, + varname = NULL, metadata = NULL, Datasets = NULL, + startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', + drop_dims = NULL, single_file = FALSE, extra_string = NULL, + global_attrs = NULL) { + ## Initial checks + # data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + dimnames <- names(dim(data)) + if (is.null(dimnames)) { + stop("Parameter 'data' must be an array with named dimensions.") + } + # destination + if (!is.character(destination) | length(destination) > 1) { + stop("Parameter 'destination' must be a character string of one element ", + "indicating the name of the file (including the folder if needed) ", + "where the data will be saved.") + } + # Dates + if (!is.null(Dates)) { + if (!inherits(Dates, "POSIXct") & !inherits(Dates, "Date")) { + stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") + } + if (is.null(dim(Dates))) { + stop("Parameter 'Dates' must have dimension names.") + } + } + # drop_dims + if (!is.null(drop_dims)) { + if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { + warning("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. It will not be used.") + } else if (!all(dim(data)[drop_dims] %in% 1)) { + warning("Parameter 'drop_dims' can only contain dimension names ", + "that are of length 1. It will not be used.") + } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { + warning("Parameter 'drop_dims' contains dimensions used in the computation. 
", + "It will not be used.") + drop_dims <- NULL + } else { + data <- Subset(x = data, along = drop_dims, + indices = lapply(1:length(drop_dims), function(x) 1), + drop = 'selected') + dimnames <- names(dim(data)) + } + } + # coords + if (!is.null(coords)) { + if (!all(names(coords) %in% dimnames)) { + coords <- coords[-which(!names(coords) %in% dimnames)] + } + for (i_coord in dimnames) { + if (i_coord %in% names(coords)) { + if (length(coords[[i_coord]]) != dim(data)[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + } else { + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + } + } else { + coords <- sapply(dimnames, function(x) 1:dim(data)[x]) + } + # varname + if (is.null(varname)) { + varname <- 'X' + } else if (length(varname) > 1) { + multiple_vars <- TRUE + } else { + multiple_vars <- FALSE + } + if (!all(sapply(varname, is.character))) { + stop("Parameter 'varname' must be a character string with the ", + "variable names.") + } + # single_file + if (!inherits(single_file, 'logical')) { + warning("Parameter 'single_file' must be a logical value. It will be ", + "set as FALSE.") + single_file <- FALSE + } + # extra_string + if (!is.null(extra_string)) { + if (!is.character(extra_string)) { + stop("Parameter 'extra_string' must be a character string.") + } + } + # global_attrs + if (!is.null(global_attrs)) { + if (!inherits(global_attrs, 'list')) { + stop("Parameter 'global_attrs' must be a list.") + } + } + + ## Dimensions checks + # Spatial coordinates + if (!any(dimnames %in% .KnownLonNames()) | + !any(dimnames %in% .KnownLatNames())) { + lon_dim <- NULL + lat_dim <- NULL + } else { + lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] + lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] + } + # ftime_dim + if (!is.null(ftime_dim)) { + if (!is.character(ftime_dim)) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!all(ftime_dim %in% dimnames)) { + stop("Parameter 'ftime_dim' is not found in 'data' dimension.") + } + } + # sdate_dim + if (!is.null(sdate_dim)) { + if (!is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!all(sdate_dim %in% dimnames)) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!all(memb_dim %in% dimnames)) { + stop("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no member dimension.") + } + } + # dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!all(dat_dim %in% dimnames)) { + stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no Datasets dimension.") + } + if (length(dat_dim) > 1) { + warning("Parameter 'dat_dim' has length greater than 1 and ", + "only the first element will be used.") + dat_dim <- dat_dim[1] + } + n_datasets <- dim(data)[dat_dim] + } else { + n_datasets <- 1 + } + # var_dim + if (!is.null(var_dim)) { + if (!is.character(var_dim)) { + stop("Parameter 'var_dim' must be a character string.") + } + if (!all(var_dim %in% dimnames)) { + stop("Parameter 'var_dim' is not found in 'data' dimension. 
Set it ", + "as NULL if there is no variable dimension.") + } + n_vars <- dim(data)[var_dim] + } else { + n_vars <- 1 + } + # minimum dimensions + if (all(dimnames %in% c(var_dim, dat_dim))) { + if (!single_file) { + warning("Parameter data has only ", + paste(c(var_dim, dat_dim), collapse = ' and '), " dimensions ", + "and it cannot be splitted in multiple files. All data will ", + "be saved in a single file.") + single_file <- TRUE + } + } + # Dates dimension check + if (!is.null(Dates)) { + if (is.null(ftime_dim)) { + stop("Parameter 'Dates' must have 'ftime_dim'.") + } + if (all(c(ftime_dim, sdate_dim) %in% names(dim(Dates)))) { + if (any(!names(dim(Dates)) %in% c(ftime_dim, sdate_dim))) { + if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } else { + stop("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") + } + } + if (is.null(startdates)) { + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } else if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { + warning("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + } else if (any(ftime_dim %in% names(dim(Dates)))) { + if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } + } + } else if (!single_file) { + warning("Dates must be provided if 'data' must be saved in separated files. 
", + "All data will be saved in a single file.") + single_file <- TRUE + } + # startdates + if (is.null(startdates)) { + if (is.null(sdate_dim)) { + startdates <- 'XXX' + } else { + startdates <- rep('XXX', dim(data)[sdate_dim]) + } + } + # Datasets + if (is.null(Datasets)) { + Datasets <- rep('XXX', n_datasets ) + } + if (inherits(Datasets, 'list')) { + Datasets <- names(Datasets) + } + if (n_datasets > length(Datasets)) { + warning("Dimension 'Datasets' in 'data' is greater than those listed in ", + "element 'Datasets' and the first element will be reused.") + Datasets <- c(Datasets, rep(Datasets[1], n_datasets - length(Datasets))) + } else if (n_datasets < length(Datasets)) { + warning("Dimension 'Datasets' in 'data' is smaller than those listed in ", + "element 'Datasets' and only the firsts elements will be used.") + Datasets <- Datasets[1:n_datasets] + } + + ## Unknown dimensions check + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) + if (!all(dimnames %in% alldims)) { + unknown_dims <- dimnames[which(!dimnames %in% alldims)] + memb_dim <- c(memb_dim, unknown_dims) + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) + } + # Reorder + if (any(dimnames != alldims)) { + data <- Reorder(data, alldims) + dimnames <- names(dim(data)) + if (!is.null(attr(data, 'dimensions'))) { + attr(data, 'dimensions') <- dimnames + } + } + + ## NetCDF dimensions definition + defined_dims <- NULL + extra_info_dim <- NULL + if (is.null(Dates)) { + filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim))] + } else { + filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] + } + + for (i_coord in filedims) { + # vals + if (i_coord %in% names(coords)) { + if (is.numeric(coords[[i_coord]])) { + coords[[i_coord]] <- as.vector(coords[[i_coord]]) + } else { + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + } else { + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + dim(coords[[i_coord]]) <- dim(data)[i_coord] + + ## metadata + if (i_coord %in% names(metadata)) { + if ('variables' %in% names(attributes(metadata[[i_coord]]))) { + # from Start: 'lon' or 'lat' + attrs <- attributes(metadata[[i_coord]])[['variables']] + attr(coords[[i_coord]], 'variables') <- attrs + } else if (inherits(metadata[[i_coord]], 'list')) { + # from Start and Load: main var + attr(coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) + names(attributes(coords[[i_coord]])$variables) <- i_coord + } else if (!is.null(attributes(metadata[[i_coord]]))) { + # from Load + attr(coords[[i_coord]], 'variables') <- list(attributes(metadata[[i_coord]])) + names(attributes(coords[[i_coord]])$variables) <- i_coord + } else { + stop("Metadata is not correct.") + } + } + } + coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL + + defined_vars <- list() + if (!single_file) { + for (i in 1:n_datasets) { + path <- file.path(destination, Datasets[i], varname) + for (j in 1:n_vars) { + dir.create(path[j], recursive = TRUE) + startdates <- gsub("-", "", startdates) + dim(startdates) <- c(length(startdates)) + names(dim(startdates)) <- sdate_dim + if (is.null(dat_dim) & is.null(var_dim)) { + data_subset <- data + } else if (is.null(dat_dim)) { + data_subset <- Subset(data, c(var_dim), list(j), drop = 'selected') + } else if (is.null(var_dim)) { + data_subset <- Subset(data, along = c(dat_dim), list(i), drop = 'selected') + } else { + data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') + } + if (is.null(Dates)) { + 
input_data <- list(data_subset, startdates) + target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) + } else { + input_data <- list(data_subset, startdates, Dates) + target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) + } + print(varname) + Apply(data = input_data, + target_dims = target_dims, + fun = .savearray, + destination = path[j], + coords = coords, + ftime_dim = ftime_dim, + varname = varname[j], + metadata_var = metadata[[varname[j]]], + extra_string = extra_string, + global_attrs = global_attrs) + } + } + } else { + # Datasets definition + # From here + if (!is.null(dat_dim)) { + coords[[dat_dim]] <- array(1:dim(data)[dat_dim], dim = dim(data)[dat_dim]) + attr(coords[[dat_dim]], 'variables') <- list(list(units = 'adim')) + # extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) + } + first_sdate <- last_sdate <- NULL + if (!is.null(Dates)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + # ftime definition + leadtimes <- as.numeric(Dates - sdates)/3600 + } else { + # sdate definition + sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + differ <- as.numeric((sdates - sdates[1])/3600) + # new + dim(differ) <- dim(data)[sdate_dim] + coords[[sdate_dim]] <- differ + attr(coords[[sdate_dim]], 'variables') <- list(list(units = paste('hours since', sdates[1]), + calendar = 'proleptic_gregorian', + longname = sdate_dim)) + + first_sdate <- sdates[1] + last_sdate <- sdates[length(sdates)] + # ftime definition + Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) + differ_ftime <- apply(Dates, 2, function(x){as.numeric((x - x[1])/3600)}) + dim(differ_ftime) <- dim(Dates) + leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') + + if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { + warning("Time steps are not equal for all start dates. 
Only ", + "forecast time values for the first start date will be saved ", + "correctly.") + } + } + + # Save in units 'hours since' + dim(leadtimes) <- dim(data)[ftime_dim] + coords[[ftime_dim]] <- leadtimes + attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', + paste(sdates, collapse = ', ')), + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + } + + # var definition + defined_vars <- list() + extra_info_var <- NULL + for (j in 1:n_vars) { + varname_j <- varname[j] + metadata_j <- metadata[[varname_j]] + if (is.null(var_dim)) { + coords[[varname_j]] <- data + } else { + coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') + } + if (!is.null(metadata_j)) { + attr(coords[[varname_j]], 'variables') <- list(metadata_j) + names(attributes(coords[[varname_j]])$variables) <- varname_j + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(coords[[varname_j]])$global_attrs <- global_attrs + } + } + if (is.null(extra_string)) { + gsub("-", "", first_sdate) + file_name <- paste0(paste(c(varname, + gsub("-", "", first_sdate), + gsub("-", "", last_sdate)), + collapse = '_'), ".nc") + } else { + nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) + if (nc == ".nc") { + file_name <- extra_string + } else { + file_name <- paste0(extra_string, ".nc") + } + } + full_filename <- file.path(destination, file_name) + ArrayToNc(coords, full_filename) + } +} + +.savecube <- function(data, coords, destination = "./", + startdates = NULL, dates = NULL, + ftime_dim = 'time', varname = 'var', + metadata_var = NULL, extra_string = NULL, + global_attrs = NULL) { + if (!is.null(dates)) { + differ <- as.numeric((dates - dates[1])/3600) + dim(differ) <- dim(data)[ftime_dim] + coords[[ftime_dim]] <- differ + attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', Dates[1,1]), + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + names(attributes(coords[[ftime_dim]])$variables) <- ftime_dim + } + # Add data + coords[[varname]] <- data + if (!is.null(metadata_var)) { + attr(coords[[varname]], 'variables') <- list(metadata_var) + names(attributes(coords[[varname]])$variables) <- varname + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(coords[[varname]])$global_attrs <- global_attrs + } + + if (is.null(extra_string)) { + file_name <- paste0(varname, "_", startdates, ".nc") + } else { + file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") + } + full_filename <- file.path(destination, file_name) + ArrayToNc(coords, full_filename) +} \ No newline at end of file diff --git a/man/CST_SaveCube.Rd b/man/CST_SaveCube.Rd new file mode 100644 index 00000000..cb12f09f --- /dev/null +++ b/man/CST_SaveCube.Rd @@ -0,0 +1,114 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_SaveCube.R +\name{CST_SaveCube} +\alias{CST_SaveCube} +\title{Save objects of class 's2dv_cube' to data in NetCDF format} +\usage{ +CST_SaveCube( + data, + destination = "./", + sdate_dim = "sdate", + ftime_dim = "time", + dat_dim = "dataset", + var_dim = "var", + memb_dim = "member", + startdates = NULL, + drop_dims = NULL, + single_file = FALSE, + extra_string = NULL +) +} +\arguments{ +\item{data}{An object of class \code{s2dv_cube}.} + +\item{destination}{A character string containing the directory name in which +to save the data. 
NetCDF file for each starting date are saved into the +folder tree: \cr +destination/Dataset/variable/. By default the function +creates and saves the data into the working directory.} + +\item{sdate_dim}{A character string indicating the name of the start date +dimension. By default, it is set to 'sdate'. It can be NULL if there is no +start date dimension.} + +\item{ftime_dim}{A character string indicating the name of the forecast time +dimension. By default, it is set to 'time'. It can be NULL if there is no +forecast time dimension.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +By default, it is set to 'dataset'. It can be NULL if there is no dataset +dimension.} + +\item{var_dim}{A character string indicating the name of variable dimension. +By default, it is set to 'var'. It can be NULL if there is no variable +dimension.} + +\item{memb_dim}{A character string indicating the name of the member dimension. +By default, it is set to 'member'. It can be NULL if there is no member +dimension.} + +\item{startdates}{A vector of dates that will be used for the filenames +when saving the data in multiple files. It must be a vector of the same +length as the start date dimension of data. It must be a vector of class +\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. +If it is NULL, the coordinate corresponding the the start date dimension or +the first Date of each time step will be used as the name of the files. +It is NULL by default.} + +\item{drop_dims}{A vector of character strings indicating the dimension names +of length 1 that need to be dropped in order that they don't appear in the +netCDF file. It is NULL by default (optional).} + +\item{single_file}{A logical value indicating if all object is saved in a +single file (TRUE) or in multiple files (FALSE). When it is FALSE, +the array is separated for Datasets, variable and start date. It is FALSE +by default.} + +\item{extra_string}{A character string to be include as part of the file name, +for instance, to identify member or realization. It would be added to the +file name between underscore characters.} +} +\value{ +Multiple or single NetCDF files containing the data array.\cr +\item{\code{single_file = TRUE}}{ + All data is saved in a single file located in the specified destination + path with the following name: + ___.nc. Multiple + variables are saved separately in the same file. The forecast time units + is extracted from the frequency of the time steps (hours, days, months). + The first value of forecast time is 1. If no frequency is found, the units + will be 'hours since' each start date and the time steps are assumed to be + equally spaced. +} +\item{\code{single_file = FALSE}}{ + The data array is subset and stored into multiple files. Each file + contains the data subset for each start date, variable and dataset. Files + with different variables and Datasets are stored in separated directories + within the following directory tree: destination/Dataset/variable/. + The name of each file will be: + __.nc. +} +} +\description{ +This function allows to divide and save a object of class +'s2dv_cube' into a NetCDF file, allowing to reload the saved data using +\code{Start} function from StartR package. If the original 's2dv_cube' object +has been created from \code{CST_Load()}, then it can be reloaded with +\code{Load()}. 
+} +\examples{ +\dontrun{ +data <- lonlat_temp_st$exp +destination <- "./" +CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', + var_dim = 'var', dat_dim = 'dataset') +} + +} +\seealso{ +\code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and +\code{\link{s2dv_cube}} +} +\author{ +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/man/SaveCube.Rd b/man/SaveCube.Rd new file mode 100644 index 00000000..da9a1ed4 --- /dev/null +++ b/man/SaveCube.Rd @@ -0,0 +1,143 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_SaveCube.R +\name{SaveCube} +\alias{SaveCube} +\title{Save a multidimensional array with metadata to data in NetCDF format} +\usage{ +SaveCube( + data, + destination = "./", + Dates = NULL, + coords = NULL, + varname = NULL, + metadata = NULL, + Datasets = NULL, + startdates = NULL, + dat_dim = "dataset", + sdate_dim = "sdate", + ftime_dim = "time", + var_dim = "var", + memb_dim = "member", + drop_dims = NULL, + single_file = FALSE, + extra_string = NULL, + global_attrs = NULL +) +} +\arguments{ +\item{data}{A multi-dimensional array with named dimensions.} + +\item{destination}{A character string indicating the path where to store the +NetCDF files.} + +\item{Dates}{A named array of dates with the corresponding sdate and forecast +time dimension. If there is no sdate_dim, you can set it to NULL. +It must have ftime_dim dimension.} + +\item{coords}{A named list with elements of the coordinates corresponding to +the dimensions of the data parameter. The names and length of each element +must correspond to the names of the dimensions. If any coordinate is not +provided, it is set as an index vector with the values from 1 to the length +of the corresponding dimension.} + +\item{varname}{A character string indicating the name of the variable to be +saved.} + +\item{metadata}{A named list where each element is a variable containing the +corresponding information. The information must be contained in a list of +lists for each variable.} + +\item{Datasets}{A vector of character string indicating the names of the +datasets.} + +\item{startdates}{A vector of dates that will be used for the filenames +when saving the data in multiple files. It must be a vector of the same +length as the start date dimension of data. It must be a vector of class +\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. +If it is NULL, the first Date of each time step will be used as the name of +the files. It is NULL by default.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +By default, it is set to 'dataset'. It can be NULL if there is no dataset +dimension.} + +\item{sdate_dim}{A character string indicating the name of the start date +dimension. By default, it is set to 'sdate'. It can be NULL if there is no +start date dimension.} + +\item{ftime_dim}{A character string indicating the name of the forecast time +dimension. By default, it is set to 'time'. It can be NULL if there is no +forecast time dimension.} + +\item{var_dim}{A character string indicating the name of variable dimension. +By default, it is set to 'var'. It can be NULL if there is no variable +dimension.} + +\item{memb_dim}{A character string indicating the name of the member dimension. +By default, it is set to 'member'. 
It can be NULL if there is no member +dimension.} + +\item{drop_dims}{A vector of character strings indicating the dimension names +of length 1 that need to be dropped in order that they don't appear in the +netCDF file. It is NULL by default (optional).} + +\item{single_file}{A logical value indicating if all object is saved in a +unique file (TRUE) or in separated directories (FALSE). When it is FALSE, +the array is separated for Datasets, variable and start date. It is FALSE +by default (optional).} + +\item{extra_string}{A character string to be include as part of the file name, +for instance, to identify member or realization. It would be added to the +file name between underscore characters (optional).} + +\item{global_attrs}{A list with elements containing the global attributes +to be saved in the NetCDF.} +} +\value{ +Multiple or single NetCDF files containing the data array.\cr +\item{\code{single_file = TRUE}}{ + All data is saved in a single file located in the specified destination + path with the following name: + ___.nc. Multiple + variables are saved separately in the same file. The forecast time units + is extracted from the frequency of the time steps (hours, days, months). + The first value of forecast time is 1. If no frequency is found, the units + will be 'hours since' each start date and the time steps are assumed to be + equally spaced. +} +\item{\code{single_file = FALSE}}{ + The data array is subset and stored into multiple files. Each file + contains the data subset for each start date, variable and dataset. Files + with different variables and Datasets are stored in separated directories + within the following directory tree: destination/Dataset/variable/. + The name of each file will be: + __.nc. +} +} +\description{ +This function allows to save a data array with metadata into a +NetCDF file, allowing to reload the saved data using \code{Start} function +from StartR package. If the original 's2dv_cube' object has been created from +\code{CST_Load()}, then it can be reloaded with \code{Load()}. 
+} +\examples{ +\dontrun{ +data <- lonlat_temp_st$exp$data +lon <- lonlat_temp_st$exp$coords$lon +lat <- lonlat_temp_st$exp$coords$lat +coords <- list(lon = lon, lat = lat) +Datasets <- lonlat_temp_st$exp$attrs$Datasets +varname <- 'tas' +Dates <- lonlat_temp_st$exp$attrs$Dates +destination = './' +metadata <- lonlat_temp_st$exp$attrs$Variable$metadata +SaveExp(data = data, destination = destination, coords = coords, + Datasets = Datasets, varname = varname, Dates = Dates, + metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', + var_dim = 'var', dat_dim = 'dataset') +} + +} +\author{ +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} -- GitLab From 80ed199503e5dabd6871f691638b24b7f96eabe4 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 29 Nov 2023 13:15:07 +0100 Subject: [PATCH 02/16] Correct errors and rename function --- R/CST_SaveCube.R | 729 ----------------------------- R/CST_SaveExp.R | 559 +++++++--------------- tests/testthat/test-CST_SaveCube.R | 324 +++++++++++++ tests/testthat/test-CST_SaveExp.R | 268 +++++++---- 4 files changed, 663 insertions(+), 1217 deletions(-) delete mode 100644 R/CST_SaveCube.R create mode 100644 tests/testthat/test-CST_SaveCube.R diff --git a/R/CST_SaveCube.R b/R/CST_SaveCube.R deleted file mode 100644 index 908148fc..00000000 --- a/R/CST_SaveCube.R +++ /dev/null @@ -1,729 +0,0 @@ -#'Save objects of class 's2dv_cube' to data in NetCDF format -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@description This function allows to divide and save a object of class -#''s2dv_cube' into a NetCDF file, allowing to reload the saved data using -#'\code{Start} function from StartR package. If the original 's2dv_cube' object -#'has been created from \code{CST_Load()}, then it can be reloaded with -#'\code{Load()}. -#' -#'@param data An object of class \code{s2dv_cube}. -#'@param destination A character string containing the directory name in which -#' to save the data. NetCDF file for each starting date are saved into the -#' folder tree: \cr -#' destination/Dataset/variable/. By default the function -#' creates and saves the data into the working directory. -#'@param sdate_dim A character string indicating the name of the start date -#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no -#' start date dimension. -#'@param ftime_dim A character string indicating the name of the forecast time -#' dimension. By default, it is set to 'time'. It can be NULL if there is no -#' forecast time dimension. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' By default, it is set to 'dataset'. It can be NULL if there is no dataset -#' dimension. -#'@param var_dim A character string indicating the name of variable dimension. -#' By default, it is set to 'var'. It can be NULL if there is no variable -#' dimension. -#'@param memb_dim A character string indicating the name of the member dimension. -#' By default, it is set to 'member'. It can be NULL if there is no member -#' dimension. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files. It must be a vector of the same -#' length as the start date dimension of data. It must be a vector of class -#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -#' If it is NULL, the coordinate corresponding the the start date dimension or -#' the first Date of each time step will be used as the name of the files. -#' It is NULL by default. 
-#'@param drop_dims A vector of character strings indicating the dimension names -#' of length 1 that need to be dropped in order that they don't appear in the -#' netCDF file. It is NULL by default (optional). -#'@param single_file A logical value indicating if all object is saved in a -#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, -#' the array is separated for Datasets, variable and start date. It is FALSE -#' by default. -#'@param extra_string A character string to be include as part of the file name, -#' for instance, to identify member or realization. It would be added to the -#' file name between underscore characters. -#' -#'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file = TRUE}}{ -#' All data is saved in a single file located in the specified destination -#' path with the following name: -#' ___.nc. Multiple -#' variables are saved separately in the same file. The forecast time units -#' is extracted from the frequency of the time steps (hours, days, months). -#' The first value of forecast time is 1. If no frequency is found, the units -#' will be 'hours since' each start date and the time steps are assumed to be -#' equally spaced. -#'} -#'\item{\code{single_file = FALSE}}{ -#' The data array is subset and stored into multiple files. Each file -#' contains the data subset for each start date, variable and dataset. Files -#' with different variables and Datasets are stored in separated directories -#' within the following directory tree: destination/Dataset/variable/. -#' The name of each file will be: -#' __.nc. -#'} -#' -#'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and -#'\code{\link{s2dv_cube}} -#' -#'@examples -#'\dontrun{ -#'data <- lonlat_temp_st$exp -#'destination <- "./" -#'CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') -#'} -#' -#'@export -CST_SaveCube <- function(data, destination = "./", sdate_dim = 'sdate', - ftime_dim = 'time', dat_dim = 'dataset', - var_dim = 'var', memb_dim = 'member', - startdates = NULL, drop_dims = NULL, - single_file = FALSE, extra_string = NULL) { - # Check 's2dv_cube' - if (!inherits(data, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube'.") - } - # Check object structure - if (!all(c('data', 'attrs') %in% names(data))) { - stop("Parameter 'data' must have at least 'data' and 'attrs' elements ", - "within the 's2dv_cube' structure.") - } - if (!inherits(data$attrs, 'list')) { - stop("Level 'attrs' must be a list with at least 'Dates' element.") - } - if (!all(c('coords') %in% names(data))) { - warning("Element 'coords' not found. 
No coordinates will be used.") - } - # metadata - if (is.null(data$attrs$Variable$metadata)) { - warning("No metadata found in element Variable from attrs.") - } else { - if (!inherits(data$attrs$Variable$metadata, 'list')) { - stop("Element metadata from Variable element in attrs must be a list.") - } - if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { - warning("Metadata is not found for any coordinate.") - } else if (!any(names(data$attrs$Variable$metadata) %in% - data$attrs$Variable$varName)) { - warning("Metadata is not found for any variable.") - } - } - # Dates - if (is.null(data$attrs$Dates)) { - stop("Element 'Dates' from 'attrs' level cannot be NULL.") - } - if (is.null(dim(data$attrs$Dates))) { - stop("Element 'Dates' from 'attrs' level must have time dimensions.") - } - # sdate_dim - if (!is.null(sdate_dim)) { - if (!is.character(sdate_dim)) { - stop("Parameter 'sdate_dim' must be a character string.") - } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } - } else if (length(dim(data$attrs$Dates)) == 1) { - sdate_dim <- 'sdate' - dim(data$data) <- c(sdate = 1, dim(data$data)) - data$dims <- dim(data$data) - dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) - data$coords[[sdate_dim]] <- data$attrs$Dates[1] - } - # startdates - if (is.null(startdates)) { - startdates <- data$coords[[sdate_dim]] - } else { - if (!is.character(startdates)) { - warning(paste0("Parameter 'startdates' is not a character string, ", - "it will not be used.")) - startdates <- data$coords[[sdate_dim]] - } - if (!is.null(sdate_dim)) { - if (dim(data$data)[sdate_dim] != length(startdates)) { - warning(paste0("Parameter 'startdates' doesn't have the same length ", - "as dimension '", sdate_dim,"', it will not be used.")) - startdates <- data$coords[[sdate_dim]] - } - } - } - - SaveCube(data = data$data, - destination = destination, - Dates = data$attrs$Dates, - coords = data$coords, - varname = data$attrs$Variable$varName, - metadata = data$attrs$Variable$metadata, - Datasets = data$attrs$Datasets, - startdates = startdates, - dat_dim = dat_dim, sdate_dim = sdate_dim, - ftime_dim = ftime_dim, var_dim = var_dim, - memb_dim = memb_dim, - drop_dims = drop_dims, - extra_string = extra_string, - single_file = single_file, - global_attrs = global_attrs) -} -#'Save a multidimensional array with metadata to data in NetCDF format -#'@description This function allows to save a data array with metadata into a -#'NetCDF file, allowing to reload the saved data using \code{Start} function -#'from StartR package. If the original 's2dv_cube' object has been created from -#'\code{CST_Load()}, then it can be reloaded with \code{Load()}. -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@param data A multi-dimensional array with named dimensions. -#'@param destination A character string indicating the path where to store the -#' NetCDF files. -#'@param Dates A named array of dates with the corresponding sdate and forecast -#' time dimension. If there is no sdate_dim, you can set it to NULL. -#' It must have ftime_dim dimension. -#'@param coords A named list with elements of the coordinates corresponding to -#' the dimensions of the data parameter. The names and length of each element -#' must correspond to the names of the dimensions. 
If any coordinate is not -#' provided, it is set as an index vector with the values from 1 to the length -#' of the corresponding dimension. -#'@param varname A character string indicating the name of the variable to be -#' saved. -#'@param metadata A named list where each element is a variable containing the -#' corresponding information. The information must be contained in a list of -#' lists for each variable. -#'@param Datasets A vector of character string indicating the names of the -#' datasets. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files. It must be a vector of the same -#' length as the start date dimension of data. It must be a vector of class -#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -#' If it is NULL, the first Date of each time step will be used as the name of -#' the files. It is NULL by default. -#'@param sdate_dim A character string indicating the name of the start date -#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no -#' start date dimension. -#'@param ftime_dim A character string indicating the name of the forecast time -#' dimension. By default, it is set to 'time'. It can be NULL if there is no -#' forecast time dimension. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' By default, it is set to 'dataset'. It can be NULL if there is no dataset -#' dimension. -#'@param var_dim A character string indicating the name of variable dimension. -#' By default, it is set to 'var'. It can be NULL if there is no variable -#' dimension. -#'@param memb_dim A character string indicating the name of the member dimension. -#' By default, it is set to 'member'. It can be NULL if there is no member -#' dimension. -#'@param drop_dims A vector of character strings indicating the dimension names -#' of length 1 that need to be dropped in order that they don't appear in the -#' netCDF file. It is NULL by default (optional). -#'@param single_file A logical value indicating if all object is saved in a -#' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, -#' the array is separated for Datasets, variable and start date. It is FALSE -#' by default (optional). -#'@param extra_string A character string to be include as part of the file name, -#' for instance, to identify member or realization. It would be added to the -#' file name between underscore characters (optional). -#'@param global_attrs A list with elements containing the global attributes -#' to be saved in the NetCDF. -#' -#'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file = TRUE}}{ -#' All data is saved in a single file located in the specified destination -#' path with the following name: -#' ___.nc. Multiple -#' variables are saved separately in the same file. The forecast time units -#' is extracted from the frequency of the time steps (hours, days, months). -#' The first value of forecast time is 1. If no frequency is found, the units -#' will be 'hours since' each start date and the time steps are assumed to be -#' equally spaced. -#'} -#'\item{\code{single_file = FALSE}}{ -#' The data array is subset and stored into multiple files. Each file -#' contains the data subset for each start date, variable and dataset. Files -#' with different variables and Datasets are stored in separated directories -#' within the following directory tree: destination/Dataset/variable/. 
-#' The name of each file will be: -#' __.nc. -#'} -#' -#'@examples -#'\dontrun{ -#'data <- lonlat_temp_st$exp$data -#'lon <- lonlat_temp_st$exp$coords$lon -#'lat <- lonlat_temp_st$exp$coords$lat -#'coords <- list(lon = lon, lat = lat) -#'Datasets <- lonlat_temp_st$exp$attrs$Datasets -#'varname <- 'tas' -#'Dates <- lonlat_temp_st$exp$attrs$Dates -#'destination = './' -#'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata -#'SaveExp(data = data, destination = destination, coords = coords, -#' Datasets = Datasets, varname = varname, Dates = Dates, -#' metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') -#'} -#' -#'@import easyNCDF -#'@importFrom s2dv Reorder -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -SaveCube <- function(data, destination = "./", Dates = NULL, coords = NULL, - varname = NULL, metadata = NULL, Datasets = NULL, - startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', - ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', - drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL) { - ## Initial checks - # data - if (is.null(data)) { - stop("Parameter 'data' cannot be NULL.") - } - dimnames <- names(dim(data)) - if (is.null(dimnames)) { - stop("Parameter 'data' must be an array with named dimensions.") - } - # destination - if (!is.character(destination) | length(destination) > 1) { - stop("Parameter 'destination' must be a character string of one element ", - "indicating the name of the file (including the folder if needed) ", - "where the data will be saved.") - } - # Dates - if (!is.null(Dates)) { - if (!inherits(Dates, "POSIXct") & !inherits(Dates, "Date")) { - stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") - } - if (is.null(dim(Dates))) { - stop("Parameter 'Dates' must have dimension names.") - } - } - # drop_dims - if (!is.null(drop_dims)) { - if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { - warning("Parameter 'drop_dims' must be character string containing ", - "the data dimension names to be dropped. It will not be used.") - } else if (!all(dim(data)[drop_dims] %in% 1)) { - warning("Parameter 'drop_dims' can only contain dimension names ", - "that are of length 1. It will not be used.") - } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { - warning("Parameter 'drop_dims' contains dimensions used in the computation. 
", - "It will not be used.") - drop_dims <- NULL - } else { - data <- Subset(x = data, along = drop_dims, - indices = lapply(1:length(drop_dims), function(x) 1), - drop = 'selected') - dimnames <- names(dim(data)) - } - } - # coords - if (!is.null(coords)) { - if (!all(names(coords) %in% dimnames)) { - coords <- coords[-which(!names(coords) %in% dimnames)] - } - for (i_coord in dimnames) { - if (i_coord %in% names(coords)) { - if (length(coords[[i_coord]]) != dim(data)[i_coord]) { - warning(paste0("Coordinate '", i_coord, "' has different lenght as ", - "its dimension and it will not be used.")) - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } else { - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } - } else { - coords <- sapply(dimnames, function(x) 1:dim(data)[x]) - } - # varname - if (is.null(varname)) { - varname <- 'X' - } else if (length(varname) > 1) { - multiple_vars <- TRUE - } else { - multiple_vars <- FALSE - } - if (!all(sapply(varname, is.character))) { - stop("Parameter 'varname' must be a character string with the ", - "variable names.") - } - # single_file - if (!inherits(single_file, 'logical')) { - warning("Parameter 'single_file' must be a logical value. It will be ", - "set as FALSE.") - single_file <- FALSE - } - # extra_string - if (!is.null(extra_string)) { - if (!is.character(extra_string)) { - stop("Parameter 'extra_string' must be a character string.") - } - } - # global_attrs - if (!is.null(global_attrs)) { - if (!inherits(global_attrs, 'list')) { - stop("Parameter 'global_attrs' must be a list.") - } - } - - ## Dimensions checks - # Spatial coordinates - if (!any(dimnames %in% .KnownLonNames()) | - !any(dimnames %in% .KnownLatNames())) { - lon_dim <- NULL - lat_dim <- NULL - } else { - lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] - lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] - } - # ftime_dim - if (!is.null(ftime_dim)) { - if (!is.character(ftime_dim)) { - stop("Parameter 'ftime_dim' must be a character string.") - } - if (!all(ftime_dim %in% dimnames)) { - stop("Parameter 'ftime_dim' is not found in 'data' dimension.") - } - } - # sdate_dim - if (!is.null(sdate_dim)) { - if (!is.character(sdate_dim)) { - stop("Parameter 'sdate_dim' must be a character string.") - } - if (!all(sdate_dim %in% dimnames)) { - stop("Parameter 'sdate_dim' is not found in 'data' dimension.") - } - } - # memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim)) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!all(memb_dim %in% dimnames)) { - stop("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no member dimension.") - } - } - # dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim)) { - stop("Parameter 'dat_dim' must be a character string.") - } - if (!all(dat_dim %in% dimnames)) { - stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no Datasets dimension.") - } - if (length(dat_dim) > 1) { - warning("Parameter 'dat_dim' has length greater than 1 and ", - "only the first element will be used.") - dat_dim <- dat_dim[1] - } - n_datasets <- dim(data)[dat_dim] - } else { - n_datasets <- 1 - } - # var_dim - if (!is.null(var_dim)) { - if (!is.character(var_dim)) { - stop("Parameter 'var_dim' must be a character string.") - } - if (!all(var_dim %in% dimnames)) { - stop("Parameter 'var_dim' is not found in 'data' dimension. 
Set it ", - "as NULL if there is no variable dimension.") - } - n_vars <- dim(data)[var_dim] - } else { - n_vars <- 1 - } - # minimum dimensions - if (all(dimnames %in% c(var_dim, dat_dim))) { - if (!single_file) { - warning("Parameter data has only ", - paste(c(var_dim, dat_dim), collapse = ' and '), " dimensions ", - "and it cannot be splitted in multiple files. All data will ", - "be saved in a single file.") - single_file <- TRUE - } - } - # Dates dimension check - if (!is.null(Dates)) { - if (is.null(ftime_dim)) { - stop("Parameter 'Dates' must have 'ftime_dim'.") - } - if (all(c(ftime_dim, sdate_dim) %in% names(dim(Dates)))) { - if (any(!names(dim(Dates)) %in% c(ftime_dim, sdate_dim))) { - if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] - } else { - stop("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") - } - } - if (is.null(startdates)) { - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { - startdates <- format(startdates, "%Y%m%d") - } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { - warning("Parameter 'startdates' should be a character string containing ", - "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", - "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } - } else if (any(ftime_dim %in% names(dim(Dates)))) { - if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] - } - } - } else if (!single_file) { - warning("Dates must be provided if 'data' must be saved in separated files. 
", - "All data will be saved in a single file.") - single_file <- TRUE - } - # startdates - if (is.null(startdates)) { - if (is.null(sdate_dim)) { - startdates <- 'XXX' - } else { - startdates <- rep('XXX', dim(data)[sdate_dim]) - } - } - # Datasets - if (is.null(Datasets)) { - Datasets <- rep('XXX', n_datasets ) - } - if (inherits(Datasets, 'list')) { - Datasets <- names(Datasets) - } - if (n_datasets > length(Datasets)) { - warning("Dimension 'Datasets' in 'data' is greater than those listed in ", - "element 'Datasets' and the first element will be reused.") - Datasets <- c(Datasets, rep(Datasets[1], n_datasets - length(Datasets))) - } else if (n_datasets < length(Datasets)) { - warning("Dimension 'Datasets' in 'data' is smaller than those listed in ", - "element 'Datasets' and only the firsts elements will be used.") - Datasets <- Datasets[1:n_datasets] - } - - ## Unknown dimensions check - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) - if (!all(dimnames %in% alldims)) { - unknown_dims <- dimnames[which(!dimnames %in% alldims)] - memb_dim <- c(memb_dim, unknown_dims) - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) - } - # Reorder - if (any(dimnames != alldims)) { - data <- Reorder(data, alldims) - dimnames <- names(dim(data)) - if (!is.null(attr(data, 'dimensions'))) { - attr(data, 'dimensions') <- dimnames - } - } - - ## NetCDF dimensions definition - defined_dims <- NULL - extra_info_dim <- NULL - if (is.null(Dates)) { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim))] - } else { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] - } - - for (i_coord in filedims) { - # vals - if (i_coord %in% names(coords)) { - if (is.numeric(coords[[i_coord]])) { - coords[[i_coord]] <- as.vector(coords[[i_coord]]) - } else { - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } else { - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - dim(coords[[i_coord]]) <- dim(data)[i_coord] - - ## metadata - if (i_coord %in% names(metadata)) { - if ('variables' %in% names(attributes(metadata[[i_coord]]))) { - # from Start: 'lon' or 'lat' - attrs <- attributes(metadata[[i_coord]])[['variables']] - attr(coords[[i_coord]], 'variables') <- attrs - } else if (inherits(metadata[[i_coord]], 'list')) { - # from Start and Load: main var - attr(coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) - names(attributes(coords[[i_coord]])$variables) <- i_coord - } else if (!is.null(attributes(metadata[[i_coord]]))) { - # from Load - attr(coords[[i_coord]], 'variables') <- list(attributes(metadata[[i_coord]])) - names(attributes(coords[[i_coord]])$variables) <- i_coord - } else { - stop("Metadata is not correct.") - } - } - } - coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL - - defined_vars <- list() - if (!single_file) { - for (i in 1:n_datasets) { - path <- file.path(destination, Datasets[i], varname) - for (j in 1:n_vars) { - dir.create(path[j], recursive = TRUE) - startdates <- gsub("-", "", startdates) - dim(startdates) <- c(length(startdates)) - names(dim(startdates)) <- sdate_dim - if (is.null(dat_dim) & is.null(var_dim)) { - data_subset <- data - } else if (is.null(dat_dim)) { - data_subset <- Subset(data, c(var_dim), list(j), drop = 'selected') - } else if (is.null(var_dim)) { - data_subset <- Subset(data, along = c(dat_dim), list(i), drop = 'selected') - } else { - data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') - } - if (is.null(Dates)) { - 
input_data <- list(data_subset, startdates) - target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) - } else { - input_data <- list(data_subset, startdates, Dates) - target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) - } - print(varname) - Apply(data = input_data, - target_dims = target_dims, - fun = .savearray, - destination = path[j], - coords = coords, - ftime_dim = ftime_dim, - varname = varname[j], - metadata_var = metadata[[varname[j]]], - extra_string = extra_string, - global_attrs = global_attrs) - } - } - } else { - # Datasets definition - # From here - if (!is.null(dat_dim)) { - coords[[dat_dim]] <- array(1:dim(data)[dat_dim], dim = dim(data)[dat_dim]) - attr(coords[[dat_dim]], 'variables') <- list(list(units = 'adim')) - # extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) - } - first_sdate <- last_sdate <- NULL - if (!is.null(Dates)) { - if (is.null(sdate_dim)) { - sdates <- Dates[1] - # ftime definition - leadtimes <- as.numeric(Dates - sdates)/3600 - } else { - # sdate definition - sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - differ <- as.numeric((sdates - sdates[1])/3600) - # new - dim(differ) <- dim(data)[sdate_dim] - coords[[sdate_dim]] <- differ - attr(coords[[sdate_dim]], 'variables') <- list(list(units = paste('hours since', sdates[1]), - calendar = 'proleptic_gregorian', - longname = sdate_dim)) - - first_sdate <- sdates[1] - last_sdate <- sdates[length(sdates)] - # ftime definition - Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) - differ_ftime <- apply(Dates, 2, function(x){as.numeric((x - x[1])/3600)}) - dim(differ_ftime) <- dim(Dates) - leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') - - if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { - warning("Time steps are not equal for all start dates. 
Only ", - "forecast time values for the first start date will be saved ", - "correctly.") - } - } - - # Save in units 'hours since' - dim(leadtimes) <- dim(data)[ftime_dim] - coords[[ftime_dim]] <- leadtimes - attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', - paste(sdates, collapse = ', ')), - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - } - - # var definition - defined_vars <- list() - extra_info_var <- NULL - for (j in 1:n_vars) { - varname_j <- varname[j] - metadata_j <- metadata[[varname_j]] - if (is.null(var_dim)) { - coords[[varname_j]] <- data - } else { - coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') - } - if (!is.null(metadata_j)) { - attr(coords[[varname_j]], 'variables') <- list(metadata_j) - names(attributes(coords[[varname_j]])$variables) <- varname_j - } - # Add global attributes - if (!is.null(global_attrs)) { - attributes(coords[[varname_j]])$global_attrs <- global_attrs - } - } - if (is.null(extra_string)) { - gsub("-", "", first_sdate) - file_name <- paste0(paste(c(varname, - gsub("-", "", first_sdate), - gsub("-", "", last_sdate)), - collapse = '_'), ".nc") - } else { - nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) - if (nc == ".nc") { - file_name <- extra_string - } else { - file_name <- paste0(extra_string, ".nc") - } - } - full_filename <- file.path(destination, file_name) - ArrayToNc(coords, full_filename) - } -} - -.savecube <- function(data, coords, destination = "./", - startdates = NULL, dates = NULL, - ftime_dim = 'time', varname = 'var', - metadata_var = NULL, extra_string = NULL, - global_attrs = NULL) { - if (!is.null(dates)) { - differ <- as.numeric((dates - dates[1])/3600) - dim(differ) <- dim(data)[ftime_dim] - coords[[ftime_dim]] <- differ - attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', Dates[1,1]), - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(attributes(coords[[ftime_dim]])$variables) <- ftime_dim - } - # Add data - coords[[varname]] <- data - if (!is.null(metadata_var)) { - attr(coords[[varname]], 'variables') <- list(metadata_var) - names(attributes(coords[[varname]])$variables) <- varname - } - # Add global attributes - if (!is.null(global_attrs)) { - attributes(coords[[varname]])$global_attrs <- global_attrs - } - - if (is.null(extra_string)) { - file_name <- paste0(varname, "_", startdates, ".nc") - } else { - file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") - } - full_filename <- file.path(destination, file_name) - ArrayToNc(coords, full_filename) -} \ No newline at end of file diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 7d5733f1..72b45b41 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -78,16 +78,13 @@ #' var_dim = 'var', dat_dim = 'dataset') #'} #' -#'@import ncdf4 -#'@importFrom s2dv Reorder -#'@importFrom ClimProjDiags Subset -#'@import multiApply #'@export CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', - ftime_dim = 'time', dat_dim = 'dataset', - var_dim = 'var', memb_dim = 'member', - startdates = NULL, drop_dims = NULL, - single_file = FALSE, extra_string = NULL) { + ftime_dim = 'time', dat_dim = 'dataset', + var_dim = 'var', memb_dim = 'member', + startdates = NULL, drop_dims = NULL, + single_file = FALSE, extra_string = NULL, + global_attrs = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -160,19 +157,20 @@ CST_SaveExp <- 
function(data, destination = "./", sdate_dim = 'sdate', } SaveExp(data = data$data, - destination = destination, - Dates = data$attrs$Dates, - coords = data$coords, - varname = data$attrs$Variable$varName, - metadata = data$attrs$Variable$metadata, - Datasets = data$attrs$Datasets, - startdates = startdates, - dat_dim = dat_dim, sdate_dim = sdate_dim, - ftime_dim = ftime_dim, var_dim = var_dim, - memb_dim = memb_dim, - drop_dims = drop_dims, - extra_string = extra_string, - single_file = single_file) + destination = destination, + Dates = data$attrs$Dates, + coords = data$coords, + varname = data$attrs$Variable$varName, + metadata = data$attrs$Variable$metadata, + Datasets = data$attrs$Datasets, + startdates = startdates, + dat_dim = dat_dim, sdate_dim = sdate_dim, + ftime_dim = ftime_dim, var_dim = var_dim, + memb_dim = memb_dim, + drop_dims = drop_dims, + extra_string = extra_string, + single_file = single_file, + global_attrs = global_attrs) } #'Save a multidimensional array with metadata to data in NetCDF format #'@description This function allows to save a data array with metadata into a @@ -186,7 +184,8 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param destination A character string indicating the path where to store the #' NetCDF files. #'@param Dates A named array of dates with the corresponding sdate and forecast -#' time dimension. +#' time dimension. If there is no sdate_dim, you can set it to NULL. +#' It must have ftime_dim dimension. #'@param coords A named list with elements of the coordinates corresponding to #' the dimensions of the data parameter. The names and length of each element #' must correspond to the names of the dimensions. If any coordinate is not @@ -230,6 +229,8 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param extra_string A character string to be include as part of the file name, #' for instance, to identify member or realization. It would be added to the #' file name between underscore characters (optional). +#'@param global_attrs A list with elements containing the global attributes +#' to be saved in the NetCDF. #' #'@return Multiple or single NetCDF files containing the data array.\cr #'\item{\code{single_file = TRUE}}{ @@ -268,16 +269,17 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #' var_dim = 'var', dat_dim = 'dataset') #'} #' -#'@import ncdf4 +#'@import easyNCDF #'@importFrom s2dv Reorder #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, - varname = NULL, metadata = NULL, Datasets = NULL, - startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', - ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', - drop_dims = NULL, single_file = FALSE, extra_string = NULL) { + varname = NULL, metadata = NULL, Datasets = NULL, + startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', + drop_dims = NULL, single_file = FALSE, extra_string = NULL, + global_attrs = NULL) { ## Initial checks # data if (is.null(data)) { @@ -310,6 +312,10 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else if (!all(dim(data)[drop_dims] %in% 1)) { warning("Parameter 'drop_dims' can only contain dimension names ", "that are of length 1. 
It will not be used.") + } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { + warning("Parameter 'drop_dims' contains dimensions used in the computation. ", + "It will not be used.") + drop_dims <- NULL } else { data <- Subset(x = data, along = drop_dims, indices = lapply(1:length(drop_dims), function(x) 1), @@ -330,8 +336,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, coords[[i_coord]] <- 1:dim(data)[i_coord] } } else { - warning(paste0("Coordinate '", i_coord, "' is not provided ", - "and it will be set as index in element coords.")) coords[[i_coord]] <- 1:dim(data)[i_coord] } } @@ -340,7 +344,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } # varname if (is.null(varname)) { - warning("Parameter 'varname' is NULL. It will be assigned to 'X'.") varname <- 'X' } else if (length(varname) > 1) { multiple_vars <- TRUE @@ -351,11 +354,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'varname' must be a character string with the ", "variable names.") } - # metadata - if (is.null(metadata)) { - warning("Parameter 'metadata' is not provided so the metadata saved ", - "will be incomplete.") - } # single_file if (!inherits(single_file, 'logical')) { warning("Parameter 'single_file' must be a logical value. It will be ", @@ -368,6 +366,12 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'extra_string' must be a character string.") } } + # global_attrs + if (!is.null(global_attrs)) { + if (!inherits(global_attrs, 'list')) { + stop("Parameter 'global_attrs' must be a list.") + } + } ## Dimensions checks # Spatial coordinates @@ -378,16 +382,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] - if (length(lon_dim) > 1) { - warning("Found more than one longitudinal dimension. Only the first one ", - "will be used.") - lon_dim <- lon_dim[1] - } - if (length(lat_dim) > 1) { - warning("Found more than one latitudinal dimension. Only the first one ", - "will be used.") - lat_dim <- lat_dim[1] - } } # ftime_dim if (!is.null(ftime_dim)) { @@ -395,12 +389,8 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'ftime_dim' must be a character string.") } if (!all(ftime_dim %in% dimnames)) { - stop("Parameter 'ftime_dim' is not found in 'data' dimension.") - } - if (length(ftime_dim) > 1) { - warning("Parameter 'ftime_dim' has length greater than 1 and ", - "only the first element will be used.") - ftime_dim <- ftime_dim[1] + stop("Parameter 'ftime_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no forecast time dimension.") } } # sdate_dim @@ -408,11 +398,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' must be a character string.") } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } if (!all(sdate_dim %in% dimnames)) { stop("Parameter 'sdate_dim' is not found in 'data' dimension.") } @@ -454,11 +439,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'var_dim' is not found in 'data' dimension. 
Set it ", "as NULL if there is no variable dimension.") } - if (length(var_dim) > 1) { - warning("Parameter 'var_dim' has length greater than 1 and ", - "only the first element will be used.") - var_dim <- var_dim[1] - } n_vars <- dim(data)[var_dim] } else { n_vars <- 1 @@ -475,26 +455,36 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } # Dates dimension check if (!is.null(Dates)) { - if (all(names(dim(Dates)) == c(ftime_dim, sdate_dim)) | - all(names(dim(Dates)) == c(sdate_dim, ftime_dim))) { + if (is.null(ftime_dim)) { + stop("Parameter 'Dates' must have 'ftime_dim'.") + } + if (all(c(ftime_dim, sdate_dim) %in% names(dim(Dates)))) { + if (any(!names(dim(Dates)) %in% c(ftime_dim, sdate_dim))) { + if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } else { + stop("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") + } + } if (is.null(startdates)) { startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if ((!inherits(startdates, "POSIXct") & !inherits(startdates, "Date")) && - (!is.character(startdates) | (any(nchar(startdates) > 10) | any(nchar(startdates) < 1)))) { + } else if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { warning("Parameter 'startdates' should be a character string containing ", "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - if (!is.null(format(startdates, "%Y%m%d"))) { - startdates <- format(startdates, "%Y%m%d") - } } - } else if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] - } else { - stop("Parameter 'Dates' must have start date dimension and ", - "forecast time dimension.") + } else if (any(ftime_dim %in% names(dim(Dates)))) { + if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } } + } else if (!single_file) { + warning("Dates must be provided if 'data' must be saved in separated files. ", + "All data will be saved in a single file.") + single_file <- TRUE } # startdates if (is.null(startdates)) { @@ -503,21 +493,9 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { startdates <- rep('XXX', dim(data)[sdate_dim]) } - } else { - if (is.null(sdate_dim)) { - if (length(startdates) != 1) { - warning("Parameter 'startdates' has length more than 1. Only first ", - "value will be used.") - startdates <- startdates[[1]] - } - } } # Datasets if (is.null(Datasets)) { - if (!single_file) { - warning("Parameter 'Datasets' is NULL. 
Files will be saved with a ", - "directory name of 'XXX'.") - } Datasets <- rep('XXX', n_datasets ) } if (inherits(Datasets, 'list')) { @@ -557,96 +535,42 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] } + for (i_coord in filedims) { - dim_info <- list() # vals if (i_coord %in% names(coords)) { if (is.numeric(coords[[i_coord]])) { - dim_info[['vals']] <- as.vector(coords[[i_coord]]) + coords[[i_coord]] <- as.vector(coords[[i_coord]]) } else { - dim_info[['vals']] <- 1:dim(data)[i_coord] + coords[[i_coord]] <- 1:dim(data)[i_coord] } } else { - dim_info[['vals']] <- 1:dim(data)[i_coord] - } - # name - dim_info[['name']] <- i_coord - # len - dim_info[['len']] <- as.numeric(dim(data)[i_coord]) - # unlim - dim_info[['unlim']] <- FALSE - # create_dimvar - dim_info[['create_dimvar']] <- TRUE + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + dim(coords[[i_coord]]) <- dim(data)[i_coord] + ## metadata if (i_coord %in% names(metadata)) { if ('variables' %in% names(attributes(metadata[[i_coord]]))) { # from Start: 'lon' or 'lat' - attrs <- attributes(metadata[[i_coord]])[['variables']][[i_coord]] - i_coord_info <- attrs[!sapply(attrs, inherits, 'list')] + attrs <- attributes(metadata[[i_coord]])[['variables']] + attr(coords[[i_coord]], 'variables') <- attrs } else if (inherits(metadata[[i_coord]], 'list')) { # from Start and Load: main var - i_coord_info <- metadata[[i_coord]] + attr(coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) + names(attributes(coords[[i_coord]])$variables) <- i_coord } else if (!is.null(attributes(metadata[[i_coord]]))) { # from Load - i_coord_info <- attributes(metadata[[i_coord]]) + attr(coords[[i_coord]], 'variables') <- list(attributes(metadata[[i_coord]])) + names(attributes(coords[[i_coord]])$variables) <- i_coord } else { stop("Metadata is not correct.") } - # len - if ('size' %in% names(i_coord_info)) { - if (i_coord_info[['size']] != dim(data)[i_coord]) { - dim_info[['original_len']] <- i_coord_info[['size']] - i_coord_info[['size']] <- NULL - } - } - # units - if (!('units' %in% names(i_coord_info))) { - dim_info[['units']] <- '' - } else { - dim_info[['units']] <- i_coord_info[['units']] - i_coord_info[['units']] <- NULL - } - # calendar - if (!('calendar' %in% names(i_coord_info))) { - dim_info[['calendar']] <- NA - } else { - dim_info[['calendar']] <- i_coord_info[['calendar']] - i_coord_info[['calendar']] <- NULL - } - # longname - if ('long_name' %in% names(i_coord_info)) { - dim_info[['longname']] <- i_coord_info[['long_name']] - i_coord_info[['long_name']] <- NULL - } else if ('longname' %in% names(i_coord_info)) { - dim_info[['longname']] <- i_coord_info[['longname']] - i_coord_info[['longname']] <- NULL - } else { - if (i_coord %in% .KnownLonNames()) { - dim_info[['longname']] <- 'longitude' - } else if (i_coord %in% .KnownLatNames()) { - dim_info[['longname']] <- 'latitude' - } - } - # extra information - if (!is.null(names(i_coord_info))) { - extra_info_dim[[i_coord]] <- i_coord_info - } - } else { - # units - dim_info[['units']] <- "adim" - # longname - dim_info[['longname']] <- i_coord - # calendar - dim_info[['calendar']] <- NA - } - new_dim <- list(ncdim_def(name = dim_info[['name']], units = dim_info[['units']], - vals = dim_info[['vals']], unlim = dim_info[['unlim']], - create_dimvar = dim_info[['create_dimvar']], - calendar = dim_info[['calendar']], - longname = dim_info[['longname']])) - names(new_dim) <- i_coord 
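# A minimal, self-contained sketch of the easyNCDF pattern adopted on the '+'
# side of this hunk: metadata is attached to plain R vectors through a
# 'variables' attribute and the whole list is written with ArrayToNc(), which
# replaces the ncdim_def()/ncvar_def() bookkeeping removed here. The variable
# names and attribute values below are invented for illustration only.
library(easyNCDF)
lat <- c(-45, 0, 45, 90)
dim(lat) <- c(lat = 4)
attr(lat, 'variables') <- list(lat = list(units = 'degrees_north',
                                          longname = 'latitude'))
tas <- array(rnorm(4), dim = c(lat = 4))
attr(tas, 'variables') <- list(tas = list(units = 'K'))
ArrayToNc(list(lat = lat, tas = tas), tempfile(fileext = ".nc"))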
- defined_dims <- c(defined_dims, new_dim) + } } + # Reorder coords + coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL + coords <- coords[filedims] defined_vars <- list() if (!single_file) { @@ -675,146 +599,84 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } Apply(data = input_data, target_dims = target_dims, - fun = .saveExp, + fun = .saveexp, destination = path[j], - defined_dims = defined_dims, + coords = coords, ftime_dim = ftime_dim, varname = varname[j], metadata_var = metadata[[varname[j]]], - extra_info_dim = extra_info_dim, - extra_string = extra_string) + extra_string = extra_string, + global_attrs = global_attrs) } } } else { # Datasets definition # From here if (!is.null(dat_dim)) { - new_dim <- list(ncdim_def(name = dat_dim, units = "adim", - vals = 1 : dim(data)[dat_dim], - longname = 'Datasets', create_dimvar = TRUE)) - names(new_dim) <- dat_dim - defined_dims <- c(new_dim, defined_dims) - extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) + coords[[dat_dim]] <- array(1:dim(data)[dat_dim], dim = dim(data)[dat_dim]) + attr(coords[[dat_dim]], 'variables') <- list(list(units = 'adim')) + # extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) } first_sdate <- last_sdate <- NULL if (!is.null(Dates)) { - # sdate definition - sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - differ <- as.numeric((sdates - sdates[1])/3600) - new_dim <- list(ncdim_def(name = sdate_dim, units = paste('hours since', sdates[1]), - vals = differ, - longname = sdate_dim, create_dimvar = TRUE)) - names(new_dim) <- sdate_dim - defined_dims <- c(defined_dims, new_dim) - first_sdate <- sdates[1] - last_sdate <- sdates[length(sdates)] - # ftime definition - Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) - differ_ftime <- apply(Dates, 2, function(x){as.numeric((x - x[1])/3600)}) - dim(differ_ftime) <- dim(Dates) - differ_ftime_subset <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') - if (all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { - if (all(diff(differ_ftime_subset/24) == 1)) { - # daily values - dim_time <- list(ncdim_def(name = ftime_dim, units = 'days', - vals = round(differ_ftime_subset/24) + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) - } else if (all(diff(differ_ftime_subset/24) %in% c(28, 29, 30, 31))) { - # monthly values - dim_time <- list(ncdim_def(name = ftime_dim, units = 'months', - vals = round(differ_ftime_subset/730) + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) - } else { - # other frequency - dim_time <- list(ncdim_def(name = ftime_dim, units = 'hours', - vals = differ_ftime_subset + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) - } + if (is.null(sdate_dim)) { + sdates <- Dates[1] + # ftime definition + leadtimes <- as.numeric(difftime(Dates, sdates, units = "hours")) } else { - warning("Time steps are not equal for all start dates. 
Only ", - "forecast time values for the first start date will be saved ", - "correctly.") - dim_time <- list(ncdim_def(name = ftime_dim, - units = paste('hours since', - paste(sdates, collapse = ', ')), - vals = differ_ftime_subset, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + # sdate definition + sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) + # new + dim(differ) <- dim(data)[sdate_dim] + coords[[sdate_dim]] <- differ + attr(coords[[sdate_dim]], 'variables') <- list(list(units = paste('hours since', sdates[1]), + calendar = 'proleptic_gregorian', + longname = sdate_dim)) + first_sdate <- sdates[1] + last_sdate <- sdates[length(sdates)] + # ftime definition + Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) + differ_ftime <- array(dim = dim(Dates)) + for (i in 1:length(sdates)) differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], + units = "hours")) + dim(differ_ftime) <- dim(Dates) + leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') + if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { + warning("Time steps are not equal for all start dates. Only ", + "forecast time values for the first start date will be saved ", + "correctly.") + } } + + # Save in units 'hours since' + dim(leadtimes) <- dim(data)[ftime_dim] + coords[[ftime_dim]] <- leadtimes + attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', + paste(sdates, collapse = ', ')), + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) } # var definition defined_vars <- list() extra_info_var <- NULL for (j in 1:n_vars) { - var_info <- list() - i_var_info <- metadata[[varname[j]]][!sapply(metadata[[varname[j]]], inherits, 'list')] - ## Define metadata - # name - var_info[['name']] <- varname[j] - # units - if ('units' %in% names(i_var_info)) { - var_info[['units']] <- i_var_info[['units']] - i_var_info[['units']] <- NULL - } else { - var_info[['units']] <- '' - } - # dim - var_info[['dim']] <- defined_dims - # missval - if ('missval' %in% names(i_var_info)) { - var_info[['missval']] <- i_var_info[['missval']] - i_var_info[['missval']] <- NULL + varname_j <- varname[j] + metadata_j <- metadata[[varname_j]] + if (is.null(var_dim)) { + coords[[varname_j]] <- data } else { - var_info[['missval']] <- NULL + coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') } - # longname - if (any(c('longname', 'long_name') %in% names(i_var_info))) { - longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] - var_info[['longname']] <- i_var_info[[longname]] - i_var_info[[longname]] <- NULL - } else { - var_info[['longname']] <- varname[j] - } - # prec - if ('prec' %in% names(i_var_info)) { - var_info[['prec']] <- i_var_info[['prec']] - i_var_info[['prec']] <- NULL - } else { - prec <- typeof(data) - if (prec == 'character') { - var_info[['prec']] <- 'char' - } - if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { - var_info[['prec']] <- prec - } else { - var_info[['prec']] <- 'double' - } + if (!is.null(metadata_j)) { + attr(coords[[varname_j]], 'variables') <- list(metadata_j) + names(attributes(coords[[varname_j]])$variables) <- varname_j } - # extra information - if (!is.null(names(i_var_info))) { - extra_info_var[[varname[j]]] <- i_var_info + # Add global attributes + if 
(!is.null(global_attrs)) { + attributes(coords[[varname_j]])$global_attrs <- global_attrs } - new_var <- list(ncvar_def(name = var_info[['name']], - units = var_info[['units']], - dim = var_info[['dim']], - missval = var_info[['missval']], - longname = var_info[['longname']], - prec = var_info[['prec']])) - - names(new_var) <- varname[j] - defined_vars <- c(defined_vars, new_var) } if (is.null(extra_string)) { gsub("-", "", first_sdate) @@ -823,140 +685,49 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, gsub("-", "", last_sdate)), collapse = '_'), ".nc") } else { - file_name <- paste0(paste(c(varname, extra_string, - gsub("-", "", first_sdate), - gsub("-", "", last_sdate)), - collapse = '_'), ".nc") - } - full_filename <- file.path(destination, file_name) - file_nc <- nc_create(full_filename, defined_vars) - if (is.null(var_dim)) { - ncvar_put(file_nc, varname, vals = data) - } else { - for (j in 1:n_vars) { - ncvar_put(file_nc, defined_vars[[j]]$name, - vals = Subset(data, var_dim, j, drop = 'selected')) - } - } - # Additional dimension attributes - for (dim in names(defined_dims)) { - if (dim %in% names(extra_info_dim)) { - for (info_dim in names(extra_info_dim[[dim]])) { - add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ') - ncatt_put(file_nc, dim, info_dim, add_info_dim) - } - } - } - # Additional dimension attributes - for (var in names(defined_vars)) { - if (var %in% names(extra_info_var)) { - for (info_var in names(extra_info_var[[var]])) { - add_info_var <- paste0(extra_info_var[[var]][[info_var]], collapse = ', ') - ncatt_put(file_nc, var, info_var, add_info_var) - } + nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) + if (nc == ".nc") { + file_name <- extra_string + } else { + file_name <- paste0(extra_string, ".nc") } } - nc_close(file_nc) + full_filename <- file.path(destination, file_name) + ArrayToNc(coords, full_filename) } } -.saveExp <- function(data, startdates = NULL, dates = NULL, destination = "./", - defined_dims, ftime_dim = 'time', varname = 'var', - metadata_var = NULL, extra_info_dim = NULL, - extra_string = NULL) { - # ftime_dim +.saveexp <- function(data, coords, destination = "./", + startdates = NULL, dates = NULL, + ftime_dim = 'time', varname = 'var', + metadata_var = NULL, extra_string = NULL, + global_attrs = NULL) { if (!is.null(dates)) { - differ <- as.numeric((dates - dates[1])/3600) - dim_time <- list(ncdim_def(name = ftime_dim, units = paste('hours since', dates[1]), - vals = differ, calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) - } - - ## Define var metadata - var_info <- NULL - extra_info_var <- NULL - i_var_info <- metadata_var[!sapply(metadata_var, inherits, 'list')] - - # name - var_info[['name']] <- varname - # units - if ('units' %in% names(i_var_info)) { - var_info[['units']] <- i_var_info[['units']] - i_var_info[['units']] <- NULL - } else { - var_info[['units']] <- '' - } - # dim - var_info[['dim']] <- defined_dims - # missval - if ('missval' %in% names(i_var_info)) { - var_info[['missval']] <- i_var_info[['missval']] - i_var_info[['missval']] <- NULL - } else { - var_info[['missval']] <- NULL - } - # longname - if (any(c('longname', 'long_name') %in% names(i_var_info))) { - longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] - var_info[['longname']] <- i_var_info[[longname]] - i_var_info[[longname]] <- NULL - } else { - 
var_info[['longname']] <- varname - } - # prec - if ('prec' %in% names(i_var_info)) { - var_info[['prec']] <- i_var_info[['prec']] - i_var_info[['prec']] <- NULL - } else { - prec <- typeof(data) - if (prec == 'character') { - var_info[['prec']] <- 'char' - } - if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { - var_info[['prec']] <- prec - } else { - var_info[['prec']] <- 'double' - } - } - # extra information - if (!is.null(names(i_var_info))) { - extra_info_var <- i_var_info + differ <- as.numeric(difftime(dates, dates[1], units = "hours")) + dim(differ) <- dim(data)[ftime_dim] + coords[[ftime_dim]] <- differ + attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + names(attributes(coords[[ftime_dim]])$variables) <- ftime_dim + } + # Add data + coords[[varname]] <- data + if (!is.null(metadata_var)) { + metadata_var$dim <- NULL + attr(coords[[varname]], 'variables') <- list(metadata_var) + names(attributes(coords[[varname]])$variables) <- varname + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(coords[[varname]])$global_attrs <- global_attrs } - datanc <- ncvar_def(name = var_info[['name']], - units = var_info[['units']], - dim = var_info[['dim']], - missval = var_info[['missval']], - longname = var_info[['longname']], - prec = var_info[['prec']]) - if (is.null(extra_string)) { file_name <- paste0(varname, "_", startdates, ".nc") } else { file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") } full_filename <- file.path(destination, file_name) - file_nc <- nc_create(full_filename, datanc) - ncvar_put(file_nc, datanc, data) - - # Additional attributes - for (dim in names(defined_dims)) { - if (dim %in% names(extra_info_dim)) { - for (info_dim in names(extra_info_dim[[dim]])) { - add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ') - ncatt_put(file_nc, dim, info_dim, add_info_dim) - } - } - } - # Additional dimension attributes - if (!is.null(extra_info_var)) { - for (info_var in names(extra_info_var)) { - add_info_var <- paste0(extra_info_var[[info_var]], collapse = ', ') - ncatt_put(file_nc, varname, info_var, add_info_var) - } - } - - nc_close(file_nc) -} + ArrayToNc(coords, full_filename) +} \ No newline at end of file diff --git a/tests/testthat/test-CST_SaveCube.R b/tests/testthat/test-CST_SaveCube.R new file mode 100644 index 00000000..0951c392 --- /dev/null +++ b/tests/testthat/test-CST_SaveCube.R @@ -0,0 +1,324 @@ +############################################## + +# cube0 +cube0 <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) +class(cube0) <- 's2dv_cube' + +# cube1 +cube1 <- NULL +cube1$data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) +coords2 <- list(sdate = c('20000101', '20010102', '20020103', '20030104', '20040105'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) 
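# cube1 is a hand-built 's2dv_cube': a named 'data' array, a 'coords' list and
# 'attrs$Dates' with 'sdate'/'ftime' dimensions, the elements the save
# functions check for. A usage sketch mirroring the calls exercised in the
# tests below (kept as a comment because it would write NetCDF files to the
# working directory):
#   CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL,
#                dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate')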
+cube1$coords <- coords2 +dates2 <- c('20000101', '20010102', '20020103', '20030104', '20040105') +dates2 <- as.Date(dates2, format = "%Y%m%d", tz = "UTC") +dim(dates2) <- c(sdate = 5, ftime = 1) +cube1$attrs$Dates <- dates2 +class(cube1) <- 's2dv_cube' + +# cube2 +cube2 <- cube1 +cube2$data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1, + test = 2, test2 = 3)) +dim(cube2$data) <- c(sdate = 5, lon = 4, lat = 4, ftime = 1, member = 1, + ensemble = 1, test = 2, test2 = 3) + +# cube3 +cube3 <- cube1 + +# dat0 +dates0 <- as.Date('2022-02-01', format = "%Y-%m-%d") +dim(dates0) <- c(sdate = 1) + +# dat1 +dat1 <- array(1, dim = c(test = 1)) + +# dat2 +dat2 <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) +coords2 <- list(sdate = c('20000101', '20010102', '20020103', '20030104', '20040105'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) +dates2 <- c('20000101', '20010102', '20020103', '20030104', '20040105') +dates2 <- as.Date(dates2, format = "%Y%m%d", tz = "UTC") +dim(dates2) <- c(sdate = 5, ftime = 1) + +# dat3 (without sdate dim) +dat3 <- array(1:5, dim = c(lon = 4, lat = 4, ftime = 2)) +coords3 <- list(sdate = c('20000101', '20010102'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) +dates3 <- c('20000101', '20010102') +dates3 <- as.Date(dates3, format = "%Y%m%d", tz = "UTC") +dim(dates3) <- c(ftime = 2) + +# dat4 (without ftime dim) +dat4 <- array(1:5, dim = c(sdate = 2, lon = 4, lat = 4)) +coords4 <- list(sdate = c('20000101', '20010102'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) +dates4 <- c('20000101', '20010102') +dates4 <- as.Date(dates4, format = "%Y%m%d", tz = "UTC") +dim(dates4) <- c(sdate = 2) + +# dates5 (Dates with extra dimensions) +dates5 <- c('20000101', '20010102', '20010102', '20010102') +dates5 <- as.Date(dates5, format = "%Y%m%d", tz = "UTC") +dim(dates5) <- c(ftime = 2, test = 1, test2 = 2) + +############################################## + +test_that("1. Input checks: CST_SaveCube", { + # s2dv_cube + expect_error( + CST_SaveCube(data = 1), + paste0("Parameter 'data' must be of the class 's2dv_cube'.") + ) + # structure + expect_error( + CST_SaveCube(data = cube0), + paste0("Parameter 'data' must have at least 'data' and 'attrs' elements ", + "within the 's2dv_cube' structure.") + ) + cube0 <- list(data = cube0, attrs = 1) + class(cube0) <- 's2dv_cube' + expect_error( + CST_SaveCube(data = cube0), + paste0("Level 'attrs' must be a list with at least 'Dates' element.") + ) + cube0$attrs <- NULL + cube0$attrs$Dates <- dates2 + expect_warning( + CST_SaveCube(data = cube0, sdate_dim = c('sdate', 'sweek'), + ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, single_file = FALSE), + paste0("Element 'coords' not found. 
No coordinates will be used.") + ) + + # sdate_dim + suppressWarnings( + expect_error( + CST_SaveCube(data = cube1, sdate_dim = 1), + paste0("Parameter 'sdate_dim' must be a character string.") + ) + ) + expect_warning( + CST_SaveCube(data = cube1, sdate_dim = c('sdate', 'sweek'), + ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, extra_string = 'test'), + paste0("Parameter 'sdate_dim' has length greater than 1 and ", + "only the first element will be used.") + ) + suppressWarnings( + expect_error( + CST_SaveCube(data = cube1, sdate_dim = 'a', ftime_dim = 'ftime'), + paste0("Parameter 'sdate_dim' is not found in 'data' dimension.") + ) + ) + # startdates + expect_warning( + CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, startdates = 1), + "Parameter 'startdates' is not a character string, it will not be used." + ) + expect_warning( + CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, startdates = '20100101'), + paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", 'sdate',"', it will not be used.") + ) + # metadata + expect_warning( + CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("No metadata found in element Variable from attrs.") + ) + cube1$attrs$Variable$metadata <- 'metadata' + expect_error( + CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Element metadata from Variable element in attrs must be a list.") + ) + cube1$attrs$Variable$metadata <- list(test = 'var') + expect_warning( + CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Metadata is not found for any coordinate.") + ) + cube1$attrs$Variable$metadata <- list(var = 'var') + expect_warning( + CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Metadata is not found for any variable.") + ) + # memb_dim + suppressWarnings( + expect_error( + CST_SaveCube(data = cube1, memb_dim = 1, ftime_dim = 'ftime'), + paste0("Parameter 'memb_dim' must be a character string.") + ) + ) + suppressWarnings( + expect_error( + CST_SaveCube(data = cube1, memb_dim = 'member', ftime_dim = 'ftime'), + paste0("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no member dimension.") + ) + ) +}) + +############################################## + +test_that("1. Input checks", { + # data + expect_error( + SaveCube(data = NULL), + "Parameter 'data' cannot be NULL." + ) + expect_error( + SaveCube(data = 1:10), + "Parameter 'data' must be an array with named dimensions." 
+ ) + # destination + expect_error( + SaveCube(data = array(1, dim = c(a = 1)), destination = NULL), + paste0("Parameter 'destination' must be a character string of one element ", + "indicating the name of the file (including the folder if needed) ", + "where the data will be saved."), + fixed = TRUE + ) + # Dates + expect_error( + SaveCube(data = array(1, dim = c(a = 1)), Dates = 'a'), + paste0("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") + ) + expect_error( + SaveCube(data = array(1, dim = c(a = 1)), + Dates = as.Date('2022-02-01', format = "%Y-%m-%d")), + paste0("Parameter 'Dates' must have dimension names.") + ) + # drop_dims + expect_warning( + SaveCube(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 1), + paste0("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. It will not be used.") + ) + expect_warning( + SaveCube(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 'time'), + paste0("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. It will not be used.") + ) + expect_warning( + SaveCube(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 'sdate'), + paste0("Parameter 'drop_dims' can only contain dimension names ", + "that are of length 1. It will not be used.") + ) + expect_warning( + SaveCube(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 'ftime'), + paste0("Parameter 'drop_dims' contains dimensions used in the ", + "computation. It will not be used.") + ) + # varname + suppressWarnings( + expect_error( + SaveCube(data = dat2, coords = coords2, varname = 1, + metadata = list(tas = list(level = '2m')), + Dates = dates2), + "Parameter 'varname' must be a character." + ) + ) + # varname, metadata, spatial coords, unknown dim + expect_error( + SaveCube(data = dat1, varname = 1, ftime_dim = NULL, sdate_dim = NULL, + memb_dim = NULL, dat_dim = NULL, var_dim = NULL), + paste0("Parameter 'varname' must be a character string with the ", + "variable names.") + ) + # ftime_dim + expect_error( + SaveCube(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Parameter 'ftime_dim' is not found in 'data' dimension.") + ) + # Dates dimension check + expect_error( + SaveCube(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = NULL, memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Parameter 'Dates' must have 'ftime_dim'.") + ) + expect_warning( + SaveCube(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = NULL, ftime_dim = NULL, memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Dates must be provided if 'data' must be saved in separated files. 
", + "All data will be saved in a single file.") + ) + # Without ftime and sdate + expect_error( + SaveCube(data = dat3, coords = coords3, + metadata = list(tas = list(level = '2m')), + Dates = dates5, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = NULL), + paste0("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") + ) + expect_warning( + SaveCube(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + startdates = c(paste(1:11, collapse = '')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), + paste0("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + ) + expect_warning( + SaveCube(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), + paste0("Dates must be provided if 'data' must be saved in separated files. ", + "All data will be saved in a single file.") + ) + # (dat3) Without sdate_dim + expect_warning( + SaveCube(data = dat3, coords = coords3, + metadata = list(tas = list(level = '2m')), + Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = NULL, + extra_string = 'nosdate3.nc', single_file = FALSE), + paste0("Dates must be provided if 'data' must be saved in separated files. ", + "All data will be saved in a single file.") + ) + # (dat4) Without ftime_dim + expect_error( + SaveCube(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = NULL, memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', + single_file = TRUE), + paste0("Parameter 'Dates' must have 'ftime_dim'.") + ) +}) + +############################################## diff --git a/tests/testthat/test-CST_SaveExp.R b/tests/testthat/test-CST_SaveExp.R index f39dffe9..17226161 100644 --- a/tests/testthat/test-CST_SaveExp.R +++ b/tests/testthat/test-CST_SaveExp.R @@ -31,8 +31,10 @@ cube3 <- cube1 # dat0 dates0 <- as.Date('2022-02-01', format = "%Y-%m-%d") dim(dates0) <- c(sdate = 1) + # dat1 dat1 <- array(1, dim = c(test = 1)) + # dat2 dat2 <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) coords2 <- list(sdate = c('20000101', '20010102', '20020103', '20030104', '20040105'), @@ -43,6 +45,31 @@ dates2 <- c('20000101', '20010102', '20020103', '20030104', '20040105') dates2 <- as.Date(dates2, format = "%Y%m%d", tz = "UTC") dim(dates2) <- c(sdate = 5, ftime = 1) +# dat3 (without sdate dim) +dat3 <- array(1:5, dim = c(lon = 4, lat = 4, ftime = 2)) +coords3 <- list(sdate = c('20000101', '20010102'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) +dates3 <- c('20000101', '20010102') +dates3 <- as.Date(dates3, format = "%Y%m%d", tz = "UTC") +dim(dates3) <- c(ftime = 2) + +# dat4 (without ftime dim) +dat4 <- array(1:5, dim = c(sdate = 2, lon = 4, lat = 4)) +coords4 <- list(sdate = c('20000101', '20010102'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) 
+dates4 <- c('20000101', '20010102') +dates4 <- as.Date(dates4, format = "%Y%m%d", tz = "UTC") +dim(dates4) <- c(sdate = 2) + +# dates5 (Dates with extra dimensions) +dates5 <- c('20000101', '20010102', '20010102', '20010102') +dates5 <- as.Date(dates5, format = "%Y%m%d", tz = "UTC") +dim(dates5) <- c(ftime = 2, test = 1, test2 = 2) + ############################################## test_that("1. Input checks: CST_SaveExp", { @@ -63,14 +90,14 @@ test_that("1. Input checks: CST_SaveExp", { CST_SaveExp(data = cube0), paste0("Level 'attrs' must be a list with at least 'Dates' element.") ) - # cube0$attrs <- NULL - # cube0$attrs$Dates <- dates2 - # expect_warning( - # CST_SaveExp(data = cube0, sdate_dim = c('sdate', 'sweek'), - # ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - # var_dim = NULL, single_file = FALSE), - # paste0("Element 'coords' not found. No coordinates will be used.") - # ) + cube0$attrs <- NULL + cube0$attrs$Dates <- dates2 + expect_warning( + CST_SaveExp(data = cube0, sdate_dim = c('sdate', 'sweek'), + ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, single_file = FALSE), + paste0("Element 'coords' not found. No coordinates will be used.") + ) # sdate_dim suppressWarnings( @@ -79,37 +106,37 @@ test_that("1. Input checks: CST_SaveExp", { paste0("Parameter 'sdate_dim' must be a character string.") ) ) - # expect_warning( - # CST_SaveExp(data = cube1, sdate_dim = c('sdate', 'sweek'), - # ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - # var_dim = NULL), - # paste0("Parameter 'sdate_dim' has length greater than 1 and ", - # "only the first element will be used.") - # ) + expect_warning( + CST_SaveExp(data = cube1, sdate_dim = c('sdate', 'sweek'), + ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, extra_string = 'test'), + paste0("Parameter 'sdate_dim' has length greater than 1 and ", + "only the first element will be used.") + ) suppressWarnings( expect_error( CST_SaveExp(data = cube1, sdate_dim = 'a', ftime_dim = 'ftime'), paste0("Parameter 'sdate_dim' is not found in 'data' dimension.") ) ) - # # startdates - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, startdates = 1), - # "Parameter 'startdates' is not a character string, it will not be used." - # ) - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, startdates = '20100101'), - # paste0("Parameter 'startdates' doesn't have the same length ", - # "as dimension '", 'sdate',"', it will not be used.") - # ) - # # metadata - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("No metadata found in element Variable from attrs.") - # ) + # startdates + expect_warning( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, startdates = 1), + "Parameter 'startdates' is not a character string, it will not be used." 
+ ) + expect_warning( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, startdates = '20100101'), + paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", 'sdate',"', it will not be used.") + ) + # metadata + expect_warning( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("No metadata found in element Variable from attrs.") + ) cube1$attrs$Variable$metadata <- 'metadata' expect_error( CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, @@ -117,17 +144,17 @@ test_that("1. Input checks: CST_SaveExp", { paste0("Element metadata from Variable element in attrs must be a list.") ) cube1$attrs$Variable$metadata <- list(test = 'var') - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("Metadata is not found for any coordinate.") - # ) + expect_warning( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Metadata is not found for any coordinate.") + ) cube1$attrs$Variable$metadata <- list(var = 'var') - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("Metadata is not found for any variable.") - # ) + expect_warning( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Metadata is not found for any variable.") + ) # memb_dim suppressWarnings( expect_error( @@ -174,39 +201,40 @@ test_that("1. Input checks", { Dates = as.Date('2022-02-01', format = "%Y-%m-%d")), paste0("Parameter 'Dates' must have dimension names.") ) - # # drop_dims - # expect_warning( - # SaveExp(data = dat2, coords = coords2, - # metadata = list(tas = list(level = '2m')), - # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, drop_dims = 1), - # paste0("Parameter 'drop_dims' must be character string containing ", - # "the data dimension names to be dropped. It will not be used.") - # ) - # expect_warning( - # SaveExp(data = dat2, coords = coords2, - # metadata = list(tas = list(level = '2m')), - # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, drop_dims = 'time'), - # paste0("Parameter 'drop_dims' must be character string containing ", - # "the data dimension names to be dropped. It will not be used.") - # ) - # expect_warning( - # SaveExp(data = dat2, coords = coords2, - # metadata = list(tas = list(level = '2m')), - # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, drop_dims = 'sdate'), - # paste0("Parameter 'drop_dims' can only contain dimension names ", - # "that are of length 1. It will not be used.") - # ) - # # varname - # expect_warning( - # SaveExp(data = dat2, coords = coords2, - # metadata = list(tas = list(level = '2m')), - # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("Parameter 'varname' is NULL. It will be assigned to 'X'.") - # ) + # drop_dims + expect_warning( + SaveExp(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 1), + paste0("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. 
It will not be used.") + ) + expect_warning( + SaveExp(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 'time'), + paste0("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. It will not be used.") + ) + expect_warning( + SaveExp(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 'sdate'), + paste0("Parameter 'drop_dims' can only contain dimension names ", + "that are of length 1. It will not be used.") + ) + expect_warning( + SaveExp(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, drop_dims = 'ftime'), + paste0("Parameter 'drop_dims' contains dimensions used in the ", + "computation. It will not be used.") + ) + # varname suppressWarnings( expect_error( SaveExp(data = dat2, coords = coords2, varname = 1, @@ -215,30 +243,82 @@ test_that("1. Input checks", { "Parameter 'varname' must be a character." ) ) - # # coords - # expect_warning( - # SaveExp(data = dat2, coords = list(sdate = coords2[[1]]), - # varname = 'tas', metadata = list(tas = list(level = '2m')), - # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # "Coordinate 'lon' is not provided and it will be set as index in element coords.", - # "Coordinate 'lat' is not provided and it will be set as index in element coords.", - # "Coordinate 'ftime' is not provided and it will be set as index in element coords." - # ) - # # varname, metadata, spatial coords, unknown dim - # expect_warning( - # SaveExp(data = dat1, ftime_dim = NULL, sdate_dim = NULL, memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, single_file = TRUE), - # "Parameter 'varname' is NULL. It will be assigned to 'X'.", - # "Parameter 'metadata' is not provided so the metadata saved will be incomplete.", - # paste0("Spatial coordinates not found.") - # ) + # varname, metadata, spatial coords, unknown dim expect_error( SaveExp(data = dat1, varname = 1, ftime_dim = NULL, sdate_dim = NULL, memb_dim = NULL, dat_dim = NULL, var_dim = NULL), paste0("Parameter 'varname' must be a character string with the ", "variable names.") ) + # ftime_dim + expect_error( + SaveExp(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Parameter 'ftime_dim' is not found in 'data' dimension.") + ) + # Dates dimension check + expect_error( + SaveExp(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = NULL, memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Parameter 'Dates' must have 'ftime_dim'.") + ) + expect_warning( + SaveExp(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = NULL, ftime_dim = NULL, memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Dates must be provided if 'data' must be saved in separated files. 
", + "All data will be saved in a single file.") + ) + # Without ftime and sdate + expect_error( + SaveExp(data = dat3, coords = coords3, + metadata = list(tas = list(level = '2m')), + Dates = dates5, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = NULL), + paste0("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") + ) + expect_warning( + SaveExp(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + startdates = c(paste(1:11, collapse = '')), + Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), + paste0("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + ) + expect_warning( + SaveExp(data = dat2, coords = coords2, + metadata = list(tas = list(level = '2m')), + Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), + paste0("Dates must be provided if 'data' must be saved in separated files. ", + "All data will be saved in a single file.") + ) + # (dat3) Without sdate_dim + expect_warning( + SaveExp(data = dat3, coords = coords3, + metadata = list(tas = list(level = '2m')), + Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = NULL, + extra_string = 'nosdate3.nc', single_file = FALSE), + paste0("Dates must be provided if 'data' must be saved in separated files. ", + "All data will be saved in a single file.") + ) + # (dat4) Without ftime_dim + expect_error( + SaveExp(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = NULL, memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', + single_file = TRUE), + paste0("Parameter 'Dates' must have 'ftime_dim'.") + ) }) ############################################## -- GitLab From b869c87ef843f41445aa6322c916aa32bd1c6e9a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 29 Nov 2023 16:14:33 +0100 Subject: [PATCH 03/16] Regenerate documentation and add example scripts --- NAMESPACE | 2 - R/CST_SaveExp.R | 67 +++++--- inst/doc/usecase.md | 11 ++ inst/doc/usecase/UseCase4_CST_SaveExp.R | 215 ++++++++++++++++++++++++ man/CST_SaveCube.Rd | 114 ------------- man/CST_SaveExp.Rd | 11 +- man/SaveCube.Rd | 143 ---------------- man/SaveExp.Rd | 10 +- 8 files changed, 289 insertions(+), 284 deletions(-) create mode 100644 inst/doc/usecase.md create mode 100644 inst/doc/usecase/UseCase4_CST_SaveExp.R delete mode 100644 man/CST_SaveCube.Rd delete mode 100644 man/SaveCube.Rd diff --git a/NAMESPACE b/NAMESPACE index 35bd3c4f..f6cc47b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,7 +34,6 @@ export(CST_RFTemp) export(CST_RFWeights) export(CST_RainFARM) export(CST_RegimesAssign) -export(CST_SaveCube) export(CST_SaveExp) export(CST_SplitDim) export(CST_Start) @@ -62,7 +61,6 @@ export(RFTemp) export(RF_Weights) export(RainFARM) export(RegimesAssign) -export(SaveCube) export(SaveExp) export(SplitDim) export(WeatherRegime) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 72b45b41..ebe25941 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -46,6 +46,12 @@ #'@param extra_string A character string to be include as part of the file name, #' for instance, to identify member or realization. It would be added to the #' file name between underscore characters. 
+#'@param units_hours_since (Optional) A logical value only used for the case +#' Dates have forecast time and start date dimension and single_file is TRUE. +#' When it is TRUE, it saves the forecast time with units of 'hours since'; +#' if it is FALSE, the time units will be a number of time steps with its +#' corresponding frequency (e.g. n days, n months or n hours). It is TRUE +#' by default. #' #'@return Multiple or single NetCDF files containing the data array.\cr #'\item{\code{single_file = TRUE}}{ @@ -84,7 +90,7 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', var_dim = 'var', memb_dim = 'member', startdates = NULL, drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL) { + global_attrs = NULL, units_hours_since = TRUE) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -140,13 +146,12 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', } # startdates if (is.null(startdates)) { - startdates <- data$coords[[sdate_dim]] - } else { - if (!is.character(startdates)) { - warning(paste0("Parameter 'startdates' is not a character string, ", - "it will not be used.")) + if (is.character(data$coords[[sdate_dim]])) { startdates <- data$coords[[sdate_dim]] } + } + + if (!is.null(startdates)) { if (!is.null(sdate_dim)) { if (dim(data$data)[sdate_dim] != length(startdates)) { warning(paste0("Parameter 'startdates' doesn't have the same length ", @@ -170,7 +175,8 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', drop_dims = drop_dims, extra_string = extra_string, single_file = single_file, - global_attrs = global_attrs) + global_attrs = global_attrs, + units_hours_since = units_hours_since) } #'Save a multidimensional array with metadata to data in NetCDF format #'@description This function allows to save a data array with metadata into a @@ -279,7 +285,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL) { + global_attrs = NULL, units_hours_since = TRUE) { ## Initial checks # data if (is.null(data)) { @@ -297,7 +303,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } # Dates if (!is.null(Dates)) { - if (!inherits(Dates, "POSIXct") & !inherits(Dates, "Date")) { + if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") } if (is.null(dim(Dates))) { @@ -468,14 +474,15 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } if (is.null(startdates)) { startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { - startdates <- format(startdates, "%Y%m%d") } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { warning("Parameter 'startdates' should be a character string containing ", "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", "'POSIXct' or 'Dates' class. 
Files will be named with Dates instead.") startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') } + if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } } else if (any(ftime_dim %in% names(dim(Dates)))) { if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] @@ -554,6 +561,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if ('variables' %in% names(attributes(metadata[[i_coord]]))) { # from Start: 'lon' or 'lat' attrs <- attributes(metadata[[i_coord]])[['variables']] + attrs[[i_coord]]$dim <- NULL attr(coords[[i_coord]], 'variables') <- attrs } else if (inherits(metadata[[i_coord]], 'list')) { # from Start and Load: main var @@ -571,7 +579,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, # Reorder coords coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL coords <- coords[filedims] - defined_vars <- list() if (!single_file) { for (i in 1:n_datasets) { @@ -617,7 +624,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, attr(coords[[dat_dim]], 'variables') <- list(list(units = 'adim')) # extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) } - first_sdate <- last_sdate <- NULL if (!is.null(Dates)) { if (is.null(sdate_dim)) { sdates <- Dates[1] @@ -633,8 +639,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, attr(coords[[sdate_dim]], 'variables') <- list(list(units = paste('hours since', sdates[1]), calendar = 'proleptic_gregorian', longname = sdate_dim)) - first_sdate <- sdates[1] - last_sdate <- sdates[length(sdates)] # ftime definition Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) differ_ftime <- array(dim = dim(Dates)) @@ -648,14 +652,30 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, "correctly.") } } - - # Save in units 'hours since' - dim(leadtimes) <- dim(data)[ftime_dim] - coords[[ftime_dim]] <- leadtimes - attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', - paste(sdates, collapse = ', ')), + if (!units_hours_since) { + if (all(diff(leadtimes/24) == 1)) { + # daily values + units <- 'days' + vals <- round(leadtimes/24) + 1 + } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { + # monthly values + units <- 'months' + vals <- round(leadtimes/730) + 1 + } else { + # other frequency + units <- 'hours' + vals <- leadtimes + 1 + } + } else { + units <- paste('hours since', paste(sdates, collapse = ', ')) + vals <- leadtimes + } + dim(vals) <- dim(data)[ftime_dim] + coords[[ftime_dim]] <- vals + attr(coords[[ftime_dim]], 'variables') <- list(list(units = units, calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) + longname = ftime_dim, + unlim = TRUE)) } # var definition @@ -670,6 +690,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') } if (!is.null(metadata_j)) { + metadata_j$dim <- NULL attr(coords[[varname_j]], 'variables') <- list(metadata_j) names(attributes(coords[[varname_j]])$variables) <- varname_j } @@ -679,6 +700,8 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } } if (is.null(extra_string)) { + first_sdate <- startdates[1] + last_sdate <- startdates[length(startdates)] gsub("-", "", first_sdate) file_name <- paste0(paste(c(varname, gsub("-", 
"", first_sdate), diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md new file mode 100644 index 00000000..5a2dc8ba --- /dev/null +++ b/inst/doc/usecase.md @@ -0,0 +1,11 @@ +# Usecase scripts + +In this document, you can link to the example scripts for different usage of the function: + +1. **Climate data assesment and downscaling** + 1. [Bias adjustment for assessment of an extreme event](inst/doc/usecase/UseCase1_WindEvent_March2018.R) + 2. [Precipitation Downscaling with RainFARM RF 4](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF4.R) + 3. [Precipitation Downscaling with RainFARM RF 100](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF100.R) + +2. **Examples using 's2dv_cube'** + 1. [Save 's2dv_cube'](inst/doc/usecase/UseCase4_CST_SaveCube.R) \ No newline at end of file diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R new file mode 100644 index 00000000..5e9ded12 --- /dev/null +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -0,0 +1,215 @@ +#**************************************************************************** +# Script to test examples of CST_SaveExp +# Eva Rifà Rovira +# 29/11/2024 +#**************************************************************************** + +# Tests 1: Multidimensional array and Dates, without metadata and coordinates +# (1) Minimal use case, without Dates +data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4)) +SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, single_file = TRUE) +SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = NULL, single_file = TRUE) # same result + +# (2) Forecast time dimension, without Dates +data <- array(1:5, dim = c(ftime = 5, lon = 4, lat = 4)) +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = NULL, single_file = TRUE) + +# (2) Start date dimension, without Dates +data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4)) +SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = 'sdate', single_file = TRUE) + +# (3) Only forecast time dimension (no sdate), with Dates +data <- array(1:5, dim = c(ftime = 5, lon = 4, lat = 4)) +dates <- c('20000101', '20010102', '20020103', '20030104', '20040105') +dates <- as.Date(dates, format = "%Y%m%d", tz = "UTC") +dim(dates) <- c(ftime = 5) +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = NULL, Dates = dates, single_file = FALSE) +# For this case we have the same result using: single_file = FALSE /TRUE. 
+ +# (4) Forecast time and 1 sdate, with Dates +data <- array(1:5, dim = c(sdate = 1, ftime = 5, lon = 4, lat = 4)) +dates <- c('20000101', '20010102', '20020103', '20030104', '20040105') +dates <- as.Date(dates, format = "%Y%m%d", tz = "UTC") +dim(dates) <- c(sdate = 1, ftime = 5) +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE) +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = FALSE) + +################################################################################ + +# Tests 2: Test sample data from CSTools + +# (a) Data loaded with Start +# (1) lonlat_temp_st$exp in a single file with units 'hours since' + +data <- lonlat_temp_st$exp +CST_SaveExp(data = data, ftime_dim = 'ftime', + var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = TRUE) + +# Now we read the output with Start: +sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) +path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") +out <- Start(dat = path, + var = 'tas', + lon = 'all', + lat = 'all', + ftime = 'all', + sdate = 'all', + member = 'all', + return_vars = list(lon = 'dat', + lat = 'dat', + ftime = NULL, + sdate = NULL), + retrieve = TRUE) + +attributes(out)$Variables$common$ftime + +# (2) lonlat_temp_st$exp in a single file with units of time frequency +data <- lonlat_temp_st$exp +CST_SaveExp(data = data, ftime_dim = 'ftime', + var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = TRUE, units_hours_since = FALSE) + +# Now we read the output with Start: +sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) +path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") +out <- Start(dat = path, + var = 'tas', + lon = 'all', + lat = 'all', + ftime = 'all', + sdate = 'all', + member = 'all', + return_vars = list( + lon = 'dat', + lat = 'dat', + ftime = NULL, + sdate = NULL), + retrieve = TRUE) + +attributes(out)$Variables$common$ftime +# [1] "1 months" "2 months" "3 months" + +# (3) lonlat_temp_st$exp in separated files with units of hours since +data <- lonlat_temp_st$exp +CST_SaveExp(data = data, ftime_dim = 'ftime', + var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = FALSE) +sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) +path <- paste0(getwd(),"/dat1/$var$/$var$_$sdate$.nc") + +out <- Start(dat = path, var = 'tas', + sdate = sdate, + lon = 'all', + lat = 'all', + ftime = 'all', + member = 'all', + return_vars = list(lon = 'dat', + lat = 'dat', + ftime = NULL), + retrieve = TRUE) + +# (4) lonlat_prec_st$exp in a single file with units of time frequency +data <- lonlat_prec_st +CST_SaveExp(data = data, ftime_dim = 'ftime', + var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = TRUE, units_hours_since = FALSE) + +# Now we read the output with Start: +sdate <- as.vector(data$coords$sdate) +path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") +out <- Start(dat = path, + var = 'prlr', + lon = 'all', + lat = 'all', + ftime = 'all', + sdate = 'all', + member = 'all', + return_vars = list( + lon = 'dat', + lat = 'dat', + ftime = NULL, + sdate = NULL), + retrieve = TRUE) + +attributes(out)$Variables$common$sdate +# [1] "1 months" "2 months" "3 months" + +# (b) Data loaded with Load +data <- lonlat_temp$exp +data <- lonlat_temp$obs +dtaa <- lonlat_prec +CST_SaveExp(data = data, ftime_dim = 
'ftime', + var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = TRUE, units_hours_since = FALSE) +# Error + + +################################################################################ +# Test 3: Special cases + +# (1) two variables and two datasets in separated files + +repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + +data3 <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20160101', '20170101'), + ensemble = indices(1), + time = indices(1:2), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T + ) +cube3 <- as.s2dv_cube(data3) + +CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var', + memb_dim = 'ensemble', dat_dim = 'dat') + +# We read again the data with start +repos <- paste0(getwd(), "/system4_m1/$var$/$var$_$sdate$.nc") +repos2 <- paste0(getwd(), "/system5_m1/$var$/$var$_$sdate$.nc") + +data3out <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20160101', '20170101'), + ensemble = indices(1), + time = indices(1:2), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T) + +summary(data3out) +summary(data3) + +dim(data3) +dim(data3out) + +# (1) two variables and two datasets in the same file + +CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var', + memb_dim = 'ensemble', dat_dim = 'dat', + single_file = TRUE) +################################################################################ \ No newline at end of file diff --git a/man/CST_SaveCube.Rd b/man/CST_SaveCube.Rd deleted file mode 100644 index cb12f09f..00000000 --- a/man/CST_SaveCube.Rd +++ /dev/null @@ -1,114 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CST_SaveCube.R -\name{CST_SaveCube} -\alias{CST_SaveCube} -\title{Save objects of class 's2dv_cube' to data in NetCDF format} -\usage{ -CST_SaveCube( - data, - destination = "./", - sdate_dim = "sdate", - ftime_dim = "time", - dat_dim = "dataset", - var_dim = "var", - memb_dim = "member", - startdates = NULL, - drop_dims = NULL, - single_file = FALSE, - extra_string = NULL -) -} -\arguments{ -\item{data}{An object of class \code{s2dv_cube}.} - -\item{destination}{A character string containing the directory name in which -to save the data. NetCDF file for each starting date are saved into the -folder tree: \cr -destination/Dataset/variable/. By default the function -creates and saves the data into the working directory.} - -\item{sdate_dim}{A character string indicating the name of the start date -dimension. By default, it is set to 'sdate'. It can be NULL if there is no -start date dimension.} - -\item{ftime_dim}{A character string indicating the name of the forecast time -dimension. By default, it is set to 'time'. It can be NULL if there is no -forecast time dimension.} - -\item{dat_dim}{A character string indicating the name of dataset dimension. 
-By default, it is set to 'dataset'. It can be NULL if there is no dataset -dimension.} - -\item{var_dim}{A character string indicating the name of variable dimension. -By default, it is set to 'var'. It can be NULL if there is no variable -dimension.} - -\item{memb_dim}{A character string indicating the name of the member dimension. -By default, it is set to 'member'. It can be NULL if there is no member -dimension.} - -\item{startdates}{A vector of dates that will be used for the filenames -when saving the data in multiple files. It must be a vector of the same -length as the start date dimension of data. It must be a vector of class -\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -If it is NULL, the coordinate corresponding the the start date dimension or -the first Date of each time step will be used as the name of the files. -It is NULL by default.} - -\item{drop_dims}{A vector of character strings indicating the dimension names -of length 1 that need to be dropped in order that they don't appear in the -netCDF file. It is NULL by default (optional).} - -\item{single_file}{A logical value indicating if all object is saved in a -single file (TRUE) or in multiple files (FALSE). When it is FALSE, -the array is separated for Datasets, variable and start date. It is FALSE -by default.} - -\item{extra_string}{A character string to be include as part of the file name, -for instance, to identify member or realization. It would be added to the -file name between underscore characters.} -} -\value{ -Multiple or single NetCDF files containing the data array.\cr -\item{\code{single_file = TRUE}}{ - All data is saved in a single file located in the specified destination - path with the following name: - ___.nc. Multiple - variables are saved separately in the same file. The forecast time units - is extracted from the frequency of the time steps (hours, days, months). - The first value of forecast time is 1. If no frequency is found, the units - will be 'hours since' each start date and the time steps are assumed to be - equally spaced. -} -\item{\code{single_file = FALSE}}{ - The data array is subset and stored into multiple files. Each file - contains the data subset for each start date, variable and dataset. Files - with different variables and Datasets are stored in separated directories - within the following directory tree: destination/Dataset/variable/. - The name of each file will be: - __.nc. -} -} -\description{ -This function allows to divide and save a object of class -'s2dv_cube' into a NetCDF file, allowing to reload the saved data using -\code{Start} function from StartR package. If the original 's2dv_cube' object -has been created from \code{CST_Load()}, then it can be reloaded with -\code{Load()}. 
-} -\examples{ -\dontrun{ -data <- lonlat_temp_st$exp -destination <- "./" -CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', - var_dim = 'var', dat_dim = 'dataset') -} - -} -\seealso{ -\code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and -\code{\link{s2dv_cube}} -} -\author{ -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 9352e036..8659b215 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -15,7 +15,9 @@ CST_SaveExp( startdates = NULL, drop_dims = NULL, single_file = FALSE, - extra_string = NULL + extra_string = NULL, + global_attrs = NULL, + units_hours_since = TRUE ) } \arguments{ @@ -67,6 +69,13 @@ by default.} \item{extra_string}{A character string to be include as part of the file name, for instance, to identify member or realization. It would be added to the file name between underscore characters.} + +\item{units_hours_since}{(Optional) A logical value only used for the case +Dates have forecast time and start date dimension and single_file is TRUE. +When it is TRUE, it saves the forecast time with units of 'hours since'; +if it is FALSE, the time units will be a number of time steps with its +corresponding frequency (e.g. n days, n months or n hours). It is TRUE +by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr diff --git a/man/SaveCube.Rd b/man/SaveCube.Rd deleted file mode 100644 index da9a1ed4..00000000 --- a/man/SaveCube.Rd +++ /dev/null @@ -1,143 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CST_SaveCube.R -\name{SaveCube} -\alias{SaveCube} -\title{Save a multidimensional array with metadata to data in NetCDF format} -\usage{ -SaveCube( - data, - destination = "./", - Dates = NULL, - coords = NULL, - varname = NULL, - metadata = NULL, - Datasets = NULL, - startdates = NULL, - dat_dim = "dataset", - sdate_dim = "sdate", - ftime_dim = "time", - var_dim = "var", - memb_dim = "member", - drop_dims = NULL, - single_file = FALSE, - extra_string = NULL, - global_attrs = NULL -) -} -\arguments{ -\item{data}{A multi-dimensional array with named dimensions.} - -\item{destination}{A character string indicating the path where to store the -NetCDF files.} - -\item{Dates}{A named array of dates with the corresponding sdate and forecast -time dimension. If there is no sdate_dim, you can set it to NULL. -It must have ftime_dim dimension.} - -\item{coords}{A named list with elements of the coordinates corresponding to -the dimensions of the data parameter. The names and length of each element -must correspond to the names of the dimensions. If any coordinate is not -provided, it is set as an index vector with the values from 1 to the length -of the corresponding dimension.} - -\item{varname}{A character string indicating the name of the variable to be -saved.} - -\item{metadata}{A named list where each element is a variable containing the -corresponding information. The information must be contained in a list of -lists for each variable.} - -\item{Datasets}{A vector of character string indicating the names of the -datasets.} - -\item{startdates}{A vector of dates that will be used for the filenames -when saving the data in multiple files. It must be a vector of the same -length as the start date dimension of data. It must be a vector of class -\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -If it is NULL, the first Date of each time step will be used as the name of -the files. 
It is NULL by default.} - -\item{dat_dim}{A character string indicating the name of dataset dimension. -By default, it is set to 'dataset'. It can be NULL if there is no dataset -dimension.} - -\item{sdate_dim}{A character string indicating the name of the start date -dimension. By default, it is set to 'sdate'. It can be NULL if there is no -start date dimension.} - -\item{ftime_dim}{A character string indicating the name of the forecast time -dimension. By default, it is set to 'time'. It can be NULL if there is no -forecast time dimension.} - -\item{var_dim}{A character string indicating the name of variable dimension. -By default, it is set to 'var'. It can be NULL if there is no variable -dimension.} - -\item{memb_dim}{A character string indicating the name of the member dimension. -By default, it is set to 'member'. It can be NULL if there is no member -dimension.} - -\item{drop_dims}{A vector of character strings indicating the dimension names -of length 1 that need to be dropped in order that they don't appear in the -netCDF file. It is NULL by default (optional).} - -\item{single_file}{A logical value indicating if all object is saved in a -unique file (TRUE) or in separated directories (FALSE). When it is FALSE, -the array is separated for Datasets, variable and start date. It is FALSE -by default (optional).} - -\item{extra_string}{A character string to be include as part of the file name, -for instance, to identify member or realization. It would be added to the -file name between underscore characters (optional).} - -\item{global_attrs}{A list with elements containing the global attributes -to be saved in the NetCDF.} -} -\value{ -Multiple or single NetCDF files containing the data array.\cr -\item{\code{single_file = TRUE}}{ - All data is saved in a single file located in the specified destination - path with the following name: - ___.nc. Multiple - variables are saved separately in the same file. The forecast time units - is extracted from the frequency of the time steps (hours, days, months). - The first value of forecast time is 1. If no frequency is found, the units - will be 'hours since' each start date and the time steps are assumed to be - equally spaced. -} -\item{\code{single_file = FALSE}}{ - The data array is subset and stored into multiple files. Each file - contains the data subset for each start date, variable and dataset. Files - with different variables and Datasets are stored in separated directories - within the following directory tree: destination/Dataset/variable/. - The name of each file will be: - __.nc. -} -} -\description{ -This function allows to save a data array with metadata into a -NetCDF file, allowing to reload the saved data using \code{Start} function -from StartR package. If the original 's2dv_cube' object has been created from -\code{CST_Load()}, then it can be reloaded with \code{Load()}. 
-} -\examples{ -\dontrun{ -data <- lonlat_temp_st$exp$data -lon <- lonlat_temp_st$exp$coords$lon -lat <- lonlat_temp_st$exp$coords$lat -coords <- list(lon = lon, lat = lat) -Datasets <- lonlat_temp_st$exp$attrs$Datasets -varname <- 'tas' -Dates <- lonlat_temp_st$exp$attrs$Dates -destination = './' -metadata <- lonlat_temp_st$exp$attrs$Variable$metadata -SaveExp(data = data, destination = destination, coords = coords, - Datasets = Datasets, varname = varname, Dates = Dates, - metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', - var_dim = 'var', dat_dim = 'dataset') -} - -} -\author{ -Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -} diff --git a/man/SaveExp.Rd b/man/SaveExp.Rd index c690d97e..2ff92489 100644 --- a/man/SaveExp.Rd +++ b/man/SaveExp.Rd @@ -20,7 +20,9 @@ SaveExp( memb_dim = "member", drop_dims = NULL, single_file = FALSE, - extra_string = NULL + extra_string = NULL, + global_attrs = NULL, + units_hours_since = TRUE ) } \arguments{ @@ -30,7 +32,8 @@ SaveExp( NetCDF files.} \item{Dates}{A named array of dates with the corresponding sdate and forecast -time dimension.} +time dimension. If there is no sdate_dim, you can set it to NULL. +It must have ftime_dim dimension.} \item{coords}{A named list with elements of the coordinates corresponding to the dimensions of the data parameter. The names and length of each element @@ -87,6 +90,9 @@ by default (optional).} \item{extra_string}{A character string to be include as part of the file name, for instance, to identify member or realization. It would be added to the file name between underscore characters (optional).} + +\item{global_attrs}{A list with elements containing the global attributes +to be saved in the NetCDF.} } \value{ Multiple or single NetCDF files containing the data array.\cr -- GitLab From bd1408913c2c0d103b853414f3441203718cc673 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 29 Nov 2023 16:19:15 +0100 Subject: [PATCH 04/16] Add dependency easyNCDF --- DESCRIPTION | 3 ++- inst/doc/usecase.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4e41770d..362663ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -79,7 +79,8 @@ Imports: utils, verification, lubridate, - scales + scales, + easyNCDF Suggests: zeallot, testthat, diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 5a2dc8ba..1fc17ae4 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -8,4 +8,4 @@ In this document, you can link to the example scripts for different usage of the 3. [Precipitation Downscaling with RainFARM RF 100](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF100.R) 2. **Examples using 's2dv_cube'** - 1. [Save 's2dv_cube'](inst/doc/usecase/UseCase4_CST_SaveCube.R) \ No newline at end of file + 1. 
[Save 's2dv_cube'](inst/doc/usecase/UseCase4_SaveExp.R) \ No newline at end of file -- GitLab From 417732158eba3c8968e45bc91b39df59021fef70 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 29 Nov 2023 16:29:24 +0100 Subject: [PATCH 05/16] Delete file --- tests/testthat/test-CST_SaveCube.R | 324 ----------------------------- 1 file changed, 324 deletions(-) delete mode 100644 tests/testthat/test-CST_SaveCube.R diff --git a/tests/testthat/test-CST_SaveCube.R b/tests/testthat/test-CST_SaveCube.R deleted file mode 100644 index 0951c392..00000000 --- a/tests/testthat/test-CST_SaveCube.R +++ /dev/null @@ -1,324 +0,0 @@ -############################################## - -# cube0 -cube0 <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) -class(cube0) <- 's2dv_cube' - -# cube1 -cube1 <- NULL -cube1$data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) -coords2 <- list(sdate = c('20000101', '20010102', '20020103', '20030104', '20040105'), - var = 'tas', - lon = 1.:4., - lat = 1.:4.) -cube1$coords <- coords2 -dates2 <- c('20000101', '20010102', '20020103', '20030104', '20040105') -dates2 <- as.Date(dates2, format = "%Y%m%d", tz = "UTC") -dim(dates2) <- c(sdate = 5, ftime = 1) -cube1$attrs$Dates <- dates2 -class(cube1) <- 's2dv_cube' - -# cube2 -cube2 <- cube1 -cube2$data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1, - test = 2, test2 = 3)) -dim(cube2$data) <- c(sdate = 5, lon = 4, lat = 4, ftime = 1, member = 1, - ensemble = 1, test = 2, test2 = 3) - -# cube3 -cube3 <- cube1 - -# dat0 -dates0 <- as.Date('2022-02-01', format = "%Y-%m-%d") -dim(dates0) <- c(sdate = 1) - -# dat1 -dat1 <- array(1, dim = c(test = 1)) - -# dat2 -dat2 <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) -coords2 <- list(sdate = c('20000101', '20010102', '20020103', '20030104', '20040105'), - var = 'tas', - lon = 1.:4., - lat = 1.:4.) -dates2 <- c('20000101', '20010102', '20020103', '20030104', '20040105') -dates2 <- as.Date(dates2, format = "%Y%m%d", tz = "UTC") -dim(dates2) <- c(sdate = 5, ftime = 1) - -# dat3 (without sdate dim) -dat3 <- array(1:5, dim = c(lon = 4, lat = 4, ftime = 2)) -coords3 <- list(sdate = c('20000101', '20010102'), - var = 'tas', - lon = 1.:4., - lat = 1.:4.) -dates3 <- c('20000101', '20010102') -dates3 <- as.Date(dates3, format = "%Y%m%d", tz = "UTC") -dim(dates3) <- c(ftime = 2) - -# dat4 (without ftime dim) -dat4 <- array(1:5, dim = c(sdate = 2, lon = 4, lat = 4)) -coords4 <- list(sdate = c('20000101', '20010102'), - var = 'tas', - lon = 1.:4., - lat = 1.:4.) -dates4 <- c('20000101', '20010102') -dates4 <- as.Date(dates4, format = "%Y%m%d", tz = "UTC") -dim(dates4) <- c(sdate = 2) - -# dates5 (Dates with extra dimensions) -dates5 <- c('20000101', '20010102', '20010102', '20010102') -dates5 <- as.Date(dates5, format = "%Y%m%d", tz = "UTC") -dim(dates5) <- c(ftime = 2, test = 1, test2 = 2) - -############################################## - -test_that("1. 
Input checks: CST_SaveCube", { - # s2dv_cube - expect_error( - CST_SaveCube(data = 1), - paste0("Parameter 'data' must be of the class 's2dv_cube'.") - ) - # structure - expect_error( - CST_SaveCube(data = cube0), - paste0("Parameter 'data' must have at least 'data' and 'attrs' elements ", - "within the 's2dv_cube' structure.") - ) - cube0 <- list(data = cube0, attrs = 1) - class(cube0) <- 's2dv_cube' - expect_error( - CST_SaveCube(data = cube0), - paste0("Level 'attrs' must be a list with at least 'Dates' element.") - ) - cube0$attrs <- NULL - cube0$attrs$Dates <- dates2 - expect_warning( - CST_SaveCube(data = cube0, sdate_dim = c('sdate', 'sweek'), - ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - var_dim = NULL, single_file = FALSE), - paste0("Element 'coords' not found. No coordinates will be used.") - ) - - # sdate_dim - suppressWarnings( - expect_error( - CST_SaveCube(data = cube1, sdate_dim = 1), - paste0("Parameter 'sdate_dim' must be a character string.") - ) - ) - expect_warning( - CST_SaveCube(data = cube1, sdate_dim = c('sdate', 'sweek'), - ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - var_dim = NULL, extra_string = 'test'), - paste0("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - ) - suppressWarnings( - expect_error( - CST_SaveCube(data = cube1, sdate_dim = 'a', ftime_dim = 'ftime'), - paste0("Parameter 'sdate_dim' is not found in 'data' dimension.") - ) - ) - # startdates - expect_warning( - CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, startdates = 1), - "Parameter 'startdates' is not a character string, it will not be used." - ) - expect_warning( - CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, startdates = '20100101'), - paste0("Parameter 'startdates' doesn't have the same length ", - "as dimension '", 'sdate',"', it will not be used.") - ) - # metadata - expect_warning( - CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("No metadata found in element Variable from attrs.") - ) - cube1$attrs$Variable$metadata <- 'metadata' - expect_error( - CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Element metadata from Variable element in attrs must be a list.") - ) - cube1$attrs$Variable$metadata <- list(test = 'var') - expect_warning( - CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Metadata is not found for any coordinate.") - ) - cube1$attrs$Variable$metadata <- list(var = 'var') - expect_warning( - CST_SaveCube(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Metadata is not found for any variable.") - ) - # memb_dim - suppressWarnings( - expect_error( - CST_SaveCube(data = cube1, memb_dim = 1, ftime_dim = 'ftime'), - paste0("Parameter 'memb_dim' must be a character string.") - ) - ) - suppressWarnings( - expect_error( - CST_SaveCube(data = cube1, memb_dim = 'member', ftime_dim = 'ftime'), - paste0("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no member dimension.") - ) - ) -}) - -############################################## - -test_that("1. Input checks", { - # data - expect_error( - SaveCube(data = NULL), - "Parameter 'data' cannot be NULL." 
- ) - expect_error( - SaveCube(data = 1:10), - "Parameter 'data' must be an array with named dimensions." - ) - # destination - expect_error( - SaveCube(data = array(1, dim = c(a = 1)), destination = NULL), - paste0("Parameter 'destination' must be a character string of one element ", - "indicating the name of the file (including the folder if needed) ", - "where the data will be saved."), - fixed = TRUE - ) - # Dates - expect_error( - SaveCube(data = array(1, dim = c(a = 1)), Dates = 'a'), - paste0("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") - ) - expect_error( - SaveCube(data = array(1, dim = c(a = 1)), - Dates = as.Date('2022-02-01', format = "%Y-%m-%d")), - paste0("Parameter 'Dates' must have dimension names.") - ) - # drop_dims - expect_warning( - SaveCube(data = dat2, coords = coords2, - metadata = list(tas = list(level = '2m')), - Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, drop_dims = 1), - paste0("Parameter 'drop_dims' must be character string containing ", - "the data dimension names to be dropped. It will not be used.") - ) - expect_warning( - SaveCube(data = dat2, coords = coords2, - metadata = list(tas = list(level = '2m')), - Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, drop_dims = 'time'), - paste0("Parameter 'drop_dims' must be character string containing ", - "the data dimension names to be dropped. It will not be used.") - ) - expect_warning( - SaveCube(data = dat2, coords = coords2, - metadata = list(tas = list(level = '2m')), - Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, drop_dims = 'sdate'), - paste0("Parameter 'drop_dims' can only contain dimension names ", - "that are of length 1. It will not be used.") - ) - expect_warning( - SaveCube(data = dat2, coords = coords2, - metadata = list(tas = list(level = '2m')), - Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, drop_dims = 'ftime'), - paste0("Parameter 'drop_dims' contains dimensions used in the ", - "computation. It will not be used.") - ) - # varname - suppressWarnings( - expect_error( - SaveCube(data = dat2, coords = coords2, varname = 1, - metadata = list(tas = list(level = '2m')), - Dates = dates2), - "Parameter 'varname' must be a character." - ) - ) - # varname, metadata, spatial coords, unknown dim - expect_error( - SaveCube(data = dat1, varname = 1, ftime_dim = NULL, sdate_dim = NULL, - memb_dim = NULL, dat_dim = NULL, var_dim = NULL), - paste0("Parameter 'varname' must be a character string with the ", - "variable names.") - ) - # ftime_dim - expect_error( - SaveCube(data = dat4, coords = coords4, - metadata = list(tas = list(level = '2m')), - Dates = dates4, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Parameter 'ftime_dim' is not found in 'data' dimension.") - ) - # Dates dimension check - expect_error( - SaveCube(data = dat4, coords = coords4, - metadata = list(tas = list(level = '2m')), - Dates = dates4, ftime_dim = NULL, memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Parameter 'Dates' must have 'ftime_dim'.") - ) - expect_warning( - SaveCube(data = dat4, coords = coords4, - metadata = list(tas = list(level = '2m')), - Dates = NULL, ftime_dim = NULL, memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Dates must be provided if 'data' must be saved in separated files. 
", - "All data will be saved in a single file.") - ) - # Without ftime and sdate - expect_error( - SaveCube(data = dat3, coords = coords3, - metadata = list(tas = list(level = '2m')), - Dates = dates5, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, sdate_dim = NULL), - paste0("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") - ) - expect_warning( - SaveCube(data = dat2, coords = coords2, - metadata = list(tas = list(level = '2m')), - startdates = c(paste(1:11, collapse = '')), - Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), - paste0("Parameter 'startdates' should be a character string containing ", - "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", - "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") - ) - expect_warning( - SaveCube(data = dat2, coords = coords2, - metadata = list(tas = list(level = '2m')), - Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), - paste0("Dates must be provided if 'data' must be saved in separated files. ", - "All data will be saved in a single file.") - ) - # (dat3) Without sdate_dim - expect_warning( - SaveCube(data = dat3, coords = coords3, - metadata = list(tas = list(level = '2m')), - Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, sdate_dim = NULL, - extra_string = 'nosdate3.nc', single_file = FALSE), - paste0("Dates must be provided if 'data' must be saved in separated files. ", - "All data will be saved in a single file.") - ) - # (dat4) Without ftime_dim - expect_error( - SaveCube(data = dat4, coords = coords4, - metadata = list(tas = list(level = '2m')), - Dates = dates4, ftime_dim = NULL, memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', - single_file = TRUE), - paste0("Parameter 'Dates' must have 'ftime_dim'.") - ) -}) - -############################################## -- GitLab From 704cedc8288b61c6c86e960762d27fec08eecb3f Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 30 Nov 2023 10:45:50 +0100 Subject: [PATCH 06/16] Update documentation; correct unit test --- R/CST_SaveExp.R | 258 +++++++++++++----------- inst/doc/usecase/UseCase4_CST_SaveExp.R | 115 ++++++++++- man/CST_SaveExp.Rd | 104 +++++----- man/SaveExp.Rd | 93 +++++---- tests/testthat/test-CST_SaveExp.R | 8 +- 5 files changed, 369 insertions(+), 209 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index ebe25941..26280fdd 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -4,93 +4,104 @@ #' #'@description This function allows to divide and save a object of class #''s2dv_cube' into a NetCDF file, allowing to reload the saved data using -#'\code{Start} function from StartR package. If the original 's2dv_cube' object -#'has been created from \code{CST_Load()}, then it can be reloaded with -#'\code{Load()}. +#'\code{CST_Start} or \code{CST_Load} functions. It also allows to save any +#''s2dv_cube' object that follows the NetCDF attributes conventions. #' #'@param data An object of class \code{s2dv_cube}. #'@param destination A character string containing the directory name in which #' to save the data. NetCDF file for each starting date are saved into the -#' folder tree: \cr -#' destination/Dataset/variable/. By default the function -#' creates and saves the data into the working directory. +#' folder tree: 'destination/Dataset/variable/'. 
By default the function +#' saves the data into the working directory. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. It can be NULL if there is no #' start date dimension. #'@param ftime_dim A character string indicating the name of the forecast time -#' dimension. By default, it is set to 'time'. It can be NULL if there is no -#' forecast time dimension. +#' dimension. If 'Dates' are used, it can't be NULL. If there is no forecast +#' time dimension, 'Dates' will be set to NULL and will not be used. By +#' default, it is set to 'time'. #'@param dat_dim A character string indicating the name of dataset dimension. -#' By default, it is set to 'dataset'. It can be NULL if there is no dataset -#' dimension. +#' It can be NULL if there is no dataset dimension. By default, it is set to +#' 'dataset'. #'@param var_dim A character string indicating the name of variable dimension. -#' By default, it is set to 'var'. It can be NULL if there is no variable -#' dimension. +#' It can be NULL if there is no variable dimension. By default, it is set to +#' 'var'. #'@param memb_dim A character string indicating the name of the member dimension. -#' By default, it is set to 'member'. It can be NULL if there is no member -#' dimension. +#' It can be NULL if there is no member dimension. By default, it is set to +#' 'member'. #'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files. It must be a vector of the same -#' length as the start date dimension of data. It must be a vector of class -#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -#' If it is NULL, the coordinate corresponding the the start date dimension or -#' the first Date of each time step will be used as the name of the files. -#' It is NULL by default. -#'@param drop_dims A vector of character strings indicating the dimension names -#' of length 1 that need to be dropped in order that they don't appear in the -#' netCDF file. It is NULL by default (optional). +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts +#' between 1 and 10. If it is NULL, the coordinate corresponding the the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. #'@param single_file A logical value indicating if all object is saved in a #' single file (TRUE) or in multiple files (FALSE). When it is FALSE, -#' the array is separated for Datasets, variable and start date. It is FALSE -#' by default. -#'@param extra_string A character string to be include as part of the file name, -#' for instance, to identify member or realization. It would be added to the -#' file name between underscore characters. +#' the array is separated for datasets, variable and start date. When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: '__.nc'; when it is FALSE, +#' it is '_.nc'. It is FALSE by default. +#'@param drop_dims (optional) A vector of character strings indicating the +#' dimension names of length 1 that need to be dropped in order that they don't +#' appear in the netCDF file. 
Only is allowed to drop dimensions that are not +#' used in the computation. The dimensions used in the computation are the ones +#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +#' NULL by default. +#'@param extra_string (Optional) A character string to be included as part of +#' the file name, for instance, to identify member or realization. When +#' single_file is TRUE, the 'extra_string' will substitute all the default +#' file name; when single_file is FALSE, the 'extra_string' will be added +#' in the file name as: '__.nc'. It is NULL by default. #'@param units_hours_since (Optional) A logical value only used for the case #' Dates have forecast time and start date dimension and single_file is TRUE. #' When it is TRUE, it saves the forecast time with units of 'hours since'; #' if it is FALSE, the time units will be a number of time steps with its #' corresponding frequency (e.g. n days, n months or n hours). It is TRUE #' by default. +#'@param global_attrs (Optional) A list with elements containing the global +#' attributes to be saved in the NetCDF. #' #'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file = TRUE}}{ +#'\item{\code{single_file is TRUE}}{ #' All data is saved in a single file located in the specified destination -#' path with the following name: -#' ___.nc. Multiple -#' variables are saved separately in the same file. The forecast time units -#' is extracted from the frequency of the time steps (hours, days, months). -#' The first value of forecast time is 1. If no frequency is found, the units -#' will be 'hours since' each start date and the time steps are assumed to be -#' equally spaced. +#' path with the following name (by default): +#' '__.nc'. Multiple variables +#' are saved separately in the same file. The forecast time units +#' are calculated from each start date (if sdate_dim is not NULL) or from +#' the time step. If 'units_hours_since' is TRUE, the forecast time units +#' will be 'hours since '. If 'units_hours_since' is FALSE, +#' the forecast time units are extracted from the frequency of the time steps +#' (hours, days, months); if no frequency is found, the units will be ’hours +#' since’. When the time units are 'hours since' the time ateps are assumed to +#' be equally spaced. #'} -#'\item{\code{single_file = FALSE}}{ +#'\item{\code{single_file is FALSE}}{ #' The data array is subset and stored into multiple files. Each file #' contains the data subset for each start date, variable and dataset. Files -#' with different variables and Datasets are stored in separated directories -#' within the following directory tree: destination/Dataset/variable/. -#' The name of each file will be: -#' __.nc. +#' with different variables and datasets are stored in separated directories +#' within the following directory tree: 'destination/Dataset/variable/'. +#' The name of each file will be by default: '_.nc'. +#' The forecast time units are calculated from each start date (if sdate_dim +#' is not NULL) or from the time step. The forecast time units will be 'hours +#' since '. 
#'} #' #'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and #'\code{\link{s2dv_cube}} #' #'@examples -#'\dontrun{ #'data <- lonlat_temp_st$exp -#'destination <- "./" -#'CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') -#'} +#'CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', +#' dat_dim = 'dataset', sdate_dim = 'sdate') #' #'@export CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', - ftime_dim = 'time', dat_dim = 'dataset', - var_dim = 'var', memb_dim = 'member', - startdates = NULL, drop_dims = NULL, - single_file = FALSE, extra_string = NULL, - global_attrs = NULL, units_hours_since = TRUE) { + ftime_dim = 'time', dat_dim = 'dataset', + var_dim = 'var', memb_dim = 'member', + startdates = NULL, drop_dims = NULL, + single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = TRUE) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -107,9 +118,7 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', warning("Element 'coords' not found. No coordinates will be used.") } # metadata - if (is.null(data$attrs$Variable$metadata)) { - warning("No metadata found in element Variable from attrs.") - } else { + if (!is.null(data$attrs$Variable$metadata)) { if (!inherits(data$attrs$Variable$metadata, 'list')) { stop("Element metadata from Variable element in attrs must be a list.") } @@ -144,6 +153,10 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) data$coords[[sdate_dim]] <- data$attrs$Dates[1] } + # ftime_dim + if (is.null(ftime_dim)) { + data$attrs$Dates <- NULL + } # startdates if (is.null(startdates)) { if (is.character(data$coords[[sdate_dim]])) { @@ -162,21 +175,21 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', } SaveExp(data = data$data, - destination = destination, - Dates = data$attrs$Dates, - coords = data$coords, - varname = data$attrs$Variable$varName, - metadata = data$attrs$Variable$metadata, - Datasets = data$attrs$Datasets, - startdates = startdates, - dat_dim = dat_dim, sdate_dim = sdate_dim, - ftime_dim = ftime_dim, var_dim = var_dim, - memb_dim = memb_dim, - drop_dims = drop_dims, - extra_string = extra_string, - single_file = single_file, - global_attrs = global_attrs, - units_hours_since = units_hours_since) + destination = destination, + Dates = data$attrs$Dates, + coords = data$coords, + varname = data$attrs$Variable$varName, + metadata = data$attrs$Variable$metadata, + Datasets = data$attrs$Datasets, + startdates = startdates, + dat_dim = dat_dim, sdate_dim = sdate_dim, + ftime_dim = ftime_dim, var_dim = var_dim, + memb_dim = memb_dim, + drop_dims = drop_dims, + extra_string = extra_string, + single_file = single_file, + global_attrs = global_attrs, + units_hours_since = units_hours_since) } #'Save a multidimensional array with metadata to data in NetCDF format #'@description This function allows to save a data array with metadata into a @@ -204,12 +217,6 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #' lists for each variable. #'@param Datasets A vector of character string indicating the names of the #' datasets. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files. It must be a vector of the same -#' length as the start date dimension of data. 
It must be a vector of class -#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -#' If it is NULL, the first Date of each time step will be used as the name of -#' the files. It is NULL by default. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. It can be NULL if there is no #' start date dimension. @@ -225,41 +232,66 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param memb_dim A character string indicating the name of the member dimension. #' By default, it is set to 'member'. It can be NULL if there is no member #' dimension. -#'@param drop_dims A vector of character strings indicating the dimension names -#' of length 1 that need to be dropped in order that they don't appear in the -#' netCDF file. It is NULL by default (optional). +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts +#' between 1 and 10. If it is NULL, the coordinate corresponding the the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. #'@param single_file A logical value indicating if all object is saved in a -#' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, -#' the array is separated for Datasets, variable and start date. It is FALSE -#' by default (optional). -#'@param extra_string A character string to be include as part of the file name, -#' for instance, to identify member or realization. It would be added to the -#' file name between underscore characters (optional). -#'@param global_attrs A list with elements containing the global attributes -#' to be saved in the NetCDF. +#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, +#' the array is separated for datasets, variable and start date. When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: __.nc; when it is FALSE, +#' it is _.nc. It is FALSE by default. +#'@param drop_dims (optional) A vector of character strings indicating the +#' dimension names of length 1 that need to be dropped in order that they don't +#' appear in the netCDF file. Only is allowed to drop dimensions that are not +#' used in the computation. The dimensions used in the computation are the ones +#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +#' NULL by default. +#'@param extra_string (Optional) A character string to be included as part of +#' the file name, for instance, to identify member or realization. When +#' single_file is TRUE, the 'extra_string' will substitute all the default +#' file name; when single_file is FALSE, the 'extra_string' will be added +#' in the file name as: __.nc. It is NULL by default. +#'@param units_hours_since (Optional) A logical value only used for the case +#' Dates have forecast time and start date dimension and single_file is TRUE. +#' When it is TRUE, it saves the forecast time with units of 'hours since'; +#' if it is FALSE, the time units will be a number of time steps with its +#' corresponding frequency (e.g. n days, n months or n hours). It is TRUE +#' by default. 
+#'@param global_attrs (Optional) A list with elements containing the global +#' attributes to be saved in the NetCDF. #' #'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file = TRUE}}{ +#'\item{\code{single_file is TRUE}}{ #' All data is saved in a single file located in the specified destination -#' path with the following name: -#' ___.nc. Multiple -#' variables are saved separately in the same file. The forecast time units -#' is extracted from the frequency of the time steps (hours, days, months). -#' The first value of forecast time is 1. If no frequency is found, the units -#' will be 'hours since' each start date and the time steps are assumed to be -#' equally spaced. +#' path with the following name (by default): +#' '__.nc'. Multiple variables +#' are saved separately in the same file. The forecast time units +#' are calculated from each start date (if sdate_dim is not NULL) or from +#' the time step. If 'units_hours_since' is TRUE, the forecast time units +#' will be 'hours since '. If 'units_hours_since' is FALSE, +#' the forecast time units are extracted from the frequency of the time steps +#' (hours, days, months); if no frequency is found, the units will be ’hours +#' since’. When the time units are 'hours since' the time ateps are assumed to +#' be equally spaced. #'} -#'\item{\code{single_file = FALSE}}{ +#'\item{\code{single_file is FALSE}}{ #' The data array is subset and stored into multiple files. Each file #' contains the data subset for each start date, variable and dataset. Files -#' with different variables and Datasets are stored in separated directories -#' within the following directory tree: destination/Dataset/variable/. -#' The name of each file will be: -#' __.nc. +#' with different variables and datasets are stored in separated directories +#' within the following directory tree: 'destination/Dataset/variable/'. +#' The name of each file will be by default: '_.nc'. +#' The forecast time units are calculated from each start date (if sdate_dim +#' is not NULL) or from the time step. The forecast time units will be 'hours +#' since '. 
#'} #' #'@examples -#'\dontrun{ #'data <- lonlat_temp_st$exp$data #'lon <- lonlat_temp_st$exp$coords$lon #'lat <- lonlat_temp_st$exp$coords$lat @@ -267,13 +299,10 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'Datasets <- lonlat_temp_st$exp$attrs$Datasets #'varname <- 'tas' #'Dates <- lonlat_temp_st$exp$attrs$Dates -#'destination = './' #'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata -#'SaveExp(data = data, destination = destination, coords = coords, -#' Datasets = Datasets, varname = varname, Dates = Dates, -#' metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') -#'} +#'SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname, +#' Dates = Dates, metadata = metadata, single_file = TRUE, +#' ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset') #' #'@import easyNCDF #'@importFrom s2dv Reorder @@ -281,11 +310,11 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@importFrom ClimProjDiags Subset #'@export SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, - varname = NULL, metadata = NULL, Datasets = NULL, - startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', - ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', - drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL, units_hours_since = TRUE) { + varname = NULL, metadata = NULL, Datasets = NULL, + startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', + drop_dims = NULL, single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = TRUE) { ## Initial checks # data if (is.null(data)) { @@ -569,10 +598,11 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, names(attributes(coords[[i_coord]])$variables) <- i_coord } else if (!is.null(attributes(metadata[[i_coord]]))) { # from Load - attr(coords[[i_coord]], 'variables') <- list(attributes(metadata[[i_coord]])) + attrs <- attributes(metadata[[i_coord]]) + # We remove because some attributes can't be saved + attrs <- NULL + attr(coords[[i_coord]], 'variables') <- list(attrs) names(attributes(coords[[i_coord]])$variables) <- i_coord - } else { - stop("Metadata is not correct.") } } } diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index 5e9ded12..a6cbbe86 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -139,18 +139,70 @@ out <- Start(dat = path, sdate = NULL), retrieve = TRUE) -attributes(out)$Variables$common$sdate -# [1] "1 months" "2 months" "3 months" +attributes(out)$Variables$common$ftime +# [1] "1 days" "2 days" "3 days" "4 days" "5 days" "6 days" "7 days" +# [8] "8 days" "9 days" "10 days" "11 days" "12 days" "13 days" "14 days" +# [15] "15 days" "16 days" "17 days" "18 days" "19 days" "20 days" "21 days" +# [22] "22 days" "23 days" "24 days" "25 days" "26 days" "27 days" "28 days" +# [29] "29 days" "30 days" "31 days" + +# (5) Test observations +data <- lonlat_temp$obs +CST_SaveExp(data = data, ftime_dim = 'ftime', memb_dim = NULL, + var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = TRUE, units_hours_since = FALSE) +# Now we read the output with Start: +sdate <- c('20001101', '20051101') +path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") +out <- Start(dat = path, + var = 'tas', # tas + lon = 'all', + lat = 'all', + ftime = 
'all', + member = 1, + sdate = 'all', + return_vars = list( + lon = 'dat', + lat = 'dat', + ftime = NULL, + sdate = NULL), + retrieve = TRUE) +dim(out) +attributes(out)$Variables$common$ftime # (b) Data loaded with Load data <- lonlat_temp$exp data <- lonlat_temp$obs -dtaa <- lonlat_prec +data <- lonlat_prec CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE, units_hours_since = FALSE) -# Error +# Now we read the output with Start: +sdate <- as.vector(data$coords$sdate) +path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") +out <- Start(dat = path, + var = 'prlr', # tas + lon = 'all', + lat = 'all', + ftime = 'all', + sdate = 'all', + member = 'all', + return_vars = list( + lon = 'dat', + lat = 'dat', + ftime = NULL, + sdate = NULL), + retrieve = TRUE) +dim(out) +lonlat_prec$dims + +# Test with ftime_dim NULL +data <- lonlat_temp$exp +data <- CST_Subset(data, along = 'ftime', indices = 1, drop = 'selected') +CST_SaveExp(data = data, ftime_dim = NULL, + var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = FALSE, units_hours_since = FALSE) ################################################################################ # Test 3: Special cases @@ -207,9 +259,62 @@ summary(data3) dim(data3) dim(data3out) -# (1) two variables and two datasets in the same file +# (2) two variables and two datasets in the same file CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var', memb_dim = 'ensemble', dat_dim = 'dat', single_file = TRUE) + +# (3) Observations +repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', + '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') + +exp <- Start(dat = repos_exp, + var = 'tas', + sdate = as.character(c(2005:2008)), + time = indices(1:3), + lat = 1:10, + lat_reorder = Sort(), + lon = 1:10, + lon_reorder = CircularSort(0, 360), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(lon = NULL, + lat = NULL, + time = 'sdate'), + retrieve = FALSE) +lats <- attr(exp, 'Variables')$common$lat +lons <- attr(exp, 'Variables')$common$lon +## The 'time' attribute is a two-dim array +dates <- attr(exp, 'Variables')$common$time +dim(dates) +repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' + +obs <- Start(dat = repos_obs, + var = 'tas', + date = unique(format(dates, '%Y%m')), + time = values(dates), #dim: [sdate = 4, time = 3] + lat = 1:10, + lat_reorder = Sort(), + lon = 1:10, + lon_reorder = CircularSort(0, 360), + time_across = 'date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(lon = NULL, + lat = NULL, + time = 'date'), + retrieve = TRUE) +dim(obs) +attributes(obs)$Variables$common$time +obscube <- as.s2dv_cube(obs) +CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var', + memb_dim = 'ensemble', dat_dim = 'dat', + single_file = TRUE) + + + ################################################################################ \ No newline at end of file diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 8659b215..6b5006fa 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -25,50 +25,61 @@ CST_SaveExp( \item{destination}{A character string containing the directory name in which to save the data. 
NetCDF file for each starting date are saved into the -folder tree: \cr -destination/Dataset/variable/. By default the function -creates and saves the data into the working directory.} +folder tree: 'destination/Dataset/variable/'. By default the function +saves the data into the working directory.} \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'. It can be NULL if there is no start date dimension.} \item{ftime_dim}{A character string indicating the name of the forecast time -dimension. By default, it is set to 'time'. It can be NULL if there is no -forecast time dimension.} +dimension. If 'Dates' are used, it can't be NULL. If there is no forecast +time dimension, 'Dates' will be set to NULL and will not be used. By +default, it is set to 'time'.} \item{dat_dim}{A character string indicating the name of dataset dimension. -By default, it is set to 'dataset'. It can be NULL if there is no dataset -dimension.} +It can be NULL if there is no dataset dimension. By default, it is set to +'dataset'.} \item{var_dim}{A character string indicating the name of variable dimension. -By default, it is set to 'var'. It can be NULL if there is no variable -dimension.} +It can be NULL if there is no variable dimension. By default, it is set to +'var'.} \item{memb_dim}{A character string indicating the name of the member dimension. -By default, it is set to 'member'. It can be NULL if there is no member -dimension.} +It can be NULL if there is no member dimension. By default, it is set to +'member'.} \item{startdates}{A vector of dates that will be used for the filenames -when saving the data in multiple files. It must be a vector of the same -length as the start date dimension of data. It must be a vector of class -\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -If it is NULL, the coordinate corresponding the the start date dimension or -the first Date of each time step will be used as the name of the files. -It is NULL by default.} +when saving the data in multiple files (single_file = FALSE). It must be a +vector of the same length as the start date dimension of data. It must be a +vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts +between 1 and 10. If it is NULL, the coordinate corresponding the the start +date dimension or the first Date of each time step will be used as the name +of the files. It is NULL by default.} -\item{drop_dims}{A vector of character strings indicating the dimension names -of length 1 that need to be dropped in order that they don't appear in the -netCDF file. It is NULL by default (optional).} +\item{drop_dims}{(optional) A vector of character strings indicating the +dimension names of length 1 that need to be dropped in order that they don't +appear in the netCDF file. Only is allowed to drop dimensions that are not +used in the computation. The dimensions used in the computation are the ones +specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +NULL by default.} \item{single_file}{A logical value indicating if all object is saved in a single file (TRUE) or in multiple files (FALSE). When it is FALSE, -the array is separated for Datasets, variable and start date. It is FALSE -by default.} +the array is separated for datasets, variable and start date. When there are +no specified time dimensions, the data will be saved in a single file by +default. 
The output file name when 'single_file' is TRUE is a character +string containing: '__.nc'; when it is FALSE, +it is '_.nc'. It is FALSE by default.} + +\item{extra_string}{(Optional) A character string to be included as part of +the file name, for instance, to identify member or realization. When +single_file is TRUE, the 'extra_string' will substitute all the default +file name; when single_file is FALSE, the 'extra_string' will be added +in the file name as: '__.nc'. It is NULL by default.} -\item{extra_string}{A character string to be include as part of the file name, -for instance, to identify member or realization. It would be added to the -file name between underscore characters.} +\item{global_attrs}{(Optional) A list with elements containing the global +attributes to be saved in the NetCDF.} \item{units_hours_since}{(Optional) A logical value only used for the case Dates have forecast time and start date dimension and single_file is TRUE. @@ -79,39 +90,40 @@ by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr -\item{\code{single_file = TRUE}}{ +\item{\code{single_file is TRUE}}{ All data is saved in a single file located in the specified destination - path with the following name: - ___.nc. Multiple - variables are saved separately in the same file. The forecast time units - is extracted from the frequency of the time steps (hours, days, months). - The first value of forecast time is 1. If no frequency is found, the units - will be 'hours since' each start date and the time steps are assumed to be - equally spaced. + path with the following name (by default): + '__.nc'. Multiple variables + are saved separately in the same file. The forecast time units + are calculated from each start date (if sdate_dim is not NULL) or from + the time step. If 'units_hours_since' is TRUE, the forecast time units + will be 'hours since '. If 'units_hours_since' is FALSE, + the forecast time units are extracted from the frequency of the time steps + (hours, days, months); if no frequency is found, the units will be ’hours + since’. When the time units are 'hours since' the time ateps are assumed to + be equally spaced. } -\item{\code{single_file = FALSE}}{ +\item{\code{single_file is FALSE}}{ The data array is subset and stored into multiple files. Each file contains the data subset for each start date, variable and dataset. Files - with different variables and Datasets are stored in separated directories - within the following directory tree: destination/Dataset/variable/. - The name of each file will be: - __.nc. + with different variables and datasets are stored in separated directories + within the following directory tree: 'destination/Dataset/variable/'. + The name of each file will be by default: '_.nc'. + The forecast time units are calculated from each start date (if sdate_dim + is not NULL) or from the time step. The forecast time units will be 'hours + since '. } } \description{ This function allows to divide and save a object of class 's2dv_cube' into a NetCDF file, allowing to reload the saved data using -\code{Start} function from StartR package. If the original 's2dv_cube' object -has been created from \code{CST_Load()}, then it can be reloaded with -\code{Load()}. +\code{CST_Start} or \code{CST_Load} functions. It also allows to save any +'s2dv_cube' object that follows the NetCDF attributes conventions. 
} \examples{ -\dontrun{ data <- lonlat_temp_st$exp -destination <- "./" -CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', - var_dim = 'var', dat_dim = 'dataset') -} +CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', + dat_dim = 'dataset', sdate_dim = 'sdate') } \seealso{ diff --git a/man/SaveExp.Rd b/man/SaveExp.Rd index 2ff92489..7acd17d1 100644 --- a/man/SaveExp.Rd +++ b/man/SaveExp.Rd @@ -52,11 +52,12 @@ lists for each variable.} datasets.} \item{startdates}{A vector of dates that will be used for the filenames -when saving the data in multiple files. It must be a vector of the same -length as the start date dimension of data. It must be a vector of class -\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -If it is NULL, the first Date of each time step will be used as the name of -the files. It is NULL by default.} +when saving the data in multiple files (single_file = FALSE). It must be a +vector of the same length as the start date dimension of data. It must be a +vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts +between 1 and 10. If it is NULL, the coordinate corresponding the the start +date dimension or the first Date of each time step will be used as the name +of the files. It is NULL by default.} \item{dat_dim}{A character string indicating the name of dataset dimension. By default, it is set to 'dataset'. It can be NULL if there is no dataset @@ -78,41 +79,61 @@ dimension.} By default, it is set to 'member'. It can be NULL if there is no member dimension.} -\item{drop_dims}{A vector of character strings indicating the dimension names -of length 1 that need to be dropped in order that they don't appear in the -netCDF file. It is NULL by default (optional).} +\item{drop_dims}{(optional) A vector of character strings indicating the +dimension names of length 1 that need to be dropped in order that they don't +appear in the netCDF file. Only is allowed to drop dimensions that are not +used in the computation. The dimensions used in the computation are the ones +specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +NULL by default.} \item{single_file}{A logical value indicating if all object is saved in a -unique file (TRUE) or in separated directories (FALSE). When it is FALSE, -the array is separated for Datasets, variable and start date. It is FALSE -by default (optional).} - -\item{extra_string}{A character string to be include as part of the file name, -for instance, to identify member or realization. It would be added to the -file name between underscore characters (optional).} - -\item{global_attrs}{A list with elements containing the global attributes -to be saved in the NetCDF.} +single file (TRUE) or in multiple files (FALSE). When it is FALSE, +the array is separated for datasets, variable and start date. When there are +no specified time dimensions, the data will be saved in a single file by +default. The output file name when 'single_file' is TRUE is a character +string containing: __.nc; when it is FALSE, +it is _.nc. It is FALSE by default.} + +\item{extra_string}{(Optional) A character string to be included as part of +the file name, for instance, to identify member or realization. When +single_file is TRUE, the 'extra_string' will substitute all the default +file name; when single_file is FALSE, the 'extra_string' will be added +in the file name as: __.nc. 
It is NULL by default.} + +\item{global_attrs}{(Optional) A list with elements containing the global +attributes to be saved in the NetCDF.} + +\item{units_hours_since}{(Optional) A logical value only used for the case +Dates have forecast time and start date dimension and single_file is TRUE. +When it is TRUE, it saves the forecast time with units of 'hours since'; +if it is FALSE, the time units will be a number of time steps with its +corresponding frequency (e.g. n days, n months or n hours). It is TRUE +by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr -\item{\code{single_file = TRUE}}{ +\item{\code{single_file is TRUE}}{ All data is saved in a single file located in the specified destination - path with the following name: - ___.nc. Multiple - variables are saved separately in the same file. The forecast time units - is extracted from the frequency of the time steps (hours, days, months). - The first value of forecast time is 1. If no frequency is found, the units - will be 'hours since' each start date and the time steps are assumed to be - equally spaced. + path with the following name (by default): + '__.nc'. Multiple variables + are saved separately in the same file. The forecast time units + are calculated from each start date (if sdate_dim is not NULL) or from + the time step. If 'units_hours_since' is TRUE, the forecast time units + will be 'hours since '. If 'units_hours_since' is FALSE, + the forecast time units are extracted from the frequency of the time steps + (hours, days, months); if no frequency is found, the units will be ’hours + since’. When the time units are 'hours since' the time ateps are assumed to + be equally spaced. } -\item{\code{single_file = FALSE}}{ +\item{\code{single_file is FALSE}}{ The data array is subset and stored into multiple files. Each file contains the data subset for each start date, variable and dataset. Files - with different variables and Datasets are stored in separated directories - within the following directory tree: destination/Dataset/variable/. - The name of each file will be: - __.nc. + with different variables and datasets are stored in separated directories + within the following directory tree: 'destination/Dataset/variable/'. + The name of each file will be by default: '_.nc'. + The forecast time units are calculated from each start date (if sdate_dim + is not NULL) or from the time step. The forecast time units will be 'hours + since '. } } \description{ @@ -122,7 +143,6 @@ from StartR package. If the original 's2dv_cube' object has been created from \code{CST_Load()}, then it can be reloaded with \code{Load()}. 
} \examples{ -\dontrun{ data <- lonlat_temp_st$exp$data lon <- lonlat_temp_st$exp$coords$lon lat <- lonlat_temp_st$exp$coords$lat @@ -130,13 +150,10 @@ coords <- list(lon = lon, lat = lat) Datasets <- lonlat_temp_st$exp$attrs$Datasets varname <- 'tas' Dates <- lonlat_temp_st$exp$attrs$Dates -destination = './' metadata <- lonlat_temp_st$exp$attrs$Variable$metadata -SaveExp(data = data, destination = destination, coords = coords, - Datasets = Datasets, varname = varname, Dates = Dates, - metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', - var_dim = 'var', dat_dim = 'dataset') -} +SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname, + Dates = Dates, metadata = metadata, single_file = TRUE, + ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset') } \author{ diff --git a/tests/testthat/test-CST_SaveExp.R b/tests/testthat/test-CST_SaveExp.R index 17226161..65c0cb22 100644 --- a/tests/testthat/test-CST_SaveExp.R +++ b/tests/testthat/test-CST_SaveExp.R @@ -123,7 +123,8 @@ test_that("1. Input checks: CST_SaveExp", { expect_warning( CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, startdates = 1), - "Parameter 'startdates' is not a character string, it will not be used." + paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension 'sdate', it will not be used.") ) expect_warning( CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, @@ -132,11 +133,6 @@ test_that("1. Input checks: CST_SaveExp", { "as dimension '", 'sdate',"', it will not be used.") ) # metadata - expect_warning( - CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("No metadata found in element Variable from attrs.") - ) cube1$attrs$Variable$metadata <- 'metadata' expect_error( CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, -- GitLab From e7b04c705456a32ff78fad3b13bf60b25fcde407 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 1 Dec 2023 15:27:11 +0100 Subject: [PATCH 07/16] Improve CST_SaveExp and correct unit test --- R/CST_SaveExp.R | 386 +++++++++++++++++++----------- man/CST_SaveExp.Rd | 43 ++-- man/SaveExp.Rd | 71 +++--- tests/testthat/test-CST_SaveExp.R | 66 +---- 4 files changed, 317 insertions(+), 249 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 26280fdd..c5742031 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -25,9 +25,9 @@ #'@param var_dim A character string indicating the name of variable dimension. #' It can be NULL if there is no variable dimension. By default, it is set to #' 'var'. -#'@param memb_dim A character string indicating the name of the member dimension. -#' It can be NULL if there is no member dimension. By default, it is set to -#' 'member'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It can be NULL if there is no member dimension. By default, it is +#' set to 'member'. #'@param startdates A vector of dates that will be used for the filenames #' when saving the data in multiple files (single_file = FALSE). It must be a #' vector of the same length as the start date dimension of data. It must be a @@ -52,13 +52,14 @@ #' the file name, for instance, to identify member or realization. When #' single_file is TRUE, the 'extra_string' will substitute all the default #' file name; when single_file is FALSE, the 'extra_string' will be added -#' in the file name as: '__.nc'. It is NULL by default. 
-#'@param units_hours_since (Optional) A logical value only used for the case -#' Dates have forecast time and start date dimension and single_file is TRUE. -#' When it is TRUE, it saves the forecast time with units of 'hours since'; -#' if it is FALSE, the time units will be a number of time steps with its -#' corresponding frequency (e.g. n days, n months or n hours). It is TRUE -#' by default. +#' in the file name as: '__.nc'. It is NULL by +#' default. +#'@param units_hours_since (Optional) A logical value only available for the +#' case: Dates have forecast time and start date dimension, single_file is +#' TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast +#' time with units of 'hours since'; if it is FALSE, the time units will be a +#' number of time steps with its corresponding frequency (e.g. n days, n months +#' or n hours). It is TRUE by default. #'@param global_attrs (Optional) A list with elements containing the global #' attributes to be saved in the NetCDF. #' @@ -96,10 +97,10 @@ #' dat_dim = 'dataset', sdate_dim = 'sdate') #' #'@export -CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', - ftime_dim = 'time', dat_dim = 'dataset', - var_dim = 'var', memb_dim = 'member', - startdates = NULL, drop_dims = NULL, +CST_SaveExp <- function(data, destination = "./", startdates = NULL, + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', + var_dim = 'var', drop_dims = NULL, single_file = FALSE, extra_string = NULL, global_attrs = NULL, units_hours_since = TRUE) { # Check 's2dv_cube' @@ -114,20 +115,11 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', if (!inherits(data$attrs, 'list')) { stop("Level 'attrs' must be a list with at least 'Dates' element.") } - if (!all(c('coords') %in% names(data))) { - warning("Element 'coords' not found. 
No coordinates will be used.") - } # metadata if (!is.null(data$attrs$Variable$metadata)) { if (!inherits(data$attrs$Variable$metadata, 'list')) { stop("Element metadata from Variable element in attrs must be a list.") } - if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { - warning("Metadata is not found for any coordinate.") - } else if (!any(names(data$attrs$Variable$metadata) %in% - data$attrs$Variable$varName)) { - warning("Metadata is not found for any variable.") - } } # Dates if (is.null(data$attrs$Dates)) { @@ -141,21 +133,6 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' must be a character string.") } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } - } else if (length(dim(data$attrs$Dates)) == 1) { - sdate_dim <- 'sdate' - dim(data$data) <- c(sdate = 1, dim(data$data)) - data$dims <- dim(data$data) - dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) - data$coords[[sdate_dim]] <- data$attrs$Dates[1] - } - # ftime_dim - if (is.null(ftime_dim)) { - data$attrs$Dates <- NULL } # startdates if (is.null(startdates)) { @@ -163,31 +140,22 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', startdates <- data$coords[[sdate_dim]] } } - - if (!is.null(startdates)) { - if (!is.null(sdate_dim)) { - if (dim(data$data)[sdate_dim] != length(startdates)) { - warning(paste0("Parameter 'startdates' doesn't have the same length ", - "as dimension '", sdate_dim,"', it will not be used.")) - startdates <- data$coords[[sdate_dim]] - } - } - } SaveExp(data = data$data, destination = destination, - Dates = data$attrs$Dates, coords = data$coords, + Dates = data$attrs$Dates, + time_bounds = data$attrs$time_bounds, + startdates = startdates, varname = data$attrs$Variable$varName, metadata = data$attrs$Variable$metadata, Datasets = data$attrs$Datasets, - startdates = startdates, - dat_dim = dat_dim, sdate_dim = sdate_dim, - ftime_dim = ftime_dim, var_dim = var_dim, + sdate_dim = sdate_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + dat_dim = dat_dim, var_dim = var_dim, drop_dims = drop_dims, - extra_string = extra_string, single_file = single_file, + extra_string = extra_string, global_attrs = global_attrs, units_hours_since = units_hours_since) } @@ -202,14 +170,25 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param data A multi-dimensional array with named dimensions. #'@param destination A character string indicating the path where to store the #' NetCDF files. -#'@param Dates A named array of dates with the corresponding sdate and forecast -#' time dimension. If there is no sdate_dim, you can set it to NULL. -#' It must have ftime_dim dimension. #'@param coords A named list with elements of the coordinates corresponding to #' the dimensions of the data parameter. The names and length of each element #' must correspond to the names of the dimensions. If any coordinate is not #' provided, it is set as an index vector with the values from 1 to the length #' of the corresponding dimension. +#'@param Dates A named array of dates with the corresponding sdate and forecast +#' time dimension. If there is no sdate_dim, you can set it to NULL. +#' It must have ftime_dim dimension. 
+#'@param time_bounds (Optional) A list of two arrays of dates containing +#' the lower (first array) and the upper (second array) time bounds +#' corresponding to Dates. Each array must have the same dimensions as Dates. +#' It is NULL by default. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts +#' between 1 and 10. If it is NULL, the coordinate corresponding the the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. #'@param varname A character string indicating the name of the variable to be #' saved. #'@param metadata A named list where each element is a variable containing the @@ -229,42 +208,36 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param var_dim A character string indicating the name of variable dimension. #' By default, it is set to 'var'. It can be NULL if there is no variable #' dimension. -#'@param memb_dim A character string indicating the name of the member dimension. -#' By default, it is set to 'member'. It can be NULL if there is no member -#' dimension. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files (single_file = FALSE). It must be a -#' vector of the same length as the start date dimension of data. It must be a -#' vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts -#' between 1 and 10. If it is NULL, the coordinate corresponding the the start -#' date dimension or the first Date of each time step will be used as the name -#' of the files. It is NULL by default. -#'@param single_file A logical value indicating if all object is saved in a -#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, -#' the array is separated for datasets, variable and start date. When there are -#' no specified time dimensions, the data will be saved in a single file by -#' default. The output file name when 'single_file' is TRUE is a character -#' string containing: __.nc; when it is FALSE, -#' it is _.nc. It is FALSE by default. +#'@param memb_dim A character string indicating the name of the member +#' dimension. By default, it is set to 'member'. It can be NULL if there is no +#' member dimension. #'@param drop_dims (optional) A vector of character strings indicating the #' dimension names of length 1 that need to be dropped in order that they don't #' appear in the netCDF file. Only is allowed to drop dimensions that are not #' used in the computation. The dimensions used in the computation are the ones #' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is #' NULL by default. +#'@param single_file A logical value indicating if all object is saved in a +#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, +#' the array is separated for datasets, variable and start date. When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: '__.nc'; when it is FALSE, +#' it is '_.nc'. It is FALSE by default. #'@param extra_string (Optional) A character string to be included as part of #' the file name, for instance, to identify member or realization. 
When #' single_file is TRUE, the 'extra_string' will substitute all the default #' file name; when single_file is FALSE, the 'extra_string' will be added -#' in the file name as: __.nc. It is NULL by default. -#'@param units_hours_since (Optional) A logical value only used for the case -#' Dates have forecast time and start date dimension and single_file is TRUE. -#' When it is TRUE, it saves the forecast time with units of 'hours since'; -#' if it is FALSE, the time units will be a number of time steps with its -#' corresponding frequency (e.g. n days, n months or n hours). It is TRUE -#' by default. +#' in the file name as: '__.nc'. It is NULL by +#' default. #'@param global_attrs (Optional) A list with elements containing the global #' attributes to be saved in the NetCDF. +#'@param units_hours_since (Optional) A logical value only available for the +#' case: Dates have forecast time and start date dimension, single_file is +#' TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time +#' with units of 'hours since'; if it is FALSE, the time units will be a number +#' of time steps with its corresponding frequency (e.g. n days, n months or n +#' hours). It is TRUE by default. #' #'@return Multiple or single NetCDF files containing the data array.\cr #'\item{\code{single_file is TRUE}}{ @@ -309,10 +282,11 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export -SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, +SaveExp <- function(data, destination = "./", coords = NULL, + Dates = NULL, time_bounds = NULL, startdates = NULL, varname = NULL, metadata = NULL, Datasets = NULL, - startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', - ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var', drop_dims = NULL, single_file = FALSE, extra_string = NULL, global_attrs = NULL, units_hours_since = TRUE) { ## Initial checks @@ -330,15 +304,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, "indicating the name of the file (including the folder if needed) ", "where the data will be saved.") } - # Dates - if (!is.null(Dates)) { - if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { - stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") - } - if (is.null(dim(Dates))) { - stop("Parameter 'Dates' must have dimension names.") - } - } # drop_dims if (!is.null(drop_dims)) { if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { @@ -456,11 +421,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'dat_dim' is not found in 'data' dimension. 
Set it ", "as NULL if there is no Datasets dimension.") } - if (length(dat_dim) > 1) { - warning("Parameter 'dat_dim' has length greater than 1 and ", - "only the first element will be used.") - dat_dim <- dat_dim[1] - } n_datasets <- dim(data)[dat_dim] } else { n_datasets <- 1 @@ -488,35 +448,110 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, single_file <- TRUE } } - # Dates dimension check + # Dates (1): initial checks if (!is.null(Dates)) { - if (is.null(ftime_dim)) { - stop("Parameter 'Dates' must have 'ftime_dim'.") + if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { + stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") } - if (all(c(ftime_dim, sdate_dim) %in% names(dim(Dates)))) { - if (any(!names(dim(Dates)) %in% c(ftime_dim, sdate_dim))) { - if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] - } else { - stop("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") - } + if (is.null(dim(Dates))) { + stop("Parameter 'Dates' must have dimension names.") + } + if (all(is.null(ftime_dim), is.null(sdate_dim))) { + warning("Parameters 'ftime_dim' and 'sdate_dim' can't both be NULL ", + "if 'Dates' are used. 'Dates' will not be used.") + Dates <- NULL + } + # sdate_dim in Dates + if (!is.null(sdate_dim)) { + if (!sdate_dim %in% names(dim(Dates))) { + warning("Parameter 'sdate_dim' is not found in 'Dates' dimension. ", + "Dates will not be used.") + Dates <- NULL } - if (is.null(startdates)) { - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { - warning("Parameter 'startdates' should be a character string containing ", - "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", - "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + # ftime_dim in Dates + if (!is.null(ftime_dim)) { + if (!ftime_dim %in% names(dim(Dates))) { + warning("Parameter 'ftime_dim' is not found in 'Dates' dimension. 
", + "Dates will not be used.") + Dates <- NULL } - if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { - startdates <- format(startdates, "%Y%m%d") + } + } + # time_bounds + if (!is.null(time_bounds)) { + if (!inherits(time_bounds, 'list')) { + stop("Parameter 'time_bounds' must be a list with two dates arrays.") + } + time_bounds_dims <- lapply(time_bounds, function(x) dim(x)) + if (!identical(time_bounds_dims[[1]], time_bounds_dims[[2]])) { + stop("Parameter 'time_bounds' must have 2 arrays with same dimensions.") + } + name_tb <- sort(names(time_bounds_dims[[1]])) + name_dt <- sort(names(dim(Dates))) + if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { + stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", + "of all dimensions.")) + } + } + # Dates (2): Check dimensions + if (!is.null(Dates)) { + if (any(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] != 1)) { + stop("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", + "dimensions of length greater than 1.") + } + # drop dimensions of length 1 different from sdate_dim and ftime_dim + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + + # add ftime if needed + if (is.null(ftime_dim)) { + warning("A 'time' dimension of length 1 will be added to 'Dates'.") + dim(Dates) <- c(time = 1, dim(Dates)) + dim(data) <- c(time = 1, dim(data)) + dimnames <- names(dim(data)) + ftime_dim <- 'time' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(time = 1, dim(x)) + return(x) + }) } - } else if (any(ftime_dim %in% names(dim(Dates)))) { - if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } + # add sdate if needed + if (is.null(sdate_dim)) { + if (!single_file) { + warning("A 'sdate' dimension of length 1 will be added to 'Dates'.") + dim(Dates) <- c(dim(Dates), sdate = 1) + dim(data) <- c(dim(data), sdate = 1) + dimnames <- names(dim(data)) + sdate_dim <- 'sdate' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(dim(x), sdate = 1) + return(x) + }) + } + if (!is.null(startdates)) { + if (length(startdates) != 1) { + warning("Parameter 'startdates' must be of length 1 if 'sdate_dim' is NULL.", + "They won't be used.") + startdates <- NULL + } + } } } + } + # startdates + if (!is.null(Dates)) { + # check startdates + if (is.null(startdates)) { + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { + warning("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } } else if (!single_file) { warning("Dates must be provided if 'data' must be saved in separated files. 
", "All data will be saved in a single file.") @@ -529,7 +564,20 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { startdates <- rep('XXX', dim(data)[sdate_dim]) } + } else { + if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } + if (!is.null(sdate_dim)) { + if (dim(data)[sdate_dim] != length(startdates)) { + warning(paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", sdate_dim,"', it will not be used.")) + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + startdates <- format(startdates, "%Y%m%d") + } + } } + # Datasets if (is.null(Datasets)) { Datasets <- rep('XXX', n_datasets ) @@ -574,7 +622,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, for (i_coord in filedims) { # vals - if (i_coord %in% names(coords)) { + if (i_coord %in% names(coords)) {str if (is.numeric(coords[[i_coord]])) { coords[[i_coord]] <- as.vector(coords[[i_coord]]) } else { @@ -609,7 +657,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, # Reorder coords coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL coords <- coords[filedims] - defined_vars <- list() if (!single_file) { for (i in 1:n_datasets) { path <- file.path(destination, Datasets[i], varname) @@ -630,6 +677,11 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if (is.null(Dates)) { input_data <- list(data_subset, startdates) target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) + } else if (!is.null(time_bounds)) { + input_data <- list(data_subset, startdates, Dates, + time_bounds[[1]], time_bounds[[2]]) + target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, + ftime_dim, ftime_dim, ftime_dim) } else { input_data <- list(data_subset, startdates, Dates) target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) @@ -652,8 +704,12 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if (!is.null(dat_dim)) { coords[[dat_dim]] <- array(1:dim(data)[dat_dim], dim = dim(data)[dat_dim]) attr(coords[[dat_dim]], 'variables') <- list(list(units = 'adim')) - # extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) } + # time_bnds + if (!is.null(time_bounds)) { + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + } + # Dates if (!is.null(Dates)) { if (is.null(sdate_dim)) { sdates <- Dates[1] @@ -663,17 +719,18 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, # sdate definition sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) - # new dim(differ) <- dim(data)[sdate_dim] coords[[sdate_dim]] <- differ - attr(coords[[sdate_dim]], 'variables') <- list(list(units = paste('hours since', sdates[1]), - calendar = 'proleptic_gregorian', - longname = sdate_dim)) + attrs <- list(units = paste('hours since', sdates[1]), + calendar = 'proleptic_gregorian', longname = sdate_dim) + attr(coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs # ftime definition Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) differ_ftime <- array(dim = dim(Dates)) - for (i in 1:length(sdates)) differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], - units = "hours")) + for (i in 1:length(sdates)) { + differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], + units = "hours")) + } dim(differ_ftime) <- dim(Dates) leadtimes <- 
Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { @@ -682,7 +739,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, "correctly.") } } - if (!units_hours_since) { + if (all(!units_hours_since, is.null(time_bounds))) { if (all(diff(leadtimes/24) == 1)) { # daily values units <- 'days' @@ -690,7 +747,7 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { # monthly values units <- 'months' - vals <- round(leadtimes/730) + 1 + vals <- round(leadtimes/(30.437*24)) + 1 } else { # other frequency units <- 'hours' @@ -700,16 +757,47 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, units <- paste('hours since', paste(sdates, collapse = ', ')) vals <- leadtimes } + # Add ftime var dim(vals) <- dim(data)[ftime_dim] coords[[ftime_dim]] <- vals - attr(coords[[ftime_dim]], 'variables') <- list(list(units = units, - calendar = 'proleptic_gregorian', - longname = ftime_dim, - unlim = TRUE)) + attrs <- list(units = units, calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE) + attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + + # Add time_bnds + if (!is.null(time_bounds)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + leadtimes_bnds <- as.numeric(difftime(time_bnds, sdates, units = "hours")) + dim(leadtimes_bnds) <- c(dim(Dates), bnds = 2) + } else { + # assuming they have sdate and ftime + time_bnds <- lapply(time_bounds, function(x) { + x <- Reorder(x, c(ftime_dim, sdate_dim)) + return(x) + }) + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + dim(time_bnds) <- c(dim(Dates), bnds = 2) + differ_bnds <- array(dim = c(dim(time_bnds))) + for (i in 1:length(sdates)) { + differ_bnds[, i, ] <- as.numeric(difftime(time_bnds[, i, ], Dates[1, i], + units = "hours")) + } + # NOTE: Add a warning when they are not equally spaced? 
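+        # Keep only the bound offsets of the first start date; as with 'Dates'
+        # above, the time bounds are assumed to be equally spaced across start
+        # dates (see the NOTE above about warning when they are not).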
+ leadtimes_bnds <- Subset(differ_bnds, along = sdate_dim, 1, drop = 'selected') + } + # Add time_bnds + leadtimes_bnds <- Reorder(leadtimes_bnds, c('bnds', ftime_dim)) + coords[['time_bnds']] <- leadtimes_bnds + attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), + calendar = 'proleptic_gregorian', + longname = 'time bounds', unlim = FALSE) + attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + } } # var definition - defined_vars <- list() extra_info_var <- NULL for (j in 1:n_vars) { varname_j <- varname[j] @@ -751,18 +839,30 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } .saveexp <- function(data, coords, destination = "./", - startdates = NULL, dates = NULL, - ftime_dim = 'time', varname = 'var', - metadata_var = NULL, extra_string = NULL, - global_attrs = NULL) { + startdates = NULL, dates = NULL, + time_bnds1 = NULL, time_bnds2 = NULL, + ftime_dim = 'time', varname = 'var', + metadata_var = NULL, extra_string = NULL, + global_attrs = NULL) { if (!is.null(dates)) { differ <- as.numeric(difftime(dates, dates[1], units = "hours")) dim(differ) <- dim(data)[ftime_dim] coords[[ftime_dim]] <- differ - attr(coords[[ftime_dim]], 'variables') <- list(list(units = paste('hours since', dates[1]), - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(attributes(coords[[ftime_dim]])$variables) <- ftime_dim + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE) + attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + } + if (!any(is.null(time_bnds1), is.null(time_bnds2))) { + time_bnds <- c(time_bnds1, time_bnds2) + time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) + dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) + time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) + coords[['time_bnds']] <- time_bnds + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = 'time bounds', unlim = FALSE) + attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs } # Add data coords[[varname]] <- data diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 6b5006fa..1520eb08 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -7,12 +7,12 @@ CST_SaveExp( data, destination = "./", + startdates = NULL, sdate_dim = "sdate", ftime_dim = "time", + memb_dim = "member", dat_dim = "dataset", var_dim = "var", - memb_dim = "member", - startdates = NULL, drop_dims = NULL, single_file = FALSE, extra_string = NULL, @@ -28,6 +28,14 @@ to save the data. NetCDF file for each starting date are saved into the folder tree: 'destination/Dataset/variable/'. By default the function saves the data into the working directory.} +\item{startdates}{A vector of dates that will be used for the filenames +when saving the data in multiple files (single_file = FALSE). It must be a +vector of the same length as the start date dimension of data. It must be a +vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts +between 1 and 10. If it is NULL, the coordinate corresponding the the start +date dimension or the first Date of each time step will be used as the name +of the files. It is NULL by default.} + \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'. It can be NULL if there is no start date dimension.} @@ -37,6 +45,10 @@ dimension. If 'Dates' are used, it can't be NULL. 
If there is no forecast time dimension, 'Dates' will be set to NULL and will not be used. By default, it is set to 'time'.} +\item{memb_dim}{A character string indicating the name of the member +dimension. It can be NULL if there is no member dimension. By default, it is + set to 'member'.} + \item{dat_dim}{A character string indicating the name of dataset dimension. It can be NULL if there is no dataset dimension. By default, it is set to 'dataset'.} @@ -45,18 +57,6 @@ It can be NULL if there is no dataset dimension. By default, it is set to It can be NULL if there is no variable dimension. By default, it is set to 'var'.} -\item{memb_dim}{A character string indicating the name of the member dimension. -It can be NULL if there is no member dimension. By default, it is set to -'member'.} - -\item{startdates}{A vector of dates that will be used for the filenames -when saving the data in multiple files (single_file = FALSE). It must be a -vector of the same length as the start date dimension of data. It must be a -vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts -between 1 and 10. If it is NULL, the coordinate corresponding the the start -date dimension or the first Date of each time step will be used as the name -of the files. It is NULL by default.} - \item{drop_dims}{(optional) A vector of character strings indicating the dimension names of length 1 that need to be dropped in order that they don't appear in the netCDF file. Only is allowed to drop dimensions that are not @@ -76,17 +76,18 @@ it is '_.nc'. It is FALSE by default.} the file name, for instance, to identify member or realization. When single_file is TRUE, the 'extra_string' will substitute all the default file name; when single_file is FALSE, the 'extra_string' will be added -in the file name as: '__.nc'. It is NULL by default.} +in the file name as: '__.nc'. It is NULL by +default.} \item{global_attrs}{(Optional) A list with elements containing the global attributes to be saved in the NetCDF.} -\item{units_hours_since}{(Optional) A logical value only used for the case -Dates have forecast time and start date dimension and single_file is TRUE. -When it is TRUE, it saves the forecast time with units of 'hours since'; -if it is FALSE, the time units will be a number of time steps with its -corresponding frequency (e.g. n days, n months or n hours). It is TRUE -by default.} +\item{units_hours_since}{(Optional) A logical value only available for the +case: Dates have forecast time and start date dimension, single_file is +TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast +time with units of 'hours since'; if it is FALSE, the time units will be a +number of time steps with its corresponding frequency (e.g. n days, n months +or n hours). 
It is TRUE by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr diff --git a/man/SaveExp.Rd b/man/SaveExp.Rd index 7acd17d1..dbef149d 100644 --- a/man/SaveExp.Rd +++ b/man/SaveExp.Rd @@ -7,17 +7,18 @@ SaveExp( data, destination = "./", - Dates = NULL, coords = NULL, + Dates = NULL, + time_bounds = NULL, + startdates = NULL, varname = NULL, metadata = NULL, Datasets = NULL, - startdates = NULL, - dat_dim = "dataset", sdate_dim = "sdate", ftime_dim = "time", - var_dim = "var", memb_dim = "member", + dat_dim = "dataset", + var_dim = "var", drop_dims = NULL, single_file = FALSE, extra_string = NULL, @@ -31,25 +32,20 @@ SaveExp( \item{destination}{A character string indicating the path where to store the NetCDF files.} -\item{Dates}{A named array of dates with the corresponding sdate and forecast -time dimension. If there is no sdate_dim, you can set it to NULL. -It must have ftime_dim dimension.} - \item{coords}{A named list with elements of the coordinates corresponding to the dimensions of the data parameter. The names and length of each element must correspond to the names of the dimensions. If any coordinate is not provided, it is set as an index vector with the values from 1 to the length of the corresponding dimension.} -\item{varname}{A character string indicating the name of the variable to be -saved.} - -\item{metadata}{A named list where each element is a variable containing the -corresponding information. The information must be contained in a list of -lists for each variable.} +\item{Dates}{A named array of dates with the corresponding sdate and forecast +time dimension. If there is no sdate_dim, you can set it to NULL. +It must have ftime_dim dimension.} -\item{Datasets}{A vector of character string indicating the names of the -datasets.} +\item{time_bounds}{(Optional) A list of two arrays of dates containing +the lower (first array) and the upper (second array) time bounds +corresponding to Dates. Each array must have the same dimensions as Dates. +It is NULL by default.} \item{startdates}{A vector of dates that will be used for the filenames when saving the data in multiple files (single_file = FALSE). It must be a @@ -59,9 +55,15 @@ between 1 and 10. If it is NULL, the coordinate corresponding the the start date dimension or the first Date of each time step will be used as the name of the files. It is NULL by default.} -\item{dat_dim}{A character string indicating the name of dataset dimension. -By default, it is set to 'dataset'. It can be NULL if there is no dataset -dimension.} +\item{varname}{A character string indicating the name of the variable to be +saved.} + +\item{metadata}{A named list where each element is a variable containing the +corresponding information. The information must be contained in a list of +lists for each variable.} + +\item{Datasets}{A vector of character string indicating the names of the +datasets.} \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'. It can be NULL if there is no @@ -71,12 +73,16 @@ start date dimension.} dimension. By default, it is set to 'time'. It can be NULL if there is no forecast time dimension.} -\item{var_dim}{A character string indicating the name of variable dimension. -By default, it is set to 'var'. It can be NULL if there is no variable +\item{memb_dim}{A character string indicating the name of the member +dimension. By default, it is set to 'member'. 
It can be NULL if there is no +member dimension.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +By default, it is set to 'dataset'. It can be NULL if there is no dataset dimension.} -\item{memb_dim}{A character string indicating the name of the member dimension. -By default, it is set to 'member'. It can be NULL if there is no member +\item{var_dim}{A character string indicating the name of variable dimension. +By default, it is set to 'var'. It can be NULL if there is no variable dimension.} \item{drop_dims}{(optional) A vector of character strings indicating the @@ -91,24 +97,25 @@ single file (TRUE) or in multiple files (FALSE). When it is FALSE, the array is separated for datasets, variable and start date. When there are no specified time dimensions, the data will be saved in a single file by default. The output file name when 'single_file' is TRUE is a character -string containing: __.nc; when it is FALSE, -it is _.nc. It is FALSE by default.} +string containing: '__.nc'; when it is FALSE, +it is '_.nc'. It is FALSE by default.} \item{extra_string}{(Optional) A character string to be included as part of the file name, for instance, to identify member or realization. When single_file is TRUE, the 'extra_string' will substitute all the default file name; when single_file is FALSE, the 'extra_string' will be added -in the file name as: __.nc. It is NULL by default.} +in the file name as: '__.nc'. It is NULL by +default.} \item{global_attrs}{(Optional) A list with elements containing the global attributes to be saved in the NetCDF.} -\item{units_hours_since}{(Optional) A logical value only used for the case -Dates have forecast time and start date dimension and single_file is TRUE. -When it is TRUE, it saves the forecast time with units of 'hours since'; -if it is FALSE, the time units will be a number of time steps with its -corresponding frequency (e.g. n days, n months or n hours). It is TRUE -by default.} +\item{units_hours_since}{(Optional) A logical value only available for the +case: Dates have forecast time and start date dimension, single_file is +TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time +with units of 'hours since'; if it is FALSE, the time units will be a number +of time steps with its corresponding frequency (e.g. n days, n months or n +hours). It is TRUE by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr diff --git a/tests/testthat/test-CST_SaveExp.R b/tests/testthat/test-CST_SaveExp.R index 65c0cb22..b4e17554 100644 --- a/tests/testthat/test-CST_SaveExp.R +++ b/tests/testthat/test-CST_SaveExp.R @@ -90,14 +90,6 @@ test_that("1. Input checks: CST_SaveExp", { CST_SaveExp(data = cube0), paste0("Level 'attrs' must be a list with at least 'Dates' element.") ) - cube0$attrs <- NULL - cube0$attrs$Dates <- dates2 - expect_warning( - CST_SaveExp(data = cube0, sdate_dim = c('sdate', 'sweek'), - ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - var_dim = NULL, single_file = FALSE), - paste0("Element 'coords' not found. No coordinates will be used.") - ) # sdate_dim suppressWarnings( @@ -106,13 +98,6 @@ test_that("1. 
Input checks: CST_SaveExp", { paste0("Parameter 'sdate_dim' must be a character string.") ) ) - expect_warning( - CST_SaveExp(data = cube1, sdate_dim = c('sdate', 'sweek'), - ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - var_dim = NULL, extra_string = 'test'), - paste0("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - ) suppressWarnings( expect_error( CST_SaveExp(data = cube1, sdate_dim = 'a', ftime_dim = 'ftime'), @@ -132,25 +117,6 @@ test_that("1. Input checks: CST_SaveExp", { paste0("Parameter 'startdates' doesn't have the same length ", "as dimension '", 'sdate',"', it will not be used.") ) - # metadata - cube1$attrs$Variable$metadata <- 'metadata' - expect_error( - CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Element metadata from Variable element in attrs must be a list.") - ) - cube1$attrs$Variable$metadata <- list(test = 'var') - expect_warning( - CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Metadata is not found for any coordinate.") - ) - cube1$attrs$Variable$metadata <- list(var = 'var') - expect_warning( - CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Metadata is not found for any variable.") - ) # memb_dim suppressWarnings( expect_error( @@ -165,6 +131,13 @@ test_that("1. Input checks: CST_SaveExp", { "as NULL if there is no member dimension.") ) ) + # metadata + cube1$attrs$Variable$metadata <- 'metadata' + expect_error( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Element metadata from Variable element in attrs must be a list.") + ) }) ############################################## @@ -189,11 +162,13 @@ test_that("1. Input checks", { ) # Dates expect_error( - SaveExp(data = array(1, dim = c(a = 1)), Dates = 'a'), + SaveExp(data = array(1, dim = c(a = 1)), Dates = 'a', sdate_dim = NULL, + memb_dim = NULL, ftime_dim = 'a', dat_dim = NULL, var_dim = NULL), paste0("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") ) expect_error( - SaveExp(data = array(1, dim = c(a = 1)), + SaveExp(data = array(1, dim = c(time = 1, sdate = 1, member = 1)), + dat_dim = NULL, var_dim = NULL, Dates = as.Date('2022-02-01', format = "%Y-%m-%d")), paste0("Parameter 'Dates' must have dimension names.") ) @@ -255,13 +230,6 @@ test_that("1. Input checks", { paste0("Parameter 'ftime_dim' is not found in 'data' dimension.") ) # Dates dimension check - expect_error( - SaveExp(data = dat4, coords = coords4, - metadata = list(tas = list(level = '2m')), - Dates = dates4, ftime_dim = NULL, memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Parameter 'Dates' must have 'ftime_dim'.") - ) expect_warning( SaveExp(data = dat4, coords = coords4, metadata = list(tas = list(level = '2m')), @@ -276,7 +244,8 @@ test_that("1. Input checks", { metadata = list(tas = list(level = '2m')), Dates = dates5, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = NULL), - paste0("Parameter 'Dates' must have only 'sdate_dim' and 'ftime_dim' dimensions.") + paste0("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", + "dimensions of length greater than 1.") ) expect_warning( SaveExp(data = dat2, coords = coords2, @@ -306,15 +275,6 @@ test_that("1. Input checks", { paste0("Dates must be provided if 'data' must be saved in separated files. 
", "All data will be saved in a single file.") ) - # (dat4) Without ftime_dim - expect_error( - SaveExp(data = dat4, coords = coords4, - metadata = list(tas = list(level = '2m')), - Dates = dates4, ftime_dim = NULL, memb_dim = NULL, - dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', - single_file = TRUE), - paste0("Parameter 'Dates' must have 'ftime_dim'.") - ) }) ############################################## -- GitLab From 6eef0c208b28bd55fea3428ddf5ab955cad2c2f4 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 1 Dec 2023 16:10:15 +0100 Subject: [PATCH 08/16] Update usecase for saveexp --- R/CST_SaveExp.R | 6 +- inst/doc/usecase.md | 1 + inst/doc/usecase/UseCase4_CST_SaveExp.R | 245 +++++++++++++++++------- 3 files changed, 182 insertions(+), 70 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index c5742031..ea2cf6ac 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -298,6 +298,9 @@ SaveExp <- function(data, destination = "./", coords = NULL, if (is.null(dimnames)) { stop("Parameter 'data' must be an array with named dimensions.") } + if (!is.null(attributes(data)$dimensions)) { + attributes(data)$dimensions <- NULL + } # destination if (!is.character(destination) | length(destination) > 1) { stop("Parameter 'destination' must be a character string of one element ", @@ -606,9 +609,6 @@ SaveExp <- function(data, destination = "./", coords = NULL, if (any(dimnames != alldims)) { data <- Reorder(data, alldims) dimnames <- names(dim(data)) - if (!is.null(attr(data, 'dimensions'))) { - attr(data, 'dimensions') <- dimnames - } } ## NetCDF dimensions definition diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 1fc17ae4..8ebf0478 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -6,6 +6,7 @@ In this document, you can link to the example scripts for different usage of the 1. [Bias adjustment for assessment of an extreme event](inst/doc/usecase/UseCase1_WindEvent_March2018.R) 2. [Precipitation Downscaling with RainFARM RF 4](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF4.R) 3. [Precipitation Downscaling with RainFARM RF 100](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF100.R) + 4. [Seasonal forecasts for a river flow](inst/doc/usecase/UseCase3_data_preparation_SCHEME_model.R) 2. **Examples using 's2dv_cube'** 1. 
[Save 's2dv_cube'](inst/doc/usecase/UseCase4_SaveExp.R) \ No newline at end of file diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index a6cbbe86..4a7db4f4 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -1,59 +1,102 @@ -#**************************************************************************** +#******************************************************************************* # Script to test examples of CST_SaveExp # Eva Rifà Rovira # 29/11/2024 -#**************************************************************************** +#******************************************************************************* + +#------------------------------------------------------------------------------- +# Needed packages before a new version is installed +library(CSIndicators) +library(multiApply) +library(easyNCDF) +library(s2dv) +library(ClimProjDiags) +library(CSTools) +library(startR) +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-SaveCube/R/CST_SaveExp.R") +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-SaveCube/R/zzz.R") +################################################################################ +# Tests: +#----------------------------------------------------- # Tests 1: Multidimensional array and Dates, without metadata and coordinates -# (1) Minimal use case, without Dates +#----------------------------------------------------- +# (1.1) Minimal use case, without Dates data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4)) SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL, var_dim = NULL, single_file = TRUE) SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL, - var_dim = NULL, sdate_dim = NULL, single_file = TRUE) # same result + var_dim = NULL, sdate_dim = NULL, single_file = FALSE) # same result -# (2) Forecast time dimension, without Dates +# (1.2) Forecast time dimension, without Dates data <- array(1:5, dim = c(ftime = 5, lon = 4, lat = 4)) SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = NULL, single_file = TRUE) -# (2) Start date dimension, without Dates +# (1.3) Start date dimension, without Dates data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4)) SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', single_file = TRUE) -# (3) Only forecast time dimension (no sdate), with Dates +# (1.4) Only forecast time dimension (no sdate), with Dates data <- array(1:5, dim = c(ftime = 5, lon = 4, lat = 4)) dates <- c('20000101', '20010102', '20020103', '20030104', '20040105') dates <- as.Date(dates, format = "%Y%m%d", tz = "UTC") dim(dates) <- c(ftime = 5) SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = NULL, Dates = dates, single_file = FALSE) +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = NULL, Dates = dates, single_file = TRUE) # For this case we have the same result using: single_file = FALSE /TRUE. 
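# A minimal sketch for double-checking what the calls above actually wrote,
# assuming the files land under the current working directory; the easyNCDF
# readers used here (NcReadDims, NcReadVarNames, NcToArray) are not part of
# the original use case and are only one possible way to inspect the output.
nc_files <- list.files(getwd(), pattern = "\\.nc$", recursive = TRUE,
                       full.names = TRUE)
easyNCDF::NcReadDims(nc_files[1])                 # named dimension lengths
vars <- easyNCDF::NcReadVarNames(nc_files[1])     # variable names in the file
str(easyNCDF::NcToArray(nc_files[1], vars_to_read = vars[1]))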
-# (4) Forecast time and 1 sdate, with Dates +# (1.5) Forecast time and 1 sdate, with Dates data <- array(1:5, dim = c(sdate = 1, ftime = 5, lon = 4, lat = 4)) dates <- c('20000101', '20010102', '20020103', '20030104', '20040105') dates <- as.Date(dates, format = "%Y%m%d", tz = "UTC") dim(dates) <- c(sdate = 1, ftime = 5) -SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE) SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = FALSE) +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE) -################################################################################ - -# Tests 2: Test sample data from CSTools - -# (a) Data loaded with Start -# (1) lonlat_temp_st$exp in a single file with units 'hours since' - +# (1.6) Test global attributes +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE, + extra_string = 'test', + global_attrs = list(system = 'tes1', reference = 'test2')) +# (1.6) Test global attributes +SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, + var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = FALSE, + extra_string = 'test', + global_attrs = list(system = 'tes1', reference = 'test2')) +#----------------------------------------------------- +# Tests 2: Test sample data from Start and from Load +#----------------------------------------------------- + +# (2.1) Test SaveExp +exp <- CSTools::lonlat_prec_st +data <- exp$data +Dates = exp$attrs$Dates +coords = exp$coords +varname = exp$attrs$Variable$varName +metadata = exp$attrs$Variable$metadata +SaveExp(data = data, Dates = Dates, coords = coords, varname = varname, + metadata = metadata, ftime_dim = 'ftime', startdates = 1:4, + var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset', + sdate_dim = 'sdate', single_file = FALSE) +SaveExp(data = data, Dates = Dates, coords = coords, varname = varname, + metadata = metadata, ftime_dim = 'ftime', startdates = 1:4, + var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset', + sdate_dim = 'sdate', single_file = TRUE) + +# (2.2) lonlat_temp_st$exp in a single file with units 'hours since' +# (2.2.1) We save the data data <- lonlat_temp_st$exp CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE) -# Now we read the output with Start: +# (2.2.2) Now we read the output with Start: sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") out <- Start(dat = path, @@ -71,13 +114,14 @@ out <- Start(dat = path, attributes(out)$Variables$common$ftime -# (2) lonlat_temp_st$exp in a single file with units of time frequency +# (2.3) lonlat_temp_st$exp in a single file with units of time frequency +# (2.3.1) we save the data data <- lonlat_temp_st$exp CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE, units_hours_since = FALSE) -# Now we read the output with Start: +# (2.3.2) Now we read the output with Start: sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") out <- Start(dat = path, @@ -97,11 +141,13 @@ out <- Start(dat = 
path, attributes(out)$Variables$common$ftime # [1] "1 months" "2 months" "3 months" -# (3) lonlat_temp_st$exp in separated files with units of hours since +# (2.4) lonlat_temp_st$exp in separated files with units of hours since +# (2.4.1) we save the data data <- lonlat_temp_st$exp CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', single_file = FALSE) +# (2.4.2) we load the data sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) path <- paste0(getwd(),"/dat1/$var$/$var$_$sdate$.nc") @@ -116,13 +162,14 @@ out <- Start(dat = path, var = 'tas', ftime = NULL), retrieve = TRUE) -# (4) lonlat_prec_st$exp in a single file with units of time frequency +# (2.5) lonlat_prec_st$exp in a single file with units of time frequency +# (2.5.1) we save the data data <- lonlat_prec_st CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE, units_hours_since = FALSE) -# Now we read the output with Start: +# (2.5.2) we read the data sdate <- as.vector(data$coords$sdate) path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") out <- Start(dat = path, @@ -146,12 +193,13 @@ attributes(out)$Variables$common$ftime # [22] "22 days" "23 days" "24 days" "25 days" "26 days" "27 days" "28 days" # [29] "29 days" "30 days" "31 days" -# (5) Test observations +# (2.6) Test observations: lonlat_temp +# (2.6.1) Save the data data <- lonlat_temp$obs CST_SaveExp(data = data, ftime_dim = 'ftime', memb_dim = NULL, var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE, units_hours_since = FALSE) -# Now we read the output with Start: +# (2.6.2) Now we read the output with Start: sdate <- c('20001101', '20051101') path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") out <- Start(dat = path, @@ -162,22 +210,21 @@ out <- Start(dat = path, member = 1, sdate = 'all', return_vars = list( - lon = 'dat', - lat = 'dat', - ftime = NULL, - sdate = NULL), + lon = 'dat', + lat = 'dat', + ftime = NULL, + sdate = NULL), retrieve = TRUE) dim(out) attributes(out)$Variables$common$ftime -# (b) Data loaded with Load -data <- lonlat_temp$exp -data <- lonlat_temp$obs +# (2.7) Test lonlat_prec +# (2.7.1) Save the data data <- lonlat_prec -CST_SaveExp(data = data, ftime_dim = 'ftime', +CST_SaveExp(data = data, ftime_dim = 'ftime', memb_dim = NULL, var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE, units_hours_since = FALSE) -# Now we read the output with Start: +# (2.7.2) Now we read the output with Start: sdate <- as.vector(data$coords$sdate) path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") out <- Start(dat = path, @@ -196,7 +243,7 @@ out <- Start(dat = path, dim(out) lonlat_prec$dims -# Test with ftime_dim NULL +# (2.8) Test with ftime_dim NULL data <- lonlat_temp$exp data <- CST_Subset(data, along = 'ftime', indices = 1, drop = 'selected') @@ -204,36 +251,37 @@ CST_SaveExp(data = data, ftime_dim = NULL, var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', single_file = FALSE, units_hours_since = FALSE) -################################################################################ +#----------------------------------------------------- # Test 3: Special cases +#----------------------------------------------------- -# (1) two variables and two datasets in separated files - +# (3.1) Two variables and two datasets in separated files +# (3.1.1) We load the data from Start repos <- 
"/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" data3 <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), - var = c('tas', 'sfcWind'), - sdate = c('20160101', '20170101'), - ensemble = indices(1), - time = indices(1:2), - lat = indices(1:10), - lon = indices(1:10), - synonims = list(lat = c('lat', 'latitude'), - lon = c('lon', 'longitude')), - return_vars = list(time = 'sdate', - longitude = 'dat', - latitude = 'dat'), - metadata_dims = c('dat', 'var'), - retrieve = T - ) + var = c('tas', 'sfcWind'), + sdate = c('20160101', '20170101'), + ensemble = indices(1), + time = indices(1:2), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T) cube3 <- as.s2dv_cube(data3) +# (3.1.2) We save the data CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var', memb_dim = 'ensemble', dat_dim = 'dat') -# We read again the data with start +# (3.1.3) We read again the data with start repos <- paste0(getwd(), "/system4_m1/$var$/$var$_$sdate$.nc") repos2 <- paste0(getwd(), "/system5_m1/$var$/$var$_$sdate$.nc") @@ -259,13 +307,13 @@ summary(data3) dim(data3) dim(data3out) -# (2) two variables and two datasets in the same file +# (3.2) Two variables and two datasets in the same file CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var', - memb_dim = 'ensemble', dat_dim = 'dat', - single_file = TRUE) + memb_dim = 'ensemble', dat_dim = 'dat', + single_file = TRUE) -# (3) Observations +# (3.3) Observations (from startR usecase) repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') @@ -284,11 +332,7 @@ exp <- Start(dat = repos_exp, lat = NULL, time = 'sdate'), retrieve = FALSE) -lats <- attr(exp, 'Variables')$common$lat -lons <- attr(exp, 'Variables')$common$lon -## The 'time' attribute is a two-dim array dates <- attr(exp, 'Variables')$common$time -dim(dates) repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' obs <- Start(dat = repos_obs, @@ -308,13 +352,80 @@ obs <- Start(dat = repos_obs, lat = NULL, time = 'date'), retrieve = TRUE) -dim(obs) -attributes(obs)$Variables$common$time obscube <- as.s2dv_cube(obs) CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var', - memb_dim = 'ensemble', dat_dim = 'dat', - single_file = TRUE) - - + memb_dim = NULL, dat_dim = 'dat', + single_file = TRUE, extra_string = 'obs_tas') +CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var', + memb_dim = NULL, dat_dim = 'dat', + single_file = FALSE, extra_string = 'obs_tas') + +# (4) Time bounds: +# example: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20231128.nc +library(CSIndicators) +exp <- CSTools::lonlat_prec_st +exp$attrs$Dates <- Reorder(exp$attrs$Dates, c(2,1)) +res <- CST_PeriodAccumulation(data = exp, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) +# > dim(res$attrs$Dates) +# sdate +# 3 +# (4.1) All data in a single file +CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var', + memb_dim = 'member', dat_dim = 'dataset', + startdates = res$attrs$Dates, single_file = TRUE) +# (4.2) All data in separated files 
+CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var', + memb_dim = 'member', dat_dim = 'dataset', + startdates = res$attrs$Dates, single_file = FALSE) +# (4.3) +CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var', + memb_dim = 'member', dat_dim = 'dataset', + startdates = 1:4, single_file = FALSE) + +# (4.4) We change the time dimensions to ftime and sdate_dim = NULL +dim(res$attrs$time_bounds[[1]]) <- c(time = 3) +dim(res$attrs$time_bounds[[2]]) <- c(time = 3) +dim(res$attrs$Dates) <- c(time = 3) +dim(res$data) <- c(dataset = 1, var = 1, member = 6, time = 3, lat = 4, lon = 4) + +# (4.4.1) All data in a single file +CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var', + memb_dim = 'member', dat_dim = 'dataset', sdate_dim = NULL, + startdates = res$attrs$Dates, single_file = TRUE) +# (4.4.2) All data in separated files +CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var', + memb_dim = 'member', dat_dim = 'dataset', sdate_dim = NULL, + startdates = res$attrs$Dates, single_file = FALSE) + +# (4.5) Forecast time units +CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var', + memb_dim = 'member', dat_dim = 'dataset', sdate_dim = NULL, + startdates = res$attrs$Dates, single_file = TRUE, + units_hours_since = FALSE) + +#----------------------------------------------------- +# Test 4: Read data with Load +#----------------------------------------------------- +data <- lonlat_temp$exp +# data <- lonlat_temp$obs +# data <- lonlat_prec +CST_SaveExp(data = data, ftime_dim = 'ftime', + var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + single_file = FALSE, units_hours_since = FALSE) +# Now we read the output with Load: +# startDates <- c('20001101', '20011101', '20021101', +# '20031101', '20041101', '20051101') + +# infile <- list(path = paste0(getwd(), +# '/system5c3s/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc')) +# out_lonlat_temp <- CST_Load(var = 'tas', exp = list(infile), obs = NULL, +# sdates = startDates, +# nmember = 15, +# leadtimemax = 3, +# latmin = 27, latmax = 48, +# lonmin = -12, lonmax = 40, +# output = "lonlat") +# Error ################################################################################ \ No newline at end of file -- GitLab From 9d16b4558909960c5ea03843f87aed503ca513e9 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 1 Dec 2023 16:20:45 +0100 Subject: [PATCH 09/16] Correct usecase --- inst/doc/usecase/UseCase4_CST_SaveExp.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index 4a7db4f4..ba43a0ac 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -360,7 +360,10 @@ CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var', memb_dim = NULL, dat_dim = 'dat', single_file = FALSE, extra_string = 'obs_tas') +#----------------------------------------------------- # (4) Time bounds: +#----------------------------------------------------- + # example: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20231128.nc library(CSIndicators) exp <- CSTools::lonlat_prec_st @@ -405,7 +408,7 @@ CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var', units_hours_since = FALSE) #----------------------------------------------------- -# Test 4: Read data with Load +# Test 5: Read data with Load #----------------------------------------------------- data <- lonlat_temp$exp -- GitLab From 0f814db160ffd4e36eba072f2af1c8d0a12f053d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 1 Dec 2023 
16:48:46 +0100 Subject: [PATCH 10/16] Minor change --- inst/doc/usecase/UseCase4_CST_SaveExp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index ba43a0ac..10ca74d6 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -64,7 +64,7 @@ SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE, extra_string = 'test', global_attrs = list(system = 'tes1', reference = 'test2')) -# (1.6) Test global attributes +# (1.7) Test global attributes SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = FALSE, extra_string = 'test', @@ -361,7 +361,7 @@ CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var', single_file = FALSE, extra_string = 'obs_tas') #----------------------------------------------------- -# (4) Time bounds: +# Test 4: Time bounds: #----------------------------------------------------- # example: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20231128.nc -- GitLab From 92fa27f2dd097f3f08462b4c5f19d8b1d3ceceea Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 14 Dec 2023 17:35:07 +0100 Subject: [PATCH 11/16] Corrections after review: unlimited dim, dimension order, reduce warnings --- R/CST_SaveExp.R | 71 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index ea2cf6ac..270cbf2c 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -519,11 +519,11 @@ SaveExp <- function(data, destination = "./", coords = NULL, return(x) }) } + units_hours_since <- TRUE } # add sdate if needed if (is.null(sdate_dim)) { if (!single_file) { - warning("A 'sdate' dimension of length 1 will be added to 'Dates'.") dim(Dates) <- c(dim(Dates), sdate = 1) dim(data) <- c(dim(data), sdate = 1) dimnames <- names(dim(data)) @@ -542,6 +542,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, } } } + units_hours_since <- TRUE } } # startdates @@ -603,12 +604,6 @@ SaveExp <- function(data, destination = "./", coords = NULL, if (!all(dimnames %in% alldims)) { unknown_dims <- dimnames[which(!dimnames %in% alldims)] memb_dim <- c(memb_dim, unknown_dims) - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) - } - # Reorder - if (any(dimnames != alldims)) { - data <- Reorder(data, alldims) - dimnames <- names(dim(data)) } ## NetCDF dimensions definition @@ -661,7 +656,9 @@ SaveExp <- function(data, destination = "./", coords = NULL, for (i in 1:n_datasets) { path <- file.path(destination, Datasets[i], varname) for (j in 1:n_vars) { - dir.create(path[j], recursive = TRUE) + if (!dir.exists(path[j])) { + dir.create(path[j], recursive = TRUE) + } startdates <- gsub("-", "", startdates) dim(startdates) <- c(length(startdates)) names(dim(startdates)) <- sdate_dim @@ -674,17 +671,19 @@ SaveExp <- function(data, destination = "./", coords = NULL, } else { data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') } + target <- names(dim(data_subset))[which(!names(dim(data_subset)) %in% c(sdate_dim, ftime_dim))] + target_dims_data <- c(target, ftime_dim) if (is.null(Dates)) { input_data <- list(data_subset, startdates) - target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) + target_dims <- list(target_dims_data, NULL) } else if 
(!is.null(time_bounds)) { input_data <- list(data_subset, startdates, Dates, time_bounds[[1]], time_bounds[[2]]) - target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, + target_dims = list(target_dims_data, NULL, ftime_dim, ftime_dim, ftime_dim) } else { input_data <- list(data_subset, startdates, Dates) - target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) + target_dims = list(target_dims_data, NULL, ftime_dim) } Apply(data = input_data, target_dims = target_dims, @@ -757,12 +756,6 @@ SaveExp <- function(data, destination = "./", coords = NULL, units <- paste('hours since', paste(sdates, collapse = ', ')) vals <- leadtimes } - # Add ftime var - dim(vals) <- dim(data)[ftime_dim] - coords[[ftime_dim]] <- vals - attrs <- list(units = units, calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE) - attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs # Add time_bnds if (!is.null(time_bounds)) { @@ -795,6 +788,21 @@ SaveExp <- function(data, destination = "./", coords = NULL, longname = 'time bounds', unlim = FALSE) attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs } + # Add ftime var + dim(vals) <- dim(data)[ftime_dim] + coords[[ftime_dim]] <- vals + attrs <- list(units = units, calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + for (j in 1:n_vars) { + metadata[[varname[j]]]$dim <- list(list(name = ftime_dim, unlim = TRUE)) + } + # Reorder ftime_dim to last + if (length(dim(data)) != which(names(dim(data)) == ftime_dim)) { + order <- c(names(dim(data))[which(!names(dim(data)) %in% c(ftime_dim))], ftime_dim) + data <- Reorder(data, order) + } } # var definition @@ -808,7 +816,6 @@ SaveExp <- function(data, destination = "./", coords = NULL, coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') } if (!is.null(metadata_j)) { - metadata_j$dim <- NULL attr(coords[[varname_j]], 'variables') <- list(metadata_j) names(attributes(coords[[varname_j]])$variables) <- varname_j } @@ -845,29 +852,31 @@ SaveExp <- function(data, destination = "./", coords = NULL, metadata_var = NULL, extra_string = NULL, global_attrs = NULL) { if (!is.null(dates)) { + if (!any(is.null(time_bnds1), is.null(time_bnds2))) { + time_bnds <- c(time_bnds1, time_bnds2) + time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) + dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) + time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) + coords[['time_bnds']] <- time_bnds + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = 'time bounds') + attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime_dim differ <- as.numeric(difftime(dates, dates[1], units = "hours")) dim(differ) <- dim(data)[ftime_dim] coords[[ftime_dim]] <- differ attrs <- list(units = paste('hours since', dates[1]), calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE) + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs - } - if (!any(is.null(time_bnds1), is.null(time_bnds2))) { - time_bnds <- c(time_bnds1, time_bnds2) - time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) - dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) - time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) - coords[['time_bnds']] <- time_bnds - attrs <- list(units = paste('hours 
since', dates[1]), - calendar = 'proleptic_gregorian', - longname = 'time bounds', unlim = FALSE) - attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + metadata_var$dim <- list(list(name = ftime_dim, unlim = TRUE)) } # Add data coords[[varname]] <- data if (!is.null(metadata_var)) { - metadata_var$dim <- NULL attr(coords[[varname]], 'variables') <- list(metadata_var) names(attributes(coords[[varname]])$variables) <- varname } -- GitLab From f27f154dfd66c6216699f4d5ef2c64c437331dcd Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 15 Dec 2023 11:51:59 +0100 Subject: [PATCH 12/16] Correct dimensions order for single_file is TRUE --- R/CST_SaveExp.R | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 270cbf2c..30b6d775 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -610,10 +610,14 @@ SaveExp <- function(data, destination = "./", coords = NULL, defined_dims <- NULL extra_info_dim <- NULL if (is.null(Dates)) { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim))] + excluded_dims <- c(dat_dim, var_dim) } else { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] + excluded_dims <- c(dat_dim, var_dim, sdate_dim, ftime_dim) } + if (single_file) { + excluded_dims <- excluded_dims[which(!excluded_dims %in% c(dat_dim))] + } + filedims <- dimnames[which(!dimnames %in% excluded_dims)] for (i_coord in filedims) { # vals @@ -649,9 +653,9 @@ SaveExp <- function(data, destination = "./", coords = NULL, } } } - # Reorder coords + # Delete unneded coords coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL - coords <- coords[filedims] +# coords <- coords[filedims] if (!single_file) { for (i in 1:n_datasets) { path <- file.path(destination, Datasets[i], varname) @@ -698,12 +702,6 @@ SaveExp <- function(data, destination = "./", coords = NULL, } } } else { - # Datasets definition - # From here - if (!is.null(dat_dim)) { - coords[[dat_dim]] <- array(1:dim(data)[dat_dim], dim = dim(data)[dat_dim]) - attr(coords[[dat_dim]], 'variables') <- list(list(units = 'adim')) - } # time_bnds if (!is.null(time_bounds)) { time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) @@ -777,7 +775,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, differ_bnds[, i, ] <- as.numeric(difftime(time_bnds[, i, ], Dates[1, i], units = "hours")) } - # NOTE: Add a warning when they are not equally spaced? + # NOTE (TODO): Add a warning when they are not equally spaced? 
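# One possible shape for that TODO, sketched only as an illustration and
# assuming 'differ_bnds' keeps the forecast time dimension first and the
# 'bnds' dimension last, as in the loop above; it would only warn and leave
# the frequency detection below unchanged.
lower_bnds <- differ_bnds[, 1, 1]
if (length(unique(round(diff(lower_bnds), 6))) > 1) {
  warning("Time bounds are not equally spaced; the detected time step ",
          "frequency may be misleading.")
}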
leadtimes_bnds <- Subset(differ_bnds, along = sdate_dim, 1, drop = 'selected') } # Add time_bnds @@ -804,7 +802,6 @@ SaveExp <- function(data, destination = "./", coords = NULL, data <- Reorder(data, order) } } - # var definition extra_info_var <- NULL for (j in 1:n_vars) { -- GitLab From 917d3155f5dc71ed1b6c89ce7580d8feee539033 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 20 Dec 2023 14:58:32 +0100 Subject: [PATCH 13/16] Add bounds attribute to 'time' and reorder variables --- R/CST_SaveExp.R | 155 ++++++++++++++---------- inst/doc/usecase/UseCase4_CST_SaveExp.R | 16 +++ man/SaveExp.Rd | 3 +- 3 files changed, 107 insertions(+), 67 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 30b6d775..33158008 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -181,7 +181,8 @@ CST_SaveExp <- function(data, destination = "./", startdates = NULL, #'@param time_bounds (Optional) A list of two arrays of dates containing #' the lower (first array) and the upper (second array) time bounds #' corresponding to Dates. Each array must have the same dimensions as Dates. -#' It is NULL by default. +#' If 'Dates' parameter is NULL, 'time_bounds' are not used. It is NULL by +#' default. #'@param startdates A vector of dates that will be used for the filenames #' when saving the data in multiple files (single_file = FALSE). It must be a #' vector of the same length as the start date dimension of data. It must be a @@ -328,19 +329,11 @@ SaveExp <- function(data, destination = "./", coords = NULL, } # coords if (!is.null(coords)) { - if (!all(names(coords) %in% dimnames)) { - coords <- coords[-which(!names(coords) %in% dimnames)] - } - for (i_coord in dimnames) { - if (i_coord %in% names(coords)) { - if (length(coords[[i_coord]]) != dim(data)[i_coord]) { - warning(paste0("Coordinate '", i_coord, "' has different lenght as ", - "its dimension and it will not be used.")) - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } else { - coords[[i_coord]] <- 1:dim(data)[i_coord] - } + if (!inherits(coords, 'list')) { + stop("Parameter 'coords' must be a named list of coordinates.") + } + if (is.null(names(coords))) { + stop("Parameter 'coords' must have names corresponding to coordinates.") } } else { coords <- sapply(dimnames, function(x) 1:dim(data)[x]) @@ -490,11 +483,15 @@ SaveExp <- function(data, destination = "./", coords = NULL, if (!identical(time_bounds_dims[[1]], time_bounds_dims[[2]])) { stop("Parameter 'time_bounds' must have 2 arrays with same dimensions.") } - name_tb <- sort(names(time_bounds_dims[[1]])) - name_dt <- sort(names(dim(Dates))) - if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { - stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", - "of all dimensions.")) + if (is.null(Dates)) { + time_bounds <- NULL + } else { + name_tb <- sort(names(time_bounds_dims[[1]])) + name_dt <- sort(names(dim(Dates))) + if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { + stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", + "of all dimensions.")) + } } } # Dates (2): Check dimensions @@ -599,38 +596,44 @@ SaveExp <- function(data, destination = "./", coords = NULL, Datasets <- Datasets[1:n_datasets] } + ## NetCDF dimensions definition + excluded_dims <- var_dim + if (!is.null(Dates)) { + excluded_dims <- c(excluded_dims, sdate_dim, ftime_dim) + } + if (!single_file) { + excluded_dims <- c(excluded_dims, dat_dim) + } + ## Unknown dimensions check - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, 
memb_dim, ftime_dim) + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) if (!all(dimnames %in% alldims)) { unknown_dims <- dimnames[which(!dimnames %in% alldims)] memb_dim <- c(memb_dim, unknown_dims) } - ## NetCDF dimensions definition - defined_dims <- NULL - extra_info_dim <- NULL - if (is.null(Dates)) { - excluded_dims <- c(dat_dim, var_dim) - } else { - excluded_dims <- c(dat_dim, var_dim, sdate_dim, ftime_dim) - } - if (single_file) { - excluded_dims <- excluded_dims[which(!excluded_dims %in% c(dat_dim))] - } - filedims <- dimnames[which(!dimnames %in% excluded_dims)] + filedims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) + filedims <- filedims[which(!filedims %in% excluded_dims)] + # Delete unneded coords + coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL + out_coords <- NULL for (i_coord in filedims) { # vals - if (i_coord %in% names(coords)) {str - if (is.numeric(coords[[i_coord]])) { - coords[[i_coord]] <- as.vector(coords[[i_coord]]) + if (i_coord %in% names(coords)) { + if (length(coords[[i_coord]]) != dim(data)[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } else if (is.numeric(coords[[i_coord]])) { + out_coords[[i_coord]] <- as.vector(coords[[i_coord]]) } else { - coords[[i_coord]] <- 1:dim(data)[i_coord] + out_coords[[i_coord]] <- 1:dim(data)[i_coord] } } else { - coords[[i_coord]] <- 1:dim(data)[i_coord] + out_coords[[i_coord]] <- 1:dim(data)[i_coord] } - dim(coords[[i_coord]]) <- dim(data)[i_coord] + dim(out_coords[[i_coord]]) <- dim(data)[i_coord] ## metadata if (i_coord %in% names(metadata)) { @@ -638,24 +641,22 @@ SaveExp <- function(data, destination = "./", coords = NULL, # from Start: 'lon' or 'lat' attrs <- attributes(metadata[[i_coord]])[['variables']] attrs[[i_coord]]$dim <- NULL - attr(coords[[i_coord]], 'variables') <- attrs + attr(out_coords[[i_coord]], 'variables') <- attrs } else if (inherits(metadata[[i_coord]], 'list')) { # from Start and Load: main var - attr(coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) - names(attributes(coords[[i_coord]])$variables) <- i_coord + attr(out_coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord } else if (!is.null(attributes(metadata[[i_coord]]))) { # from Load attrs <- attributes(metadata[[i_coord]]) # We remove because some attributes can't be saved attrs <- NULL - attr(coords[[i_coord]], 'variables') <- list(attrs) - names(attributes(coords[[i_coord]])$variables) <- i_coord + attr(out_coords[[i_coord]], 'variables') <- list(attrs) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord } } } - # Delete unneded coords - coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL -# coords <- coords[filedims] + if (!single_file) { for (i in 1:n_datasets) { path <- file.path(destination, Datasets[i], varname) @@ -693,7 +694,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, target_dims = target_dims, fun = .saveexp, destination = path[j], - coords = coords, + coords = out_coords, ftime_dim = ftime_dim, varname = varname[j], metadata_var = metadata[[varname[j]]], @@ -707,6 +708,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) } # Dates + remove_metadata_dim <- TRUE if (!is.null(Dates)) { if (is.null(sdate_dim)) { sdates <- Dates[1] @@ -717,10 
+719,12 @@ SaveExp <- function(data, destination = "./", coords = NULL, sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) dim(differ) <- dim(data)[sdate_dim] - coords[[sdate_dim]] <- differ + differ <- list(differ) + names(differ) <- sdate_dim + out_coords <- c(differ, out_coords) attrs <- list(units = paste('hours since', sdates[1]), calendar = 'proleptic_gregorian', longname = sdate_dim) - attr(coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs + attr(out_coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs # ftime definition Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) differ_ftime <- array(dim = dim(Dates)) @@ -740,19 +744,19 @@ SaveExp <- function(data, destination = "./", coords = NULL, if (all(diff(leadtimes/24) == 1)) { # daily values units <- 'days' - vals <- round(leadtimes/24) + 1 + leadtimes_vals <- round(leadtimes/24) + 1 } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { # monthly values units <- 'months' - vals <- round(leadtimes/(30.437*24)) + 1 + leadtimes_vals <- round(leadtimes/(30.437*24)) + 1 } else { # other frequency units <- 'hours' - vals <- leadtimes + 1 + leadtimes_vals <- leadtimes + 1 } } else { units <- paste('hours since', paste(sdates, collapse = ', ')) - vals <- leadtimes + leadtimes_vals <- leadtimes } # Add time_bnds @@ -780,20 +784,28 @@ SaveExp <- function(data, destination = "./", coords = NULL, } # Add time_bnds leadtimes_bnds <- Reorder(leadtimes_bnds, c('bnds', ftime_dim)) - coords[['time_bnds']] <- leadtimes_bnds + leadtimes_bnds <- list(leadtimes_bnds) + names(leadtimes_bnds) <- 'time_bnds' + out_coords <- c(leadtimes_bnds, out_coords) attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), calendar = 'proleptic_gregorian', longname = 'time bounds', unlim = FALSE) - attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + attr(out_coords[['time_bnds']], 'variables')$time_bnds <- attrs } # Add ftime var - dim(vals) <- dim(data)[ftime_dim] - coords[[ftime_dim]] <- vals + dim(leadtimes_vals) <- dim(data)[ftime_dim] + leadtimes_vals <- list(leadtimes_vals) + names(leadtimes_vals) <- ftime_dim + out_coords <- c(leadtimes_vals, out_coords) attrs <- list(units = units, calendar = 'proleptic_gregorian', longname = ftime_dim, dim = list(list(name = ftime_dim, unlim = TRUE))) - attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + if (!is.null(time_bounds)) { + attrs$bounds = 'time_bnds' + } + attr(out_coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs for (j in 1:n_vars) { + remove_metadata_dim <- FALSE metadata[[varname[j]]]$dim <- list(list(name = ftime_dim, unlim = TRUE)) } # Reorder ftime_dim to last @@ -808,17 +820,18 @@ SaveExp <- function(data, destination = "./", coords = NULL, varname_j <- varname[j] metadata_j <- metadata[[varname_j]] if (is.null(var_dim)) { - coords[[varname_j]] <- data + out_coords[[varname_j]] <- data } else { - coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') + out_coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') } if (!is.null(metadata_j)) { - attr(coords[[varname_j]], 'variables') <- list(metadata_j) - names(attributes(coords[[varname_j]])$variables) <- varname_j + if (remove_metadata_dim) metadata_j$dim <- NULL + attr(out_coords[[varname_j]], 'variables') <- list(metadata_j) + names(attributes(out_coords[[varname_j]])$variables) <- varname_j } # Add global attributes if (!is.null(global_attrs)) { - attributes(coords[[varname_j]])$global_attrs <- 
global_attrs + attributes(out_coords[[varname_j]])$global_attrs <- global_attrs } } if (is.null(extra_string)) { @@ -838,7 +851,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, } } full_filename <- file.path(destination, file_name) - ArrayToNc(coords, full_filename) + ArrayToNc(out_coords, full_filename) } } @@ -848,13 +861,16 @@ SaveExp <- function(data, destination = "./", coords = NULL, ftime_dim = 'time', varname = 'var', metadata_var = NULL, extra_string = NULL, global_attrs = NULL) { + remove_metadata_dim <- TRUE if (!is.null(dates)) { if (!any(is.null(time_bnds1), is.null(time_bnds2))) { time_bnds <- c(time_bnds1, time_bnds2) time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) - coords[['time_bnds']] <- time_bnds + time_bnds <- list(time_bnds) + names(time_bnds) <- 'time_bnds' + coords <- c(time_bnds, coords) attrs <- list(units = paste('hours since', dates[1]), calendar = 'proleptic_gregorian', longname = 'time bounds') @@ -863,17 +879,24 @@ SaveExp <- function(data, destination = "./", coords = NULL, # Add ftime_dim differ <- as.numeric(difftime(dates, dates[1], units = "hours")) dim(differ) <- dim(data)[ftime_dim] - coords[[ftime_dim]] <- differ + differ <- list(differ) + names(differ) <- ftime_dim + coords <- c(differ, coords) attrs <- list(units = paste('hours since', dates[1]), calendar = 'proleptic_gregorian', longname = ftime_dim, dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bnds1)) { + attrs$bounds = 'time_bnds' + } attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs metadata_var$dim <- list(list(name = ftime_dim, unlim = TRUE)) + remove_metadata_dim <- FALSE } # Add data coords[[varname]] <- data if (!is.null(metadata_var)) { + if (remove_metadata_dim) metadata_var$dim <- NULL attr(coords[[varname]], 'variables') <- list(metadata_var) names(attributes(coords[[varname]])$variables) <- varname } diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index 10ca74d6..5bd7db76 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -377,10 +377,26 @@ res <- CST_PeriodAccumulation(data = exp, time_dim = 'ftime', CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset', startdates = res$attrs$Dates, single_file = TRUE) +# (4.1.1) Same with SaveExp +SaveExp(data = res$data, coords = res$coords, + Dates = NULL, time_bounds = res$attrs$time_bounds, + ftime_dim = NULL, var_dim = 'var', + varname = res$attrs$Variable$varName, + metadata = res$attrs$Variable$metadata, + memb_dim = 'member', dat_dim = 'dataset', + startdates = res$attrs$Dates, single_file = TRUE) # (4.2) All data in separated files CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset', startdates = res$attrs$Dates, single_file = FALSE) +# (4.2.1) Same with SaveExp +SaveExp(data = res$data, coords = res$coords, + Dates = res$attrs$Dates, time_bounds = res$attrs$time_bounds, + ftime_dim = NULL, var_dim = 'var', + varname = res$attrs$Variable$varName, + metadata = res$attrs$Variable$metadata, + memb_dim = 'member', dat_dim = 'dataset', + startdates = res$attrs$Dates, single_file = FALSE) # (4.3) CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset', diff --git a/man/SaveExp.Rd b/man/SaveExp.Rd index dbef149d..53c791f7 100644 --- 
a/man/SaveExp.Rd +++ b/man/SaveExp.Rd @@ -45,7 +45,8 @@ It must have ftime_dim dimension.} \item{time_bounds}{(Optional) A list of two arrays of dates containing the lower (first array) and the upper (second array) time bounds corresponding to Dates. Each array must have the same dimensions as Dates. -It is NULL by default.} +If 'Dates' parameter is NULL, 'time_bounds' are not used. It is NULL by +default.} \item{startdates}{A vector of dates that will be used for the filenames when saving the data in multiple files (single_file = FALSE). It must be a -- GitLab From 45247e44105f9826fbd638a0f9b24fb377c61b79 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 20 Dec 2023 17:01:09 +0100 Subject: [PATCH 14/16] Add long_name to time_bounds and set units_hours_since to FALSE by default --- R/CST_SaveExp.R | 12 ++++++------ inst/doc/usecase/UseCase4_CST_SaveExp.R | 12 ++++++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 33158008..72a97b8c 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -55,11 +55,11 @@ #' in the file name as: '__.nc'. It is NULL by #' default. #'@param units_hours_since (Optional) A logical value only available for the -#' case: Dates have forecast time and start date dimension, single_file is +#' case: 'Dates' have forecast time and start date dimension, 'single_file' is #' TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast #' time with units of 'hours since'; if it is FALSE, the time units will be a #' number of time steps with its corresponding frequency (e.g. n days, n months -#' or n hours). It is TRUE by default. +#' or n hours). It is FALSE by default. #'@param global_attrs (Optional) A list with elements containing the global #' attributes to be saved in the NetCDF. #' @@ -102,7 +102,7 @@ CST_SaveExp <- function(data, destination = "./", startdates = NULL, memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var', drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL, units_hours_since = TRUE) { + global_attrs = NULL, units_hours_since = FALSE) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -238,7 +238,7 @@ CST_SaveExp <- function(data, destination = "./", startdates = NULL, #' TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time #' with units of 'hours since'; if it is FALSE, the time units will be a number #' of time steps with its corresponding frequency (e.g. n days, n months or n -#' hours). It is TRUE by default. +#' hours). It is FALSE by default. 
#' #'@return Multiple or single NetCDF files containing the data array.\cr #'\item{\code{single_file is TRUE}}{ @@ -289,7 +289,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, sdate_dim = 'sdate', ftime_dim = 'time', memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var', drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL, units_hours_since = TRUE) { + global_attrs = NULL, units_hours_since = FALSE) { ## Initial checks # data if (is.null(data)) { @@ -789,7 +789,7 @@ SaveExp <- function(data, destination = "./", coords = NULL, out_coords <- c(leadtimes_bnds, out_coords) attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), calendar = 'proleptic_gregorian', - longname = 'time bounds', unlim = FALSE) + long_name = 'time bounds', unlim = FALSE) attr(out_coords[['time_bnds']], 'variables')$time_bnds <- attrs } # Add ftime var diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index 5bd7db76..d700e1b9 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -94,7 +94,7 @@ SaveExp(data = data, Dates = Dates, coords = coords, varname = varname, data <- lonlat_temp_st$exp CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', - single_file = TRUE) + units_hours_since = TRUE, single_file = TRUE) # (2.2.2) Now we read the output with Start: sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) @@ -113,6 +113,7 @@ out <- Start(dat = path, retrieve = TRUE) attributes(out)$Variables$common$ftime +out_cube <- as.s2dv_cube(out) # (2.3) lonlat_temp_st$exp in a single file with units of time frequency # (2.3.1) we save the data @@ -120,7 +121,7 @@ data <- lonlat_temp_st$exp CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate', single_file = TRUE, units_hours_since = FALSE) - +dates <- lonlat_temp_st$exp$attrs$Dates # (2.3.2) Now we read the output with Start: sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") @@ -140,6 +141,7 @@ out <- Start(dat = path, attributes(out)$Variables$common$ftime # [1] "1 months" "2 months" "3 months" +out_cube2 <- as.s2dv_cube(out) # (2.4) lonlat_temp_st$exp in separated files with units of hours since # (2.4.1) we save the data @@ -159,9 +161,9 @@ out <- Start(dat = path, var = 'tas', member = 'all', return_vars = list(lon = 'dat', lat = 'dat', - ftime = NULL), + ftime = 'sdate'), retrieve = TRUE) - +out_cube1 <- as.s2dv_cube(out) # (2.5) lonlat_prec_st$exp in a single file with units of time frequency # (2.5.1) we save the data data <- lonlat_prec_st @@ -192,6 +194,7 @@ attributes(out)$Variables$common$ftime # [15] "15 days" "16 days" "17 days" "18 days" "19 days" "20 days" "21 days" # [22] "22 days" "23 days" "24 days" "25 days" "26 days" "27 days" "28 days" # [29] "29 days" "30 days" "31 days" +out_cube <- as.s2dv_cube(out) # (2.6) Test observations: lonlat_temp # (2.6.1) Save the data @@ -312,6 +315,7 @@ dim(data3out) CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var', memb_dim = 'ensemble', dat_dim = 'dat', single_file = TRUE) +# TODO: Read the output with Start # (3.3) Observations (from startR usecase) repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', -- GitLab From 07a6f67b3bcef3602c54e6b7ea61c7fb801cff10 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 16 Jan 2024 12:08:10 +0100 
Subject: [PATCH 15/16] Add test in UseCase4 --- inst/doc/usecase/UseCase4_CST_SaveExp.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index d700e1b9..e85ef8d1 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -101,11 +101,11 @@ sdate <- as.vector(lonlat_temp_st$exp$coords$sdate) path <- paste0(getwd(),"/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc") out <- Start(dat = path, var = 'tas', - lon = 'all', - lat = 'all', - ftime = 'all', - sdate = 'all', member = 'all', + sdate = 'all', + ftime = 'all', + lat = 'all', + lon = 'all', return_vars = list(lon = 'dat', lat = 'dat', ftime = NULL, @@ -114,6 +114,11 @@ out <- Start(dat = path, attributes(out)$Variables$common$ftime out_cube <- as.s2dv_cube(out) +out_cube <- CST_ChangeDimNames(out_cube, + original_names = c("dat"), + new_names = c("dataset")) +all.equal(data$data, out_cube$data) +identical(data$data, out_cube$data) # (2.3) lonlat_temp_st$exp in a single file with units of time frequency # (2.3.1) we save the data -- GitLab From c78601a4591b780e70b65e4e47a1357ee60ecc47 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 16 Jan 2024 15:06:29 +0100 Subject: [PATCH 16/16] Add plotting test to use case SaveExp --- inst/doc/usecase/UseCase4_CST_SaveExp.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/inst/doc/usecase/UseCase4_CST_SaveExp.R b/inst/doc/usecase/UseCase4_CST_SaveExp.R index e85ef8d1..926754f3 100644 --- a/inst/doc/usecase/UseCase4_CST_SaveExp.R +++ b/inst/doc/usecase/UseCase4_CST_SaveExp.R @@ -120,6 +120,13 @@ out_cube <- CST_ChangeDimNames(out_cube, all.equal(data$data, out_cube$data) identical(data$data, out_cube$data) +# Plot the results and compare +PlotEquiMap(out_cube$data[,,1,1,1,,], lon = out_cube$coords$lon, + lat = out_cube$coords$lat, filled.continents = FALSE) + +PlotEquiMap(lonlat_temp_st$exp$data[,,1,1,1,,], lon = out_cube$coords$lon, + lat = out_cube$coords$lat, filled.continents = FALSE) + # (2.3) lonlat_temp_st$exp in a single file with units of time frequency # (2.3.1) we save the data data <- lonlat_temp_st$exp -- GitLab
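A minimal round-trip sketch in the spirit of the checks added in these last two patches, assuming only the CSTools sample object lonlat_temp_st, the easyNCDF read functions (NcReadDims, NcReadVarNames, NcToArray) and a writable temporary directory; the output file is located with list.files() so no particular file name is assumed:

library(CSTools)
library(easyNCDF)

dest <- file.path(tempdir(), "saveexp-check")
dir.create(dest, recursive = TRUE, showWarnings = FALSE)

exp <- lonlat_temp_st$exp
CST_SaveExp(data = exp, destination = dest, ftime_dim = 'ftime',
            var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate',
            single_file = TRUE, units_hours_since = FALSE)

# Locate the written file and read it back without startR
nc_file <- list.files(dest, pattern = "\\.nc$", full.names = TRUE)[1]
NcReadDims(nc_file)                     # dimension names and lengths
vars <- NcReadVarNames(nc_file)         # should include 'tas'
arr <- NcToArray(nc_file, vars_to_read = vars[1])
dim(arr)                                # compare against dim(exp$data)
dim(exp$data)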