From 903fca855b13d9249612ee8cb22b77fa5e0ebd04 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 11 Jul 2023 16:20:35 +0200 Subject: [PATCH 1/6] Add memb_dim to MergeRefToExp; Improve function --- NAMESPACE | 1 + R/MergeRefToExp.R | 243 ++++++++++++++++++++++++++------------- man/CST_MergeRefToExp.Rd | 5 + man/MergeRefToExp.Rd | 13 ++- 4 files changed, 179 insertions(+), 83 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d80accb..8795a86 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) +importFrom(s2dv,InsertDim) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 434cae3..534bb55 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -33,6 +33,9 @@ #' specified. This dimension is required to subset the data in a requested period. #'@param sdate_dim A character string indicating the name of the dimension in #' which the initialization dates are stored. +#'@param memb_dim A character string indicating the name of the member +#' dimension. If the data are not ensemble ones, set as NULL. The default +#' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #'@return A 's2dv_cube' object containing the indicator in the element @@ -63,7 +66,7 @@ #'@export CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, time_dim = 'ftime', sdate_dim = 'sdate', - ncores = NULL) { + memb_dim = 'member', ncores = NULL) { # Check 's2dv_cube' if (!inherits(data1, 's2dv_cube')) { stop("Parameter 'ref' must be of the class 's2dv_cube'.") @@ -72,74 +75,74 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, stop("Parameter 'data' must be of the class 's2dv_cube'.") } # Dates subset of data1 - dates1 <- NULL if (!is.null(start1) && !is.null(end1)) { if (is.null(dim(data1$attrs$Dates))) { warning("Dimensions in 'data1' element 'attrs$Dates' are missed and ", "all data would be used.") - start <- NULL - end <- NULL - } else { - dates1 <- data1$attrs$Dates + start1 <- NULL + end1 <- NULL } } # Dates subset of data2 - dates2 <- NULL if (!is.null(start2) && !is.null(end2)) { if (is.null(dim(data2$attrs$Dates))) { warning("Dimensions in 'data2' element 'attrs$Dates' are missed and ", "all data would be used.") - start <- NULL - end <- NULL - } else { - dates2 <- data2$attrs$Dates + start2 <- NULL + end2 <- NULL } } + dates1 <- data1$attrs$Dates + dates2 <- data2$attrs$Dates + data1$data <- MergeRefToExp(data1 = data1$data, dates1 = dates1, start1 = start1, end1 = end1, data2 = data2$data, dates2 = dates2, - start2, end2, time_dim = time_dim, + start2, end2, time_dim = time_dim, + memb_dim = memb_dim, sdate_dim = sdate_dim, ncores = ncores) + data1$dims <- dim(data1$data) + if (!is.null(dates1)) { - data1$attrs$Dates <- SelectPeriodOnDates(dates1, start = start1, end = end1, - time_dim = time_dim) + if (!is.null(start1) && !is.null(end1)) { + dates1 <- SelectPeriodOnDates(dates1, start = start1, end = end1, + time_dim = time_dim) + } } if (!is.null(dates2)) { - data2$attrs$Dates <- SelectPeriodOnDates(dates2, start = start2, - end = end2, time_dim = time_dim) + if ((!is.null(start2) && !is.null(end2))) { + dates2 <- SelectPeriodOnDates(dates2, start = start2, + end = end2, time_dim = time_dim) + } } # TO DO CONCATENATE DATES - remove_dates1_dim <- FALSE - remove_dates2_dim <- FALSE - if (!is.null(data1$attrs$Dates) & !is.null(data2$attrs$Dates)) { - if (is.null(dim(data1$attrs$Dates))) { - remove_dates1_dim <- TRUE - dim(data1$attrs$Dates) <- length(data1$attrs$Dates) - names(dim(data1$attrs$Dates)) <- time_dim + remove_dates_dim <- FALSE + + if (!is.null(dates1) & !is.null(dates2)) { + if (is.null(dim(dates1))) { + remove_dates_dim <- TRUE + dim(dates1) <- length(dates1) + names(dim(dates1)) <- time_dim } - if (is.null(dim(data2$attrs$Dates))) { - remove_dates2_dim <- TRUE - dim(data2$attrs$Dates) <- length(data2$attrs$Dates) - names(dim(data2$attrs$Dates)) <- time_dim + if (is.null(dim(dates2))) { + remove_dates_dim <- TRUE + dim(dates2) <- length(dates2) + names(dim(dates2)) <- time_dim } } - res <- Apply(list(data1$attrs$Dates, data2$attrs$Dates), target_dims = time_dim, - c, output_dims = time_dim, ncores = ncores)$output1 - if (inherits(data1$attrs$Dates, 'Date')) { + res <- Apply(list(dates1, dates2), target_dims = time_dim, + 'c', output_dims = time_dim, ncores = ncores)$output1 + if (inherits(dates1, 'Date')) { data1$attrs$Dates <- as.Date(res, origin = '1970-01-01') } else { data1$attrs$Dates <- as.POSIXct(res*3600*24, origin = '1970-01-01', tz = 'UTC') } - if (remove_dates1_dim) { + if (remove_dates_dim) { dim(data1$attrs$Dates) <- NULL } - if (remove_dates2_dim) { - dim(data2$attrs$Dates) <- NULL - } - return(data1) } @@ -177,6 +180,9 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #' period. #'@param sdate_dim A character string indicating the name of the dimension in #' which the initialization dates are stored. +#'@param memb_dim A character string indicating the name of the member +#' dimension. If the data are not ensemble ones, set as NULL. The default +#' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' @@ -199,57 +205,89 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #' time_dim = 'time') #' #'@import multiApply +#'@importFrom s2dv InsertDim #'@export -MergeRefToExp <- function(data1, dates1, start1, end1, data2, dates2, start2, - end2, time_dim = 'ftime', sdate_dim = 'sdate', - ncores = NULL) { +MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, + dates2, start2 = NULL, end2 = NULL, + time_dim = 'ftime', sdate_dim = 'sdate', + memb_dim = 'member', ncores = NULL) { # Input checks # data - if (!is.array(data1)) { - dim(data1) <- c(length(data1)) - names(dim(data1)) <- time_dim + if (!is.array(data1) | !is.array(data2)) { + stop("Parameters 'data1' and 'data2' must be arrays.") } - if (!is.array(data2)) { - dim(data2) <- c(length(data2)) - names(dim(data2)) <- time_dim + if (is.null(names(dim(data1))) | is.null(names(dim(data2)))) { + stop("Parameters 'data1' and 'data2' must have named dimensions.") } - # dates - if (!is.null(dates1) & !is.null(dates2)) { - if (is.null(dim(dates1))) { - warning("Dimensions in 'dates1' element are missed and ", - "all data would be used.") - dim(dates1) <- length(dates1) - names(dim(dates1)) <- time_dim + # time_dim + if (!is.character(time_dim)) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data1)) | !time_dim %in% names(dim(data2))) { + stop("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", + "names.") + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' must be a character string.") } - if (is.null(dim(dates2))) { - warning("Dimensions in 'dates2' element are missed and ", - "all data would be used.") - dim(dates2) <- length(dates2) - names(dim(dates2)) <- time_dim + if (!memb_dim %in% names(dim(data1)) & !memb_dim %in% names(dim(data2))) { + stop("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. ", + "Set it to NULL if there is no member dimension.") + } + if (memb_dim %in% names(dim(data1))) { + if (dim(data1)[memb_dim] == 1) { + print('memb_dim removed data1') + data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) + } + } + if (memb_dim %in% names(dim(data2))) { + if (dim(data2)[memb_dim] == 1) { + print('memb_dim removed data2') + data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) + } + } + # Add NA to fill member_dim + if (memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2))) { + if (dim(data1)[memb_dim] != dim(data2)[memb_dim]) { + if (dim(data1)[memb_dim] > dim(data2)[memb_dim]) { + data2 <- Apply(list(data2), target_dims = memb_dim, + fun = function(x, length_new_dim) { + return(c(x, rep(NA, length_new_dim - length(x)))) + }, length_new_dim = dim(data1)[memb_dim], + output_dims = memb_dim)$output1 + } else { + data1 <- Apply(list(data1), target_dims = memb_dim, + fun = function(x, length_new_dim) { + return(c(x, rep(NA, length_new_dim - length(x)))) + }, length_new_dim = dim(data2)[memb_dim], + output_dims = memb_dim)$output1 + } + } } - data1 <- SelectPeriodOnData(data1, dates = dates1, start = start1, - end = end1, time_dim = time_dim, ncores = ncores) } - # Check if data2 has dimension sdate_dim and it should be added to data1: - if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && - !sdate_dim %in% names(dim(data1))) { - dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) - names(dim(data1)) <- c(time_dim, sdate_dim) - } - # Check if data1 has dimension sdate_dim and it should be added to data2: - if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && - !sdate_dim %in% names(dim(data2))) { - dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) - names(dim(data2)) <- c(time_dim, sdate_dim) - } + # # Check if data2 has dimension sdate_dim and it should be added to data1: + # if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && + # !sdate_dim %in% names(dim(data1))) { + # dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) + # names(dim(data1)) <- c(time_dim, sdate_dim) + # } + # # Check if data1 has dimension sdate_dim and it should be added to data2: + # if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && + # !sdate_dim %in% names(dim(data2))) { + # dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) + # names(dim(data2)) <- c(time_dim, sdate_dim) + # } + # Check if data1 needs to be extended to the length of the dimensions of data2: if (length(dim(data2)) != length(dim(data1))) { dif_dims <- which(names(dim(data2)) %in% names(dim(data1)) == FALSE) if (length(dif_dims) > 0) { for (i in dif_dims) { - data1 <- .insertdim(data1, posdim = i, lendim = dim(data2)[i], - name = names(dim(data2))[i]) + data1 <- InsertDim(data1, posdim = length(dim(data1)), lendim = dim(data2)[i], + name = names(dim(data2))[i]) } } } @@ -258,18 +296,65 @@ MergeRefToExp <- function(data1, dates1, start1, end1, data2, dates2, start2, dif_dims <- which(names(dim(data1)) %in% names(dim(data2)) == FALSE) if (length(dif_dims) > 0) { for (i in dif_dims) { - data2 <- .insertdim(data2, posdim = i, lendim = dim(data1)[i], - name = names(dim(data1))[i]) + data2 <- InsertDim(data2, posdim = length(dim(data1)), lendim = dim(data1)[i], + name = names(dim(data1))[i]) } } } - if (!is.null(dates2)) { - data2 <- SelectPeriodOnData(data2, dates = dates2, start = start2, - end = end2, time_dim = time_dim, ncores = ncores) + + # dates1 + if (!is.null(start1) & !is.null(end1)) { + if (is.null(dates1)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start1), is.list(end1)))) { + stop("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates1))) { + data1 <- SelectPeriodOnData(data = data1, dates = dates1, start = start1, + end = end1, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates1' must have named dimensions if 'start' and ", + "'end' are not NULL. All 'data1' will be used.") + } + } } + # dates2 + if (!is.null(start2) & !is.null(end2)) { + if (is.null(dates2)) { + warning("Parameter 'dates2' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start2), is.list(end2)))) { + stop("Parameter 'start2' and 'end2' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates2))) { + data2 <- SelectPeriodOnData(data = data2, dates = dates2, start = start2, + end = end2, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates2' must have named dimensions if 'start2' and ", + "'end2' are not NULL. All 'data2' will be used.") + } + } + } + + data1dims <- names(dim(data1)) + data2dims <- names(dim(data2)) + data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 + if (all(names(dim(data1)) %in% data1dims)) { + pos <- match(names(dim(data1)), data1dims) + data1 <- aperm(data1, pos) + } + if (all(names(dim(data1)) %in% data2dims)) { + pos <- match(names(dim(data1)), data2dims) + data1 <- aperm(data1, pos) + } return(data1) } - - diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index 9f9a3b9..c15ab9e 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -13,6 +13,7 @@ CST_MergeRefToExp( end2, time_dim = "ftime", sdate_dim = "sdate", + memb_dim = "member", ncores = NULL ) } @@ -47,6 +48,10 @@ specified. This dimension is required to subset the data in a requested period.} \item{sdate_dim}{A character string indicating the name of the dimension in which the initialization dates are stored.} +\item{memb_dim}{A character string indicating the name of the member +dimension. If the data are not ensemble ones, set as NULL. The default +value is 'member'.} + \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index e6b40c8..c446a10 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -7,14 +7,15 @@ MergeRefToExp( data1, dates1, - start1, - end1, + start1 = NULL, + end1 = NULL, data2, dates2, - start2, - end2, + start2 = NULL, + end2 = NULL, time_dim = "ftime", sdate_dim = "sdate", + memb_dim = "member", ncores = NULL ) } @@ -54,6 +55,10 @@ period.} \item{sdate_dim}{A character string indicating the name of the dimension in which the initialization dates are stored.} +\item{memb_dim}{A character string indicating the name of the member +dimension. If the data are not ensemble ones, set as NULL. The default +value is 'member'.} + \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } -- GitLab From 3c1bb67ed22caf98ccd136df2ee46c2deed8830a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 12 Jul 2023 16:15:31 +0200 Subject: [PATCH 2/6] Improved development of MergeRefToExp; improved documentation; added unit tests for the new development --- DESCRIPTION | 2 +- R/MergeRefToExp.R | 213 ++++++++++++++++++---------- man/CST_MergeRefToExp.Rd | 43 ++++-- man/MergeRefToExp.Rd | 54 ++++--- tests/testthat/test-MergeRefToExp.R | 150 ++++++++++++++++++-- 5 files changed, 341 insertions(+), 121 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4151ba4..4e20983 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,4 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 -Config/testthat/edition: 3 \ No newline at end of file +Config/testthat/edition: 3 diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 534bb55..4f8e16f 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -11,10 +11,19 @@ #'references) could be added at the end of the forecast lead time to cover the #'desired period (e.g.: until the end of summer). #' -#'@param data1 An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. -#'@param data2 An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. +#'@param data1 An 's2dv_cube' object with the element 'data' being a +#' multidimensional array of with named dimensions matching the dimensions of +#' parameter 'data2'. Dimensions with the same name in the 'data2' parameter +#' must have the same length or length 1; except for the dimension specified +#' with 'memb_dim', which can be different and in the result will be filled +#' with NA values. It can also have additional dimensions with different names +#' in 'data2'. +#'@param data2 An 's2dv_cube' object with the element 'data' being a +#' multidimensional array of dates with named dimensions matching +#' the dimensions on parameter 'data1'. Dimensions with the same name in the +#' 'data1' parameter must have the same length or length 1, except for the +#' dimension specified with 'memb_dim', which can be different and in the +#' result will be filled with NA values. #'@param start1 A list to define the initial date of the period to select from #' data1 by providing a list of two elements: the initial date of the period #' and the initial month of the period. @@ -30,16 +39,24 @@ #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'ftime'. More than one dimension name #' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested period. -#'@param sdate_dim A character string indicating the name of the dimension in -#' which the initialization dates are stored. +#' specified. This dimension is required to subset the data in a requested +#' period. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #'@return A 's2dv_cube' object containing the indicator in the element -#' \code{data}. +#'\code{data}. The element \code{data} will be a multidimensional array with +#'dimensions named from the combination of 'data1' and 'data2'. The resulting +#'dimensions will be the following: all the same common dimensions between the +#'two arrays plus the different dimensions of each array. If there is any +#'different common dimension but in a dataset it has length 1, it will be added +#'with the maximum dimension. If memb_dim is used, the dimension of the maximum +#'value corresponding to memb_dim of the two data sets will be added; the +#'difference between the dimensions of the set members will be filled with NA. +#'The other elements of the 's2dv_cube' will be updated with the combined +#'information of both datasets. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -64,15 +81,16 @@ #' #'@import multiApply #'@export -CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, - time_dim = 'ftime', sdate_dim = 'sdate', - memb_dim = 'member', ncores = NULL) { +CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, + start2 = NULL, end2 = NULL, + time_dim = 'ftime', memb_dim = 'member', + ncores = NULL) { # Check 's2dv_cube' if (!inherits(data1, 's2dv_cube')) { - stop("Parameter 'ref' must be of the class 's2dv_cube'.") + stop("Parameter 'data1' must be of the class 's2dv_cube'.") } if (!inherits(data2, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube'.") + stop("Parameter 'data2' must be of the class 's2dv_cube'.") } # Dates subset of data1 if (!is.null(start1) && !is.null(end1)) { @@ -96,14 +114,37 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, dates1 <- data1$attrs$Dates dates2 <- data2$attrs$Dates + # data data1$data <- MergeRefToExp(data1 = data1$data, dates1 = dates1, start1 = start1, end1 = end1, data2 = data2$data, dates2 = dates2, start2, end2, time_dim = time_dim, - memb_dim = memb_dim, - sdate_dim = sdate_dim, ncores = ncores) + memb_dim = memb_dim, ncores = ncores) + # dims data1$dims <- dim(data1$data) + # coords + for (i_dim in names(dim(data1$data))) { + if (length(data1$coords[[i_dim]]) != dim(data1$data)[i_dim]) { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } else if (length(data1$coords[[i_dim]]) == length(data2$coords[[i_dim]])) { + if (any(as.vector(data1$coords[[i_dim]]) != as.vector(data2$coords[[i_dim]]))) { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } else if (!identical(attributes(data1$coords[[i_dim]]), attributes(data2$coords[[i_dim]]))) { + attributes(data1$coords[[i_dim]]) <- NULL + } + } else { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } + } + + # Dates if (!is.null(dates1)) { if (!is.null(start1) && !is.null(end1)) { dates1 <- SelectPeriodOnDates(dates1, start = start1, end = end1, @@ -117,7 +158,6 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, } } - # TO DO CONCATENATE DATES remove_dates_dim <- FALSE if (!is.null(dates1) & !is.null(dates2)) { @@ -134,15 +174,31 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, } res <- Apply(list(dates1, dates2), target_dims = time_dim, 'c', output_dims = time_dim, ncores = ncores)$output1 + if (inherits(dates1, 'Date')) { data1$attrs$Dates <- as.Date(res, origin = '1970-01-01') } else { - data1$attrs$Dates <- as.POSIXct(res*3600*24, origin = '1970-01-01', tz = 'UTC') + data1$attrs$Dates <- as.POSIXct(res, origin = '1970-01-01', tz = 'UTC') } if (remove_dates_dim) { dim(data1$attrs$Dates) <- NULL } + + # Variable + data1$attrs$Variable$varName <- unique(data1$attrs$Variable$varName, + data2$attrs$Variable$varName) + data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable, data2$attrs$Variable)[[2]] + + # source_files + data1$attrs$source_files <- c(data1$attrs$source_files, data2$attrs$source_files) + + # Datasets + data1$attrs$Datasets <- c(data1$attrs$Datasets, data2$attrs$Datasets) + + # when + data1$attrs$when <- Sys.time() + return(data1) } @@ -155,12 +211,21 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #'compute the indicator. The function \code{MergeObs2Exp} takes care of this #'steps. #' -#'@param data1 A multidimensional array with named dimensions. -#'@param dates1 A vector of dates or a multidimensional array of dates with -#' named dimensions matching the dimensions on parameter 'data1'. -#'@param data2 A multidimensional array with named dimensions. -#'@param dates2 A vector of dates or a multidimensional array of dates with -#' named dimensions matching the dimensions on parameter 'data2'. +#'@param data1 A multidimensional array of with named dimensions matching the +#' dimensions of parameter 'data2'. Dimensions with the same name in the +#' 'data2' parameter must have the same length or length 1; except for the +#' dimension specified with 'memb_dim', which can be different and in the +#' result will be filled with NA values. It can also have additional dimensions +#' with different names in 'data2'. +#'@param dates1 A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data1'. +#'@param data2 A multidimensional array of dates with named dimensions matching +#' the dimensions on parameter 'data1'. Dimensions with the same name in the +#' 'data1' parameter must have the same length or length 1, except for the +#' dimension specified with 'memb_dim', which can be different and in the +#' result will be filled with NA values. +#'@param dates2 A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data2'. #'@param start1 A list to define the initial date of the period to select from #' data1 by providing a list of two elements: the initial date of the period #' and the initial month of the period. @@ -178,39 +243,44 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #' matching the dimensions provided in the object \code{data$data} can be #' specified. This dimension is required to subset the data in a requested #' period. -#'@param sdate_dim A character string indicating the name of the dimension in -#' which the initialization dates are stored. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return A multidimensional array with named dimensions. -#' +#'@return A multidimensional array with dimensions named from the combination of +#''data1' and 'data2'. The resulting dimensions will be the following: all the +#'same common dimensions between the two arrays plus the different dimensions of +#'each array. If there is any different common dimension but in a dataset it has +#'length 1, it will be added with the maximum dimension. If memb_dim is used, +#'the dimension of the maximum value corresponding to memb_dim of the two data +#'sets will be added; the difference between the dimensions of the set members +#'will be filled with NA. +#' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1993","%d-%m-%Y", tz = 'UTC'), "day"), #' seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day")) -#'dim(data_dates) <- c(time = 154, sdate = 2) +#'dim(data_dates) <- c(ftime = 154, sdate = 2) #'ref_dates <- seq(as.Date("01-01-1993", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day") -#'dim(ref_dates) <- c(time = 350, sdate = 2) -#'ref <- array(1001:1700, c(time = 350, sdate = 2)) -#'data <- array(1:(2*154*2), c(time = 154, sdate = 2, member= 2)) +#'dim(ref_dates) <- c(ftime = 350, sdate = 2) +#'ref <- array(1001:1700, c(ftime = 350, sdate = 2)) +#'data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) #'new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), #' end1 = list(30, 6), data2 = data, dates2 = data_dates, #' start2 = list(1, 7), end = list(21, 9), -#' time_dim = 'time') +#' time_dim = 'ftime') #' #'@import multiApply #'@importFrom s2dv InsertDim #'@export -MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, - dates2, start2 = NULL, end2 = NULL, - time_dim = 'ftime', sdate_dim = 'sdate', - memb_dim = 'member', ncores = NULL) { +MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, + start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, + time_dim = 'ftime', memb_dim = 'member', + ncores = NULL) { # Input checks # data if (!is.array(data1) | !is.array(data2)) { @@ -238,13 +308,11 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, } if (memb_dim %in% names(dim(data1))) { if (dim(data1)[memb_dim] == 1) { - print('memb_dim removed data1') data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) } } if (memb_dim %in% names(dim(data2))) { if (dim(data2)[memb_dim] == 1) { - print('memb_dim removed data2') data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) } } @@ -268,36 +336,30 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, } } - # # Check if data2 has dimension sdate_dim and it should be added to data1: - # if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && - # !sdate_dim %in% names(dim(data1))) { - # dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) - # names(dim(data1)) <- c(time_dim, sdate_dim) - # } - # # Check if data1 has dimension sdate_dim and it should be added to data2: - # if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && - # !sdate_dim %in% names(dim(data2))) { - # dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) - # names(dim(data2)) <- c(time_dim, sdate_dim) - # } - - # Check if data1 needs to be extended to the length of the dimensions of data2: - if (length(dim(data2)) != length(dim(data1))) { - dif_dims <- which(names(dim(data2)) %in% names(dim(data1)) == FALSE) - if (length(dif_dims) > 0) { - for (i in dif_dims) { - data1 <- InsertDim(data1, posdim = length(dim(data1)), lendim = dim(data2)[i], - name = names(dim(data2))[i]) + # Find common dims and remove the ones not needed + name_data1 <- sort(names(dim(data1))) + name_data2 <- sort(names(dim(data2))) + + commondims <- name_data1[name_data1 %in% name_data2] + commondims <- commondims[-which(commondims == time_dim)] + + if (length(commondims) != 0) { + if (!all(dim(data2)[commondims] == dim(data1)[commondims])) { + dif_common <- commondims[!dim(data2)[commondims] == dim(data1)[commondims]] + if (any(dim(data2)[dif_common] == 1)) { + dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] + dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] + dif_common <- dif_common[-which(dif_common == dim_remove)] } - } - } - # Check if data2 needs to be extended to the length of the dimensions of data1: - if (length(dim(data1)) != length(dim(data2))) { - dif_dims <- which(names(dim(data1)) %in% names(dim(data2)) == FALSE) - if (length(dif_dims) > 0) { - for (i in dif_dims) { - data2 <- InsertDim(data2, posdim = length(dim(data1)), lendim = dim(data1)[i], - name = names(dim(data1))[i]) + if (any(dim(data1)[dif_common] == 1)) { + dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] + dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] + dif_common <- dif_common[-which(dif_common == dim_remove)] + } + if (length(dif_common) != 0) { + stop("Parameters 'data1' and 'data2' have common dimension ", + paste0("'", dif_common, sep = "' "), "with different length and ", + "different of length 1.") } } } @@ -307,11 +369,11 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, if (is.null(dates1)) { warning("Parameter 'dates' is NULL and the average of the ", "full data provided in 'data' is computed.") + } else if (!all(c(is.list(start1), is.list(end1)))) { + warning("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") } else { - if (!any(c(is.list(start1), is.list(end1)))) { - stop("Parameter 'start1' and 'end1' must be lists indicating the ", - "day and the month of the period start and end.") - } if (!is.null(dim(dates1))) { data1 <- SelectPeriodOnData(data = data1, dates = dates1, start = start1, end = end1, time_dim = time_dim, @@ -327,11 +389,11 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, if (is.null(dates2)) { warning("Parameter 'dates2' is NULL and the average of the ", "full data provided in 'data' is computed.") + } else if (!all(c(is.list(start2), is.list(end2)))) { + warning("Parameter 'start2' and 'end2' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") } else { - if (!any(c(is.list(start2), is.list(end2)))) { - stop("Parameter 'start2' and 'end2' must be lists indicating the ", - "day and the month of the period start and end.") - } if (!is.null(dim(dates2))) { data2 <- SelectPeriodOnData(data = data2, dates = dates2, start = start2, end = end2, time_dim = time_dim, @@ -348,6 +410,7 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 + if (all(names(dim(data1)) %in% data1dims)) { pos <- match(names(dim(data1)), data1dims) data1 <- aperm(data1, pos) diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index c15ab9e..f2195f2 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -7,22 +7,30 @@ CST_MergeRefToExp( data1, data2, - start1, - end1, - start2, - end2, + start1 = NULL, + end1 = NULL, + start2 = NULL, + end2 = NULL, time_dim = "ftime", - sdate_dim = "sdate", memb_dim = "member", ncores = NULL ) } \arguments{ -\item{data1}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data1}{An 's2dv_cube' object with the element 'data' being a +multidimensional array of with named dimensions matching the dimensions of +parameter 'data2'. Dimensions with the same name in the 'data2' parameter +must have the same length or length 1; except for the dimension specified +with 'memb_dim', which can be different and in the result will be filled +with NA values. It can also have additional dimensions with different names +in 'data2'.} -\item{data2}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data2}{An 's2dv_cube' object with the element 'data' being a +multidimensional array of dates with named dimensions matching +the dimensions on parameter 'data1'. Dimensions with the same name in the +'data1' parameter must have the same length or length 1, except for the +dimension specified with 'memb_dim', which can be different and in the +result will be filled with NA values.} \item{start1}{A list to define the initial date of the period to select from data1 by providing a list of two elements: the initial date of the period @@ -43,10 +51,8 @@ the final month of the period.} \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested period.} - -\item{sdate_dim}{A character string indicating the name of the dimension in -which the initialization dates are stored.} +specified. This dimension is required to subset the data in a requested +period.} \item{memb_dim}{A character string indicating the name of the member dimension. If the data are not ensemble ones, set as NULL. The default @@ -57,7 +63,16 @@ computation.} } \value{ A 's2dv_cube' object containing the indicator in the element - \code{data}. +\code{data}. The element \code{data} will be a multidimensional array with +dimensions named from the combination of 'data1' and 'data2'. The resulting +dimensions will be the following: all the same common dimensions between the +two arrays plus the different dimensions of each array. If there is any +different common dimension but in a dataset it has length 1, it will be added +with the maximum dimension. If memb_dim is used, the dimension of the maximum +value corresponding to memb_dim of the two data sets will be added; the +difference between the dimensions of the set members will be filled with NA. +The other elements of the 's2dv_cube' will be updated with the combined +information of both datasets. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index c446a10..164606c 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -6,24 +6,37 @@ \usage{ MergeRefToExp( data1, - dates1, + data2, + dates1 = NULL, + dates2 = NULL, start1 = NULL, end1 = NULL, - data2, - dates2, start2 = NULL, end2 = NULL, time_dim = "ftime", - sdate_dim = "sdate", memb_dim = "member", ncores = NULL ) } \arguments{ -\item{data1}{A multidimensional array with named dimensions.} +\item{data1}{A multidimensional array of with named dimensions matching the +dimensions of parameter 'data2'. Dimensions with the same name in the +'data2' parameter must have the same length or length 1; except for the +dimension specified with 'memb_dim', which can be different and in the +result will be filled with NA values. It can also have additional dimensions +with different names in 'data2'.} + +\item{data2}{A multidimensional array of dates with named dimensions matching +the dimensions on parameter 'data1'. Dimensions with the same name in the +'data1' parameter must have the same length or length 1, except for the +dimension specified with 'memb_dim', which can be different and in the +result will be filled with NA values.} + +\item{dates1}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data1'.} -\item{dates1}{A vector of dates or a multidimensional array of dates with -named dimensions matching the dimensions on parameter 'data1'.} +\item{dates2}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data2'.} \item{start1}{A list to define the initial date of the period to select from data1 by providing a list of two elements: the initial date of the period @@ -33,11 +46,6 @@ and the initial month of the period.} data1 by providing a list of two elements: the final day of the period and the final month of the period.} -\item{data2}{A multidimensional array with named dimensions.} - -\item{dates2}{A vector of dates or a multidimensional array of dates with -named dimensions matching the dimensions on parameter 'data2'.} - \item{start2}{A list to define the initial date of the period to select from data2 by providing a list of two elements: the initial date of the period and the initial month of the period.} @@ -52,9 +60,6 @@ matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} -\item{sdate_dim}{A character string indicating the name of the dimension in -which the initialization dates are stored.} - \item{memb_dim}{A character string indicating the name of the member dimension. If the data are not ensemble ones, set as NULL. The default value is 'member'.} @@ -63,7 +68,14 @@ value is 'member'.} computation.} } \value{ -A multidimensional array with named dimensions. +A multidimensional array with dimensions named from the combination of +'data1' and 'data2'. The resulting dimensions will be the following: all the +same common dimensions between the two arrays plus the different dimensions of +each array. If there is any different common dimension but in a dataset it has +length 1, it will be added with the maximum dimension. If memb_dim is used, +the dimension of the maximum value corresponding to memb_dim of the two data +sets will be added; the difference between the dimensions of the set members +will be filled with NA. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from @@ -78,15 +90,15 @@ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1993","\%d-\%m-\%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) -dim(data_dates) <- c(time = 154, sdate = 2) +dim(data_dates) <- c(ftime = 154, sdate = 2) ref_dates <- seq(as.Date("01-01-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day") -dim(ref_dates) <- c(time = 350, sdate = 2) -ref <- array(1001:1700, c(time = 350, sdate = 2)) -data <- array(1:(2*154*2), c(time = 154, sdate = 2, member= 2)) +dim(ref_dates) <- c(ftime = 350, sdate = 2) +ref <- array(1001:1700, c(ftime = 350, sdate = 2)) +data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), end1 = list(30, 6), data2 = data, dates2 = data_dates, start2 = list(1, 7), end = list(21, 9), - time_dim = 'time') + time_dim = 'ftime') } diff --git a/tests/testthat/test-MergeRefToExp.R b/tests/testthat/test-MergeRefToExp.R index adbdfd6..effae50 100644 --- a/tests/testthat/test-MergeRefToExp.R +++ b/tests/testthat/test-MergeRefToExp.R @@ -1,6 +1,135 @@ ########################################################################### -test_that("Sanity checks", { +# cube1 +dates_data1 <- c(seq(as.Date("11-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("20-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("11-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("20-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) +dim(dates_data1) <- c(ftime = 10, sdate = 2) +cube1 <- NULL +cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member= 2)) +cube1$attrs$Dates <- dates_data1 +class(cube1) <- 's2dv_cube' +ref_dates1 <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) +dim(ref_dates1) <- c(ftime = 10, sdate = 2) +cube_ref <- NULL +cube_ref$data <- array(1001:1700, c(ftime = 10, sdate = 2)) +cube_ref$attrs$Dates <- ref_dates1 +class(cube_ref) <- 's2dv_cube' +start1 <- list(3, 7) +end1 <- list(10, 7) +start2 <- list(11, 7) +end2 <- list(15, 7) + +# dat1 +ref1 <- array(1001:1700, c(ftime = 10, sdate = 2)) +data1 <- array(1:(2*154*2), c(ftime = 11, sdate = 2, member = 2)) + +########################################################################### +test_that("1. Input checks", { + # 's2dv_cube' + expect_error( + CST_MergeRefToExp('a'), + "Parameter 'data1' must be of the class 's2dv_cube'." + ) + expect_error( + CST_MergeRefToExp(cube1, array(10)), + "Parameter 'data2' must be of the class 's2dv_cube'." + ) + # data + expect_error( + MergeRefToExp(10, 10), + "Parameters 'data1' and 'data2' must be arrays." + ) + expect_error( + MergeRefToExp(array(10), array(10)), + "Parameters 'data1' and 'data2' must have named dimensions." + ) + # time_dim + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, time_dim = 'time'), + paste0("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", + "names.") + ) + # memb_dim + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, memb_dim = 'time'), + paste0("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. ", + "Set it to NULL if there is no member dimension.") + ) + # common dimensions + expect_error( + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, var = 3)), + data2 = array(1:16, c(sdate = 2, ftime = 2, var = 4)), + memb_dim = NULL), + paste0("Parameters 'data1' and 'data2' have common dimension 'var' ", + "with different length and different of length 1.") + ) + # dates + expect_warning( + MergeRefToExp(data1 = array(1:4, c(sdate = 2, ftime = 2, lat = 1)), + data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 4)), + memb_dim = NULL, start1 = list(1, 1), end1 = list(3, 1), + start2 = NULL, end2 = NULL), + paste0("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + ) + expect_warning( + MergeRefToExp(data1 = ref1, + data2 = data1, dates1 = ref_dates1, dates2 = dates_data1, + start1 = c(3, 7), end1 = end1, + start2 = start2, end2 = end2), + paste0("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") + ) + expect_warning( + MergeRefToExp(data1 = ref1, + data2 = data1, dates1 = as.vector(ref_dates1), + dates2 = dates_data1, start1 = start1, end1 = end1, + start2 = start2, end2 = end2), + paste0("Parameter 'dates1' must have named dimensions if 'start' and ", + "'end' are not NULL. All 'data1' will be used.") + ) +}) + +########################################################################### + +test_that("2. Output checks", { + res1 <- CST_MergeRefToExp(data1 = cube_ref, data2 = cube1, + start1 = start1, end1 = end1, + start2 = start2, end2 = end2) + # dims + expect_equal( + dim(res1$data), + res1$dims + ) + # coords + expect_equal( + names(dim(res1$data)), + names(res1$coords) + ) + # Dates + expect_equal( + dim(res1$data)[c('ftime', 'sdate')], + dim(res1$attrs$Dates) + ) +}) + +########################################################################### + +test_that("3. Output checks", { data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-12-1993", "%d-%m-%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), @@ -19,12 +148,12 @@ test_that("Sanity checks", { data$attrs$Dates <- data_dates class(data) <- 's2dv_cube' -suppressWarnings( - expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), - end1 = list(30, 6), start2 = list(1, 7), - end2 = list(21, 9))$attrs$Dates, - SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) -) + suppressWarnings( + expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), + end1 = list(30, 6), start2 = list(1, 7), + end2 = list(21, 9))$attrs$Dates, + SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) + ) output <- array(c(1172:1181, 1:83, 1537:1546, 155:237, 1172:1181, 309:391, 1537:1546, 463:545), c(ftime = 93, sdate = 2, member = 2)) @@ -33,7 +162,8 @@ suppressWarnings( CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), end1 = list(30, 6), start2 = list(1, 7), end2 = list(21, 9))$data, - output) + output + ) # issue 13: One lead time @@ -81,11 +211,11 @@ suppressWarnings( end2 = list(31, 7))$data, output ) - }) -test_that("Seasonal", { +########################################################################### +test_that("4. Test Seasonal", { dates <- NULL hcst.inityear <- 1993 hcst.endyear <- 2017 -- GitLab From 0a670a068a579d5d5cc7ca0227ea573eaaf058fc Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 14 Jul 2023 12:33:25 +0200 Subject: [PATCH 3/6] Update description of MergeRefToExp; correct CST_MergeRefToExp imports and description --- R/MergeRefToExp.R | 20 +++++++++++--------- man/CST_MergeRefToExp.Rd | 11 +++++------ man/MergeRefToExp.Rd | 6 +++++- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 4f8e16f..abbdbe1 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -4,12 +4,11 @@ #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to #'merge past observations, or other references, to the forecast (or hindcast) -#'to compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. If the forecast simulation doesn't cover the required period because it -#'is initialized too early (e.g.: Initialization on November 1st the forecast -#'covers until the beginning of June next year), a climatology (or other -#'references) could be added at the end of the forecast lead time to cover the -#'desired period (e.g.: until the end of summer). +#'to compute the indicator. If the forecast simulation doesn't cover the +#'required period because it is initialized too early (e.g.: Initialization on +#'November 1st the forecast covers until the beginning of June next year), a +#'climatology (or other references) could be added at the end of the forecast +#'lead time to cover the desired period (e.g.: until the end of summer). #' #'@param data1 An 's2dv_cube' object with the element 'data' being a #' multidimensional array of with named dimensions matching the dimensions of @@ -78,8 +77,7 @@ #'new_data <- CST_MergeRefToExp(data1 = ref, data2 = data, #' start1 = list(21, 6), end1 = list(30, 6), #' start2 = list(1, 7), end2 = list(21, 9)) -#' -#'@import multiApply +#' #'@export CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, @@ -209,7 +207,11 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'than the one required for the indicator (e.g.: July 1st), the user may want to #'merge past observations, or other reference, to the forecast (or hindcast) to #'compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. +#'steps. If the forecast simulation doesn't cover the required period because it +#'is initialized too early (e.g.: Initialization on November 1st the forecast +#'covers until the beginning of June next year), a climatology (or other +#'references) could be added at the end of the forecast lead time to cover the +#'desired period (e.g.: until the end of summer). #' #'@param data1 A multidimensional array of with named dimensions matching the #' dimensions of parameter 'data2'. Dimensions with the same name in the diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index f2195f2..67f35a9 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -79,12 +79,11 @@ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to merge past observations, or other references, to the forecast (or hindcast) -to compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. If the forecast simulation doesn't cover the required period because it -is initialized too early (e.g.: Initialization on November 1st the forecast -covers until the beginning of June next year), a climatology (or other -references) could be added at the end of the forecast lead time to cover the -desired period (e.g.: until the end of summer). +to compute the indicator. If the forecast simulation doesn't cover the +required period because it is initialized too early (e.g.: Initialization on +November 1st the forecast covers until the beginning of June next year), a +climatology (or other references) could be added at the end of the forecast +lead time to cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index 164606c..bd79c2f 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -83,7 +83,11 @@ June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to merge past observations, or other reference, to the forecast (or hindcast) to compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. +steps. If the forecast simulation doesn't cover the required period because it +is initialized too early (e.g.: Initialization on November 1st the forecast +covers until the beginning of June next year), a climatology (or other +references) could be added at the end of the forecast lead time to cover the +desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), -- GitLab From bf9e36fefe8288f05934b7b0ca3aad788e407131 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 14 Jul 2023 14:23:14 +0200 Subject: [PATCH 4/6] Correct error of adding metadata to the resultant s2dv_cube --- R/MergeRefToExp.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index abbdbe1..8efa04f 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -186,7 +186,10 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, # Variable data1$attrs$Variable$varName <- unique(data1$attrs$Variable$varName, data2$attrs$Variable$varName) - data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable, data2$attrs$Variable)[[2]] + names_metadata <- names(data1$attrs$Variable$metadata) + data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable$metadata, + data2$attrs$Variable$metadata) + names(data1$attrs$Variable$metadata) <- names_metadata # source_files data1$attrs$source_files <- c(data1$attrs$source_files, data2$attrs$source_files) @@ -196,6 +199,12 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, # when data1$attrs$when <- Sys.time() + + # load_parameters (TO DO: remove with CST_Start) + if (!is.null(c(data1$attrs$load_parameters, data2$attrs$load_parameters))) { + data1$attrs$load_parameters <- list(data1 = data1$attrs$load_parameters, + data2 = data2$attrs$load_parameters) + } return(data1) } -- GitLab From 656acdd712d6d0bb789dee80f91db286784302d0 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 14 Jul 2023 14:35:14 +0200 Subject: [PATCH 5/6] Improve 's2dv_cube' attributes with adding not repeated metadata (added unique()) --- R/MergeRefToExp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 8efa04f..ab626f9 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -192,10 +192,10 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, names(data1$attrs$Variable$metadata) <- names_metadata # source_files - data1$attrs$source_files <- c(data1$attrs$source_files, data2$attrs$source_files) + data1$attrs$source_files <- unique(c(data1$attrs$source_files, data2$attrs$source_files)) # Datasets - data1$attrs$Datasets <- c(data1$attrs$Datasets, data2$attrs$Datasets) + data1$attrs$Datasets <- unique(c(data1$attrs$Datasets, data2$attrs$Datasets)) # when data1$attrs$when <- Sys.time() -- GitLab From a632e22e5235decefb906c1de817a03fe8234b46 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 18 Jul 2023 18:22:17 +0200 Subject: [PATCH 6/6] Correct development with only allowing memb_dim and time_dim to be different between data1 and data2; improved documentation; added unit tests --- NAMESPACE | 1 - R/MergeRefToExp.R | 248 +++++++++++++--------------- man/CST_MergeRefToExp.Rd | 71 ++++---- man/MergeRefToExp.Rd | 83 +++++----- tests/testthat/test-MergeRefToExp.R | 95 ++++++++--- 5 files changed, 262 insertions(+), 236 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8795a86..d80accb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,7 +26,6 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) -importFrom(s2dv,InsertDim) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index ab626f9..03425c8 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -3,59 +3,58 @@ #'Some indicators are defined for specific temporal periods (e.g.: summer from #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to -#'merge past observations, or other references, to the forecast (or hindcast) -#'to compute the indicator. If the forecast simulation doesn't cover the -#'required period because it is initialized too early (e.g.: Initialization on -#'November 1st the forecast covers until the beginning of June next year), a -#'climatology (or other references) could be added at the end of the forecast -#'lead time to cover the desired period (e.g.: until the end of summer). +#'merge past observations, or other references, to the forecast (or hindcast) to +#'compute the indicator. If the forecast simulation doesn't cover the required +#'period because it is initialized too early (e.g.: Initialization on November +#'1st the forecast covers until the beginning of June next year), a climatology +#'(or other references) could be added at the end of the forecast lead time to +#'cover the desired period (e.g.: until the end of summer). #' #'@param data1 An 's2dv_cube' object with the element 'data' being a -#' multidimensional array of with named dimensions matching the dimensions of -#' parameter 'data2'. Dimensions with the same name in the 'data2' parameter -#' must have the same length or length 1; except for the dimension specified -#' with 'memb_dim', which can be different and in the result will be filled -#' with NA values. It can also have additional dimensions with different names -#' in 'data2'. +#' multidimensional array with named dimensions. All dimensions must be +#' equal to 'data2' dimensions except for the ones specified with 'memb_dim' +#' and 'time_dim'. If 'start1' and 'end1' are used to subset a period, the +#' Dates must be stored in element '$attrs$Dates' of the object. Dates must +#' have same time dimensions as element 'data'. #'@param data2 An 's2dv_cube' object with the element 'data' being a -#' multidimensional array of dates with named dimensions matching -#' the dimensions on parameter 'data1'. Dimensions with the same name in the -#' 'data1' parameter must have the same length or length 1, except for the -#' dimension specified with 'memb_dim', which can be different and in the -#' result will be filled with NA values. +#' multidimensional array of named dimensions matching the dimensions of +#' parameter 'data1'. All dimensions must be equal to 'data1' except for the +#' ones specified with 'memb_dim' and 'time_dim'. If 'start2' and 'end2' are +#' used to subset a period, the Dates must be stored in element '$attrs$Dates' +#' of the object. Dates must have same time dimensions as element 'data'. #'@param start1 A list to define the initial date of the period to select from -#' data1 by providing a list of two elements: the initial date of the period +#' 'data1' by providing a list of two elements: the initial date of the period #' and the initial month of the period. #'@param end1 A list to define the final date of the period to select from -#' data1 by providing a list of two elements: the final day of the period and +#' 'data1' by providing a list of two elements: the final day of the period and #' the final month of the period. #'@param start2 A list to define the initial date of the period to select from -#' data2 by providing a list of two elements: the initial date of the period +#' 'data2' by providing a list of two elements: the initial date of the period #' and the initial month of the period. #'@param end2 A list to define the final date of the period to select from -#' data2 by providing a list of two elements: the final day of the period and +#' 'data2' by providing a list of two elements: the final day of the period and #' the final month of the period. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name -#' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested +#' dimension that will be used to combine the two arrays. By default, it is set +#' to 'ftime'. Also, it will be used to subset the data in a requested #' period. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. -#'@return A 's2dv_cube' object containing the indicator in the element -#'\code{data}. The element \code{data} will be a multidimensional array with -#'dimensions named from the combination of 'data1' and 'data2'. The resulting -#'dimensions will be the following: all the same common dimensions between the -#'two arrays plus the different dimensions of each array. If there is any -#'different common dimension but in a dataset it has length 1, it will be added -#'with the maximum dimension. If memb_dim is used, the dimension of the maximum -#'value corresponding to memb_dim of the two data sets will be added; the -#'difference between the dimensions of the set members will be filled with NA. -#'The other elements of the 's2dv_cube' will be updated with the combined -#'information of both datasets. +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data}. The element \code{data} will be a multidimensional array created +#'from the combination of 'data1' and 'data2'. The resulting array will contain +#'the following dimensions: the original dimensions of the input data, which are +#'common to both arrays and for the 'time_dim' dimension, the sum of the +#'corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, +#'regarding member dimension, two different situations can occur: (1) in the +#'case that one of the arrays does not have member dimension or is equal to 1, +#'the result will contain the repeated values of itself; (2) in the case that +#'both arrays have member dimension and is greater than 1, all combinations of +#'member dimension will be returned. The other elements of the 's2dv_cube' will +#'be updated with the combined information of both datasets. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -64,7 +63,7 @@ #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day")) #'dim(data_dates) <- c(ftime = 154, sdate = 2) #'data <- NULL -#'data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member= 2)) +#'data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) #'data$attrs$Dates<- data_dates #'class(data) <- 's2dv_cube' #'ref_dates <- seq(as.Date("01-01-1993", "%d-%m-%Y", tz = 'UTC'), @@ -132,7 +131,8 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, data1$coords[[i_dim]] <- NULL data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] attr(data1$coords[[i_dim]], 'indices') <- TRUE - } else if (!identical(attributes(data1$coords[[i_dim]]), attributes(data2$coords[[i_dim]]))) { + } else if (!identical(attributes(data1$coords[[i_dim]]), + attributes(data2$coords[[i_dim]]))) { attributes(data1$coords[[i_dim]]) <- NULL } } else { @@ -214,60 +214,61 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'Some indicators are defined for specific temporal periods (e.g.: summer from #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to -#'merge past observations, or other reference, to the forecast (or hindcast) to -#'compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. If the forecast simulation doesn't cover the required period because it -#'is initialized too early (e.g.: Initialization on November 1st the forecast -#'covers until the beginning of June next year), a climatology (or other -#'references) could be added at the end of the forecast lead time to cover the -#'desired period (e.g.: until the end of summer). +#'merge past observations, or other references, to the forecast (or hindcast) to +#'compute the indicator. If the forecast simulation doesn't cover the required +#'period because it is initialized too early (e.g.: Initialization on November +#'1st the forecast covers until the beginning of June next year), a climatology +#'(or other references) could be added at the end of the forecast lead time to +#'cover the desired period (e.g.: until the end of summer). #' -#'@param data1 A multidimensional array of with named dimensions matching the -#' dimensions of parameter 'data2'. Dimensions with the same name in the -#' 'data2' parameter must have the same length or length 1; except for the -#' dimension specified with 'memb_dim', which can be different and in the -#' result will be filled with NA values. It can also have additional dimensions -#' with different names in 'data2'. +#'@param data1 A multidimensional array with named dimensions. All dimensions +#' must be equal to 'data2' dimensions except for the ones specified with +#' 'memb_dim' and 'time_dim'. #'@param dates1 A multidimensional array of dates with named dimensions matching -#' the temporal dimensions on parameter 'data1'. -#'@param data2 A multidimensional array of dates with named dimensions matching -#' the dimensions on parameter 'data1'. Dimensions with the same name in the -#' 'data1' parameter must have the same length or length 1, except for the -#' dimension specified with 'memb_dim', which can be different and in the -#' result will be filled with NA values. +#' the temporal dimensions of parameter 'data1'. The common dimensions must be +#' equal to 'data1' dimensions. +#'@param data2 A multidimensional array of named dimensions matching the +#' dimensions of parameter 'data1'. All dimensions must be equal to 'data1' +#' except for the ones specified with 'memb_dim' and 'time_dim'. #'@param dates2 A multidimensional array of dates with named dimensions matching -#' the temporal dimensions on parameter 'data2'. +#' the temporal dimensions on parameter 'data2'. The common dimensions must be +#' equal to 'data2' dimensions. #'@param start1 A list to define the initial date of the period to select from -#' data1 by providing a list of two elements: the initial date of the period -#' and the initial month of the period. +#' 'data1' by providing a list of two elements: the initial date of the period +#' and the initial month of the period. The initial date of the period must be +#' included in the 'dates1' array. #'@param end1 A list to define the final date of the period to select from -#' data1 by providing a list of two elements: the final day of the period and -#' the final month of the period. +#' 'data1' by providing a list of two elements: the final day of the period and +#' the final month of the period. The final date of the period must be +#' included in the 'dates1' array. #'@param start2 A list to define the initial date of the period to select from -#' data2 by providing a list of two elements: the initial date of the period -#' and the initial month of the period. +#' 'data2' by providing a list of two elements: the initial date of the period +#' and the initial month of the period. The initial date of the period must be +#' included in the 'dates2' array. #'@param end2 A list to define the final date of the period to select from -#' data2 by providing a list of two elements: the final day of the period and -#' the final month of the period. +#' 'data2' by providing a list of two elements: the final day of the period and +#' the final month of the period. The final date of the period must be +#' included in the 'dates2' array. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name -#' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested +#' dimension that will be used to combine the two arrays. By default, it is set +#' to 'ftime'. Also, it will be used to subset the data in a requested #' period. #'@param memb_dim A character string indicating the name of the member -#' dimension. If the data are not ensemble ones, set as NULL. The default -#' value is 'member'. +#' dimension. If the 'data1' and 'data2' have no member dimension, set it as +#' NULL. It is set as 'member' by default. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return A multidimensional array with dimensions named from the combination of -#''data1' and 'data2'. The resulting dimensions will be the following: all the -#'same common dimensions between the two arrays plus the different dimensions of -#'each array. If there is any different common dimension but in a dataset it has -#'length 1, it will be added with the maximum dimension. If memb_dim is used, -#'the dimension of the maximum value corresponding to memb_dim of the two data -#'sets will be added; the difference between the dimensions of the set members -#'will be filled with NA. +#'@return A multidimensional array created from the combination of 'data1' and +#''data2'. The resulting array will contain the following dimensions: the +#'original dimensions of the input data, which are common to both arrays and for +#'the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' +#'and 'data2'. If 'memb_dim' is not null, regarding member dimension, two +#'different situations can occur: (1) in the case that one of the arrays does +#'not have member dimension or is equal to 1, the result will contain the +#'repeated values of itself; (2) in the case that both arrays have member +#'dimension and is greater than 1, all combinations of member dimension will be +#'returned. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -286,21 +287,20 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #' time_dim = 'ftime') #' #'@import multiApply -#'@importFrom s2dv InsertDim #'@export MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, time_dim = 'ftime', memb_dim = 'member', ncores = NULL) { # Input checks - # data + ## data1 and data2 if (!is.array(data1) | !is.array(data2)) { stop("Parameters 'data1' and 'data2' must be arrays.") } if (is.null(names(dim(data1))) | is.null(names(dim(data2)))) { stop("Parameters 'data1' and 'data2' must have named dimensions.") } - # time_dim + ## time_dim if (!is.character(time_dim)) { stop("Parameter 'time_dim' must be a character string.") } @@ -308,7 +308,9 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, stop("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", "names.") } - # memb_dim + ## memb_dim + data1dims <- names(dim(data1)) + data2dims <- names(dim(data2)) if (!is.null(memb_dim)) { if (!is.character(memb_dim)) { stop("Parameter 'memb_dim' must be a character string.") @@ -317,65 +319,41 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, stop("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. ", "Set it to NULL if there is no member dimension.") } - if (memb_dim %in% names(dim(data1))) { - if (dim(data1)[memb_dim] == 1) { - data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) - } - } - if (memb_dim %in% names(dim(data2))) { - if (dim(data2)[memb_dim] == 1) { - data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) - } - } - # Add NA to fill member_dim - if (memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2))) { + if ((memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2)))) { if (dim(data1)[memb_dim] != dim(data2)[memb_dim]) { - if (dim(data1)[memb_dim] > dim(data2)[memb_dim]) { - data2 <- Apply(list(data2), target_dims = memb_dim, - fun = function(x, length_new_dim) { - return(c(x, rep(NA, length_new_dim - length(x)))) - }, length_new_dim = dim(data1)[memb_dim], - output_dims = memb_dim)$output1 + if (dim(data1)[memb_dim] == 1) { + data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) + } else if (dim(data2)[memb_dim] == 1) { + data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) } else { + memb_dim1 <- dim(data1)[memb_dim] data1 <- Apply(list(data1), target_dims = memb_dim, - fun = function(x, length_new_dim) { - return(c(x, rep(NA, length_new_dim - length(x)))) - }, length_new_dim = dim(data2)[memb_dim], - output_dims = memb_dim)$output1 + fun = function(x, memb_rep) { + return(rep(x, each = memb_rep)) + }, memb_rep = dim(data2)[memb_dim], + output_dims = memb_dim, ncores = ncores)$output1 + data2 <- Apply(list(data2), target_dims = memb_dim, + fun = function(x, memb_rep) { + return(rep(x, memb_rep)) + }, memb_rep = memb_dim1, + output_dims = memb_dim, ncores = ncores)$output1 } } } } - - # Find common dims and remove the ones not needed + ## data1 and data2 (2) name_data1 <- sort(names(dim(data1))) name_data2 <- sort(names(dim(data2))) - commondims <- name_data1[name_data1 %in% name_data2] - commondims <- commondims[-which(commondims == time_dim)] + name_data1 <- name_data1[-which(name_data1 %in% c(time_dim, memb_dim))] + name_data2 <- name_data2[-which(name_data2 %in% c(time_dim, memb_dim))] - if (length(commondims) != 0) { - if (!all(dim(data2)[commondims] == dim(data1)[commondims])) { - dif_common <- commondims[!dim(data2)[commondims] == dim(data1)[commondims]] - if (any(dim(data2)[dif_common] == 1)) { - dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] - dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] - dif_common <- dif_common[-which(dif_common == dim_remove)] - } - if (any(dim(data1)[dif_common] == 1)) { - dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] - dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] - dif_common <- dif_common[-which(dif_common == dim_remove)] - } - if (length(dif_common) != 0) { - stop("Parameters 'data1' and 'data2' have common dimension ", - paste0("'", dif_common, sep = "' "), "with different length and ", - "different of length 1.") - } - } + if (!identical(length(name_data1), length(name_data2)) | + !identical(dim(data1)[name_data1], dim(data2)[name_data2])) { + stop(paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.")) } - - # dates1 + ## dates1 if (!is.null(start1) & !is.null(end1)) { if (is.null(dates1)) { warning("Parameter 'dates' is NULL and the average of the ", @@ -395,7 +373,7 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, } } } - # dates2 + ## dates2 if (!is.null(start2) & !is.null(end2)) { if (is.null(dates2)) { warning("Parameter 'dates2' is NULL and the average of the ", @@ -416,18 +394,14 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, } } - data1dims <- names(dim(data1)) - data2dims <- names(dim(data2)) - data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 if (all(names(dim(data1)) %in% data1dims)) { - pos <- match(names(dim(data1)), data1dims) + pos <- match(data1dims, names(dim(data1))) data1 <- aperm(data1, pos) - } - if (all(names(dim(data1)) %in% data2dims)) { - pos <- match(names(dim(data1)), data2dims) + } else if (all(names(dim(data1)) %in% data2dims)) { + pos <- match(data2dims, names(dim(data1))) data1 <- aperm(data1, pos) } return(data1) diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index 67f35a9..bbca8c4 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -18,40 +18,38 @@ CST_MergeRefToExp( } \arguments{ \item{data1}{An 's2dv_cube' object with the element 'data' being a -multidimensional array of with named dimensions matching the dimensions of -parameter 'data2'. Dimensions with the same name in the 'data2' parameter -must have the same length or length 1; except for the dimension specified -with 'memb_dim', which can be different and in the result will be filled -with NA values. It can also have additional dimensions with different names -in 'data2'.} +multidimensional array with named dimensions. All dimensions must be +equal to 'data2' dimensions except for the ones specified with 'memb_dim' +and 'time_dim'. If 'start1' and 'end1' are used to subset a period, the +Dates must be stored in element '$attrs$Dates' of the object. Dates must +have same time dimensions as element 'data'.} \item{data2}{An 's2dv_cube' object with the element 'data' being a -multidimensional array of dates with named dimensions matching -the dimensions on parameter 'data1'. Dimensions with the same name in the -'data1' parameter must have the same length or length 1, except for the -dimension specified with 'memb_dim', which can be different and in the -result will be filled with NA values.} +multidimensional array of named dimensions matching the dimensions of +parameter 'data1'. All dimensions must be equal to 'data1' except for the +ones specified with 'memb_dim' and 'time_dim'. If 'start2' and 'end2' are +used to subset a period, the Dates must be stored in element '$attrs$Dates' +of the object. Dates must have same time dimensions as element 'data'.} \item{start1}{A list to define the initial date of the period to select from -data1 by providing a list of two elements: the initial date of the period +'data1' by providing a list of two elements: the initial date of the period and the initial month of the period.} \item{end1}{A list to define the final date of the period to select from -data1 by providing a list of two elements: the final day of the period and +'data1' by providing a list of two elements: the final day of the period and the final month of the period.} \item{start2}{A list to define the initial date of the period to select from -data2 by providing a list of two elements: the initial date of the period +'data2' by providing a list of two elements: the initial date of the period and the initial month of the period.} \item{end2}{A list to define the final date of the period to select from -data2 by providing a list of two elements: the final day of the period and +'data2' by providing a list of two elements: the final day of the period and the final month of the period.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name -matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested +dimension that will be used to combine the two arrays. By default, it is set +to 'ftime'. Also, it will be used to subset the data in a requested period.} \item{memb_dim}{A character string indicating the name of the member @@ -62,28 +60,29 @@ value is 'member'.} computation.} } \value{ -A 's2dv_cube' object containing the indicator in the element -\code{data}. The element \code{data} will be a multidimensional array with -dimensions named from the combination of 'data1' and 'data2'. The resulting -dimensions will be the following: all the same common dimensions between the -two arrays plus the different dimensions of each array. If there is any -different common dimension but in a dataset it has length 1, it will be added -with the maximum dimension. If memb_dim is used, the dimension of the maximum -value corresponding to memb_dim of the two data sets will be added; the -difference between the dimensions of the set members will be filled with NA. -The other elements of the 's2dv_cube' will be updated with the combined -information of both datasets. +An 's2dv_cube' object containing the indicator in the element +\code{data}. The element \code{data} will be a multidimensional array created +from the combination of 'data1' and 'data2'. The resulting array will contain +the following dimensions: the original dimensions of the input data, which are +common to both arrays and for the 'time_dim' dimension, the sum of the +corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, +regarding member dimension, two different situations can occur: (1) in the +case that one of the arrays does not have member dimension or is equal to 1, +the result will contain the repeated values of itself; (2) in the case that +both arrays have member dimension and is greater than 1, all combinations of +member dimension will be returned. The other elements of the 's2dv_cube' will +be updated with the combined information of both datasets. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to -merge past observations, or other references, to the forecast (or hindcast) -to compute the indicator. If the forecast simulation doesn't cover the -required period because it is initialized too early (e.g.: Initialization on -November 1st the forecast covers until the beginning of June next year), a -climatology (or other references) could be added at the end of the forecast -lead time to cover the desired period (e.g.: until the end of summer). +merge past observations, or other references, to the forecast (or hindcast) to +compute the indicator. If the forecast simulation doesn't cover the required +period because it is initialized too early (e.g.: Initialization on November +1st the forecast covers until the beginning of June next year), a climatology +(or other references) could be added at the end of the forecast lead time to +cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), @@ -92,7 +91,7 @@ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) dim(data_dates) <- c(ftime = 154, sdate = 2) data <- NULL -data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member= 2)) +data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) data$attrs$Dates<- data_dates class(data) <- 's2dv_cube' ref_dates <- seq(as.Date("01-01-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index bd79c2f..e22b52d 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -19,75 +19,76 @@ MergeRefToExp( ) } \arguments{ -\item{data1}{A multidimensional array of with named dimensions matching the -dimensions of parameter 'data2'. Dimensions with the same name in the -'data2' parameter must have the same length or length 1; except for the -dimension specified with 'memb_dim', which can be different and in the -result will be filled with NA values. It can also have additional dimensions -with different names in 'data2'.} +\item{data1}{A multidimensional array with named dimensions. All dimensions +must be equal to 'data2' dimensions except for the ones specified with +'memb_dim' and 'time_dim'.} -\item{data2}{A multidimensional array of dates with named dimensions matching -the dimensions on parameter 'data1'. Dimensions with the same name in the -'data1' parameter must have the same length or length 1, except for the -dimension specified with 'memb_dim', which can be different and in the -result will be filled with NA values.} +\item{data2}{A multidimensional array of named dimensions matching the +dimensions of parameter 'data1'. All dimensions must be equal to 'data1' +except for the ones specified with 'memb_dim' and 'time_dim'.} \item{dates1}{A multidimensional array of dates with named dimensions matching -the temporal dimensions on parameter 'data1'.} +the temporal dimensions of parameter 'data1'. The common dimensions must be +equal to 'data1' dimensions.} \item{dates2}{A multidimensional array of dates with named dimensions matching -the temporal dimensions on parameter 'data2'.} +the temporal dimensions on parameter 'data2'. The common dimensions must be +equal to 'data2' dimensions.} \item{start1}{A list to define the initial date of the period to select from -data1 by providing a list of two elements: the initial date of the period -and the initial month of the period.} +'data1' by providing a list of two elements: the initial date of the period +and the initial month of the period. The initial date of the period must be +included in the 'dates1' array.} \item{end1}{A list to define the final date of the period to select from -data1 by providing a list of two elements: the final day of the period and -the final month of the period.} +'data1' by providing a list of two elements: the final day of the period and +the final month of the period. The final date of the period must be +included in the 'dates1' array.} \item{start2}{A list to define the initial date of the period to select from -data2 by providing a list of two elements: the initial date of the period -and the initial month of the period.} +'data2' by providing a list of two elements: the initial date of the period +and the initial month of the period. The initial date of the period must be +included in the 'dates2' array.} \item{end2}{A list to define the final date of the period to select from -data2 by providing a list of two elements: the final day of the period and -the final month of the period.} +'data2' by providing a list of two elements: the final day of the period and +the final month of the period. The final date of the period must be +included in the 'dates2' array.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name -matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested +dimension that will be used to combine the two arrays. By default, it is set +to 'ftime'. Also, it will be used to subset the data in a requested period.} \item{memb_dim}{A character string indicating the name of the member -dimension. If the data are not ensemble ones, set as NULL. The default -value is 'member'.} +dimension. If the 'data1' and 'data2' have no member dimension, set it as +NULL. It is set as 'member' by default.} \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } \value{ -A multidimensional array with dimensions named from the combination of -'data1' and 'data2'. The resulting dimensions will be the following: all the -same common dimensions between the two arrays plus the different dimensions of -each array. If there is any different common dimension but in a dataset it has -length 1, it will be added with the maximum dimension. If memb_dim is used, -the dimension of the maximum value corresponding to memb_dim of the two data -sets will be added; the difference between the dimensions of the set members -will be filled with NA. +A multidimensional array created from the combination of 'data1' and +'data2'. The resulting array will contain the following dimensions: the +original dimensions of the input data, which are common to both arrays and for +the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' +and 'data2'. If 'memb_dim' is not null, regarding member dimension, two +different situations can occur: (1) in the case that one of the arrays does +not have member dimension or is equal to 1, the result will contain the +repeated values of itself; (2) in the case that both arrays have member +dimension and is greater than 1, all combinations of member dimension will be +returned. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to -merge past observations, or other reference, to the forecast (or hindcast) to -compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. If the forecast simulation doesn't cover the required period because it -is initialized too early (e.g.: Initialization on November 1st the forecast -covers until the beginning of June next year), a climatology (or other -references) could be added at the end of the forecast lead time to cover the -desired period (e.g.: until the end of summer). +merge past observations, or other references, to the forecast (or hindcast) to +compute the indicator. If the forecast simulation doesn't cover the required +period because it is initialized too early (e.g.: Initialization on November +1st the forecast covers until the beginning of June next year), a climatology +(or other references) could be added at the end of the forecast lead time to +cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/tests/testthat/test-MergeRefToExp.R b/tests/testthat/test-MergeRefToExp.R index effae50..57cd425 100644 --- a/tests/testthat/test-MergeRefToExp.R +++ b/tests/testthat/test-MergeRefToExp.R @@ -7,16 +7,16 @@ dates_data1 <- c(seq(as.Date("11-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("20-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) dim(dates_data1) <- c(ftime = 10, sdate = 2) cube1 <- NULL -cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member= 2)) +cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member = 2)) cube1$attrs$Dates <- dates_data1 class(cube1) <- 's2dv_cube' ref_dates1 <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), - as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), - seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), - as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) + as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) dim(ref_dates1) <- c(ftime = 10, sdate = 2) cube_ref <- NULL -cube_ref$data <- array(1001:1700, c(ftime = 10, sdate = 2)) +cube_ref$data <- array(1001:1020, c(ftime = 10, sdate = 2)) cube_ref$attrs$Dates <- ref_dates1 class(cube_ref) <- 's2dv_cube' start1 <- list(3, 7) @@ -25,8 +25,13 @@ start2 <- list(11, 7) end2 <- list(15, 7) # dat1 -ref1 <- array(1001:1700, c(ftime = 10, sdate = 2)) -data1 <- array(1:(2*154*2), c(ftime = 11, sdate = 2, member = 2)) +ref1 <- array(1001:1020, c(ftime = 10, sdate = 2, member = 1)) +data1 <- array(1:40, c(ftime = 10, sdate = 2, member = 2)) + + +# dat2 +ref2 <- array(1001:1015, c(ftime = 5, sdate = 1, member = 3)) +data2 <- array(1:6, c(ftime = 3, sdate = 1, member = 2)) ########################################################################### test_that("1. Input checks", { @@ -70,16 +75,23 @@ test_that("1. Input checks", { ) # common dimensions expect_error( - MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, var = 3)), + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, dat = 3)), data2 = array(1:16, c(sdate = 2, ftime = 2, var = 4)), memb_dim = NULL), - paste0("Parameters 'data1' and 'data2' have common dimension 'var' ", - "with different length and different of length 1.") + paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.") + ) + expect_error( + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, dat = 1)), + data2 = array(1:16, c(sdate = 2, ftime = 2)), + memb_dim = NULL), + paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.") ) # dates expect_warning( MergeRefToExp(data1 = array(1:4, c(sdate = 2, ftime = 2, lat = 1)), - data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 4)), + data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 1)), memb_dim = NULL, start1 = list(1, 1), end1 = list(3, 1), start2 = NULL, end2 = NULL), paste0("Parameter 'dates' is NULL and the average of the ", @@ -106,7 +118,7 @@ test_that("1. Input checks", { ########################################################################### -test_that("2. Output checks", { +test_that("2. Output checks: CST_MergeRefToExp", { res1 <- CST_MergeRefToExp(data1 = cube_ref, data2 = cube1, start1 = start1, end1 = end1, start2 = start2, end2 = end2) @@ -125,11 +137,54 @@ test_that("2. Output checks", { dim(res1$data)[c('ftime', 'sdate')], dim(res1$attrs$Dates) ) + # data + expect_equal( + res1$data[1:8,,1], + res1$data[1:8,,2] + ) }) ########################################################################### -test_that("3. Output checks", { +test_that("3. Output checks: MergeRefToExp", { + # Minimum dimensions + expect_equal( + MergeRefToExp(data1 = array(1:2, c(ftime = 2)), + data2 = array(1, c(ftime = 1)), memb_dim = NULL), + array(c(1,2,1), dim = c(ftime = 3)) + ) + # res2 + res2 <- MergeRefToExp(data1 = ref1, data2 = data1) + ## dims + expect_equal( + dim(res2), + c(ftime = 20, sdate = 2, member = 2) + ) + ## data + expect_equal( + res2[,1,], + array(c(1001:1010, 1:10, 1001:1010, 21:30), dim = c(ftime = 20, member = 2)) + ) + # res3: multiple different members + res3 <- MergeRefToExp(data1 = ref2, data2 = data2) + ## dims + expect_equal( + dim(res3), + c(ftime = 8, sdate = 1, member = 6) + ) + expect_equal( + as.vector(res3[1:5, 1, ]), + c(rep(1001:1005, 2), rep(1006:1010, 2), rep(1011:1015, 2)) + ) + expect_equal( + as.vector(res3[6:8, 1, ]), + rep(c(1:3, 4:6), 3) + ) +}) + +########################################################################### + +test_that("3. Output checks: Dates", { data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-12-1993", "%d-%m-%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), @@ -152,7 +207,7 @@ test_that("3. Output checks", { expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), end1 = list(30, 6), start2 = list(1, 7), end2 = list(21, 9))$attrs$Dates, - SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) + SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) ) output <- array(c(1172:1181, 1:83, 1537:1546, 155:237, 1172:1181, 309:391, @@ -168,9 +223,9 @@ test_that("3. Output checks", { # issue 13: One lead time data_dates <- c(as.Date("01-06-1993", "%d-%m-%Y", tz = 'UTC'), - as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-06-1994", "%d-%m-%Y", tz = 'UTC'), - as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) + as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) dim(data_dates) <- c(ftime = 2, sdate = 2) ref_dates <- c(as.Date("01-05-1993", "%d-%m-%Y", tz = 'UTC'), @@ -193,7 +248,6 @@ test_that("3. Output checks", { as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) dim(res_dates) <- c(ftime = 3, sdate = 2) - expect_equal( CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(1, 5), end1 = list(31, 5), start2 = list(1, 6), @@ -223,12 +277,11 @@ test_that("4. Test Seasonal", { dates <- c(dates, format(seq(as.Date(paste0("01-04-",year), "%d-%m-%Y", tz = 'UTC'), as.Date(paste0("01-11-",year), "%d-%m-%Y", - tz = 'UTC'), "day"), - "%Y-%m-%d")) + tz = 'UTC'), "day"), "%Y-%m-%d")) } dates <- as.Date(dates, tz = 'UTC') - dim.dates <- c(ftime=215, sweek = 1, sday = 1, - sdate=(hcst.endyear-hcst.inityear)+1) + dim.dates <- c(ftime = 215, sweek = 1, sday = 1, + sdate = (hcst.endyear - hcst.inityear) + 1) dim(dates) <- dim.dates ref <- NULL ref$data <- array(1:(215*25), c(ftime = 215, sdate = 25)) -- GitLab