diff --git a/DESCRIPTION b/DESCRIPTION index 4151ba48204d6c18077b6610edda77ed8ad2c0a8..4e2098307143c76f3baf0c43e5e4a840f6740ebf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,4 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 -Config/testthat/edition: 3 \ No newline at end of file +Config/testthat/edition: 3 diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 434cae35c0db9d2d0312c02473a0b5a8ef490078..03425c862da543e7874108a3ef4d1f8bf1aa6893 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -3,40 +3,58 @@ #'Some indicators are defined for specific temporal periods (e.g.: summer from #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to -#'merge past observations, or other references, to the forecast (or hindcast) -#'to compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. If the forecast simulation doesn't cover the required period because it -#'is initialized too early (e.g.: Initialization on November 1st the forecast -#'covers until the beginning of June next year), a climatology (or other -#'references) could be added at the end of the forecast lead time to cover the -#'desired period (e.g.: until the end of summer). +#'merge past observations, or other references, to the forecast (or hindcast) to +#'compute the indicator. If the forecast simulation doesn't cover the required +#'period because it is initialized too early (e.g.: Initialization on November +#'1st the forecast covers until the beginning of June next year), a climatology +#'(or other references) could be added at the end of the forecast lead time to +#'cover the desired period (e.g.: until the end of summer). #' -#'@param data1 An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. -#'@param data2 An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. +#'@param data1 An 's2dv_cube' object with the element 'data' being a +#' multidimensional array with named dimensions. All dimensions must be +#' equal to 'data2' dimensions except for the ones specified with 'memb_dim' +#' and 'time_dim'. If 'start1' and 'end1' are used to subset a period, the +#' Dates must be stored in element '$attrs$Dates' of the object. Dates must +#' have same time dimensions as element 'data'. +#'@param data2 An 's2dv_cube' object with the element 'data' being a +#' multidimensional array of named dimensions matching the dimensions of +#' parameter 'data1'. All dimensions must be equal to 'data1' except for the +#' ones specified with 'memb_dim' and 'time_dim'. If 'start2' and 'end2' are +#' used to subset a period, the Dates must be stored in element '$attrs$Dates' +#' of the object. Dates must have same time dimensions as element 'data'. #'@param start1 A list to define the initial date of the period to select from -#' data1 by providing a list of two elements: the initial date of the period +#' 'data1' by providing a list of two elements: the initial date of the period #' and the initial month of the period. #'@param end1 A list to define the final date of the period to select from -#' data1 by providing a list of two elements: the final day of the period and +#' 'data1' by providing a list of two elements: the final day of the period and #' the final month of the period. #'@param start2 A list to define the initial date of the period to select from -#' data2 by providing a list of two elements: the initial date of the period +#' 'data2' by providing a list of two elements: the initial date of the period #' and the initial month of the period. #'@param end2 A list to define the final date of the period to select from -#' data2 by providing a list of two elements: the final day of the period and +#' 'data2' by providing a list of two elements: the final day of the period and #' the final month of the period. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name -#' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested period. -#'@param sdate_dim A character string indicating the name of the dimension in -#' which the initialization dates are stored. +#' dimension that will be used to combine the two arrays. By default, it is set +#' to 'ftime'. Also, it will be used to subset the data in a requested +#' period. +#'@param memb_dim A character string indicating the name of the member +#' dimension. If the data are not ensemble ones, set as NULL. The default +#' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. -#'@return A 's2dv_cube' object containing the indicator in the element -#' \code{data}. +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data}. The element \code{data} will be a multidimensional array created +#'from the combination of 'data1' and 'data2'. The resulting array will contain +#'the following dimensions: the original dimensions of the input data, which are +#'common to both arrays and for the 'time_dim' dimension, the sum of the +#'corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, +#'regarding member dimension, two different situations can occur: (1) in the +#'case that one of the arrays does not have member dimension or is equal to 1, +#'the result will contain the repeated values of itself; (2) in the case that +#'both arrays have member dimension and is greater than 1, all combinations of +#'member dimension will be returned. The other elements of the 's2dv_cube' will +#'be updated with the combined information of both datasets. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -45,7 +63,7 @@ #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day")) #'dim(data_dates) <- c(ftime = 154, sdate = 2) #'data <- NULL -#'data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member= 2)) +#'data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) #'data$attrs$Dates<- data_dates #'class(data) <- 's2dv_cube' #'ref_dates <- seq(as.Date("01-01-1993", "%d-%m-%Y", tz = 'UTC'), @@ -58,88 +76,136 @@ #'new_data <- CST_MergeRefToExp(data1 = ref, data2 = data, #' start1 = list(21, 6), end1 = list(30, 6), #' start2 = list(1, 7), end2 = list(21, 9)) -#' -#'@import multiApply +#' #'@export -CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, - time_dim = 'ftime', sdate_dim = 'sdate', +CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, + start2 = NULL, end2 = NULL, + time_dim = 'ftime', memb_dim = 'member', ncores = NULL) { # Check 's2dv_cube' if (!inherits(data1, 's2dv_cube')) { - stop("Parameter 'ref' must be of the class 's2dv_cube'.") + stop("Parameter 'data1' must be of the class 's2dv_cube'.") } if (!inherits(data2, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube'.") + stop("Parameter 'data2' must be of the class 's2dv_cube'.") } # Dates subset of data1 - dates1 <- NULL if (!is.null(start1) && !is.null(end1)) { if (is.null(dim(data1$attrs$Dates))) { warning("Dimensions in 'data1' element 'attrs$Dates' are missed and ", "all data would be used.") - start <- NULL - end <- NULL - } else { - dates1 <- data1$attrs$Dates + start1 <- NULL + end1 <- NULL } } # Dates subset of data2 - dates2 <- NULL if (!is.null(start2) && !is.null(end2)) { if (is.null(dim(data2$attrs$Dates))) { warning("Dimensions in 'data2' element 'attrs$Dates' are missed and ", "all data would be used.") - start <- NULL - end <- NULL - } else { - dates2 <- data2$attrs$Dates + start2 <- NULL + end2 <- NULL } } + dates1 <- data1$attrs$Dates + dates2 <- data2$attrs$Dates + + # data data1$data <- MergeRefToExp(data1 = data1$data, dates1 = dates1, start1 = start1, end1 = end1, data2 = data2$data, dates2 = dates2, - start2, end2, time_dim = time_dim, - sdate_dim = sdate_dim, ncores = ncores) + start2, end2, time_dim = time_dim, + memb_dim = memb_dim, ncores = ncores) + # dims + data1$dims <- dim(data1$data) + + # coords + for (i_dim in names(dim(data1$data))) { + if (length(data1$coords[[i_dim]]) != dim(data1$data)[i_dim]) { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } else if (length(data1$coords[[i_dim]]) == length(data2$coords[[i_dim]])) { + if (any(as.vector(data1$coords[[i_dim]]) != as.vector(data2$coords[[i_dim]]))) { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } else if (!identical(attributes(data1$coords[[i_dim]]), + attributes(data2$coords[[i_dim]]))) { + attributes(data1$coords[[i_dim]]) <- NULL + } + } else { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } + } + + # Dates if (!is.null(dates1)) { - data1$attrs$Dates <- SelectPeriodOnDates(dates1, start = start1, end = end1, - time_dim = time_dim) + if (!is.null(start1) && !is.null(end1)) { + dates1 <- SelectPeriodOnDates(dates1, start = start1, end = end1, + time_dim = time_dim) + } } if (!is.null(dates2)) { - data2$attrs$Dates <- SelectPeriodOnDates(dates2, start = start2, - end = end2, time_dim = time_dim) + if ((!is.null(start2) && !is.null(end2))) { + dates2 <- SelectPeriodOnDates(dates2, start = start2, + end = end2, time_dim = time_dim) + } } - # TO DO CONCATENATE DATES - remove_dates1_dim <- FALSE - remove_dates2_dim <- FALSE - if (!is.null(data1$attrs$Dates) & !is.null(data2$attrs$Dates)) { - if (is.null(dim(data1$attrs$Dates))) { - remove_dates1_dim <- TRUE - dim(data1$attrs$Dates) <- length(data1$attrs$Dates) - names(dim(data1$attrs$Dates)) <- time_dim + remove_dates_dim <- FALSE + + if (!is.null(dates1) & !is.null(dates2)) { + if (is.null(dim(dates1))) { + remove_dates_dim <- TRUE + dim(dates1) <- length(dates1) + names(dim(dates1)) <- time_dim } - if (is.null(dim(data2$attrs$Dates))) { - remove_dates2_dim <- TRUE - dim(data2$attrs$Dates) <- length(data2$attrs$Dates) - names(dim(data2$attrs$Dates)) <- time_dim + if (is.null(dim(dates2))) { + remove_dates_dim <- TRUE + dim(dates2) <- length(dates2) + names(dim(dates2)) <- time_dim } } - res <- Apply(list(data1$attrs$Dates, data2$attrs$Dates), target_dims = time_dim, - c, output_dims = time_dim, ncores = ncores)$output1 - if (inherits(data1$attrs$Dates, 'Date')) { + res <- Apply(list(dates1, dates2), target_dims = time_dim, + 'c', output_dims = time_dim, ncores = ncores)$output1 + + if (inherits(dates1, 'Date')) { data1$attrs$Dates <- as.Date(res, origin = '1970-01-01') } else { - data1$attrs$Dates <- as.POSIXct(res*3600*24, origin = '1970-01-01', tz = 'UTC') + data1$attrs$Dates <- as.POSIXct(res, origin = '1970-01-01', tz = 'UTC') } - if (remove_dates1_dim) { + if (remove_dates_dim) { dim(data1$attrs$Dates) <- NULL } - if (remove_dates2_dim) { - dim(data2$attrs$Dates) <- NULL - } + # Variable + data1$attrs$Variable$varName <- unique(data1$attrs$Variable$varName, + data2$attrs$Variable$varName) + names_metadata <- names(data1$attrs$Variable$metadata) + data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable$metadata, + data2$attrs$Variable$metadata) + names(data1$attrs$Variable$metadata) <- names_metadata + + # source_files + data1$attrs$source_files <- unique(c(data1$attrs$source_files, data2$attrs$source_files)) + + # Datasets + data1$attrs$Datasets <- unique(c(data1$attrs$Datasets, data2$attrs$Datasets)) + + # when + data1$attrs$when <- Sys.time() + + # load_parameters (TO DO: remove with CST_Start) + if (!is.null(c(data1$attrs$load_parameters, data2$attrs$load_parameters))) { + data1$attrs$load_parameters <- list(data1 = data1$attrs$load_parameters, + data2 = data2$attrs$load_parameters) + } + return(data1) } @@ -148,128 +214,195 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #'Some indicators are defined for specific temporal periods (e.g.: summer from #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to -#'merge past observations, or other reference, to the forecast (or hindcast) to -#'compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. +#'merge past observations, or other references, to the forecast (or hindcast) to +#'compute the indicator. If the forecast simulation doesn't cover the required +#'period because it is initialized too early (e.g.: Initialization on November +#'1st the forecast covers until the beginning of June next year), a climatology +#'(or other references) could be added at the end of the forecast lead time to +#'cover the desired period (e.g.: until the end of summer). #' -#'@param data1 A multidimensional array with named dimensions. -#'@param dates1 A vector of dates or a multidimensional array of dates with -#' named dimensions matching the dimensions on parameter 'data1'. -#'@param data2 A multidimensional array with named dimensions. -#'@param dates2 A vector of dates or a multidimensional array of dates with -#' named dimensions matching the dimensions on parameter 'data2'. +#'@param data1 A multidimensional array with named dimensions. All dimensions +#' must be equal to 'data2' dimensions except for the ones specified with +#' 'memb_dim' and 'time_dim'. +#'@param dates1 A multidimensional array of dates with named dimensions matching +#' the temporal dimensions of parameter 'data1'. The common dimensions must be +#' equal to 'data1' dimensions. +#'@param data2 A multidimensional array of named dimensions matching the +#' dimensions of parameter 'data1'. All dimensions must be equal to 'data1' +#' except for the ones specified with 'memb_dim' and 'time_dim'. +#'@param dates2 A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data2'. The common dimensions must be +#' equal to 'data2' dimensions. #'@param start1 A list to define the initial date of the period to select from -#' data1 by providing a list of two elements: the initial date of the period -#' and the initial month of the period. +#' 'data1' by providing a list of two elements: the initial date of the period +#' and the initial month of the period. The initial date of the period must be +#' included in the 'dates1' array. #'@param end1 A list to define the final date of the period to select from -#' data1 by providing a list of two elements: the final day of the period and -#' the final month of the period. +#' 'data1' by providing a list of two elements: the final day of the period and +#' the final month of the period. The final date of the period must be +#' included in the 'dates1' array. #'@param start2 A list to define the initial date of the period to select from -#' data2 by providing a list of two elements: the initial date of the period -#' and the initial month of the period. +#' 'data2' by providing a list of two elements: the initial date of the period +#' and the initial month of the period. The initial date of the period must be +#' included in the 'dates2' array. #'@param end2 A list to define the final date of the period to select from -#' data2 by providing a list of two elements: the final day of the period and -#' the final month of the period. +#' 'data2' by providing a list of two elements: the final day of the period and +#' the final month of the period. The final date of the period must be +#' included in the 'dates2' array. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name -#' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested +#' dimension that will be used to combine the two arrays. By default, it is set +#' to 'ftime'. Also, it will be used to subset the data in a requested #' period. -#'@param sdate_dim A character string indicating the name of the dimension in -#' which the initialization dates are stored. +#'@param memb_dim A character string indicating the name of the member +#' dimension. If the 'data1' and 'data2' have no member dimension, set it as +#' NULL. It is set as 'member' by default. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return A multidimensional array with named dimensions. -#' +#'@return A multidimensional array created from the combination of 'data1' and +#''data2'. The resulting array will contain the following dimensions: the +#'original dimensions of the input data, which are common to both arrays and for +#'the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' +#'and 'data2'. If 'memb_dim' is not null, regarding member dimension, two +#'different situations can occur: (1) in the case that one of the arrays does +#'not have member dimension or is equal to 1, the result will contain the +#'repeated values of itself; (2) in the case that both arrays have member +#'dimension and is greater than 1, all combinations of member dimension will be +#'returned. +#' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1993","%d-%m-%Y", tz = 'UTC'), "day"), #' seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day")) -#'dim(data_dates) <- c(time = 154, sdate = 2) +#'dim(data_dates) <- c(ftime = 154, sdate = 2) #'ref_dates <- seq(as.Date("01-01-1993", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day") -#'dim(ref_dates) <- c(time = 350, sdate = 2) -#'ref <- array(1001:1700, c(time = 350, sdate = 2)) -#'data <- array(1:(2*154*2), c(time = 154, sdate = 2, member= 2)) +#'dim(ref_dates) <- c(ftime = 350, sdate = 2) +#'ref <- array(1001:1700, c(ftime = 350, sdate = 2)) +#'data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) #'new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), #' end1 = list(30, 6), data2 = data, dates2 = data_dates, #' start2 = list(1, 7), end = list(21, 9), -#' time_dim = 'time') +#' time_dim = 'ftime') #' #'@import multiApply #'@export -MergeRefToExp <- function(data1, dates1, start1, end1, data2, dates2, start2, - end2, time_dim = 'ftime', sdate_dim = 'sdate', +MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, + start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, + time_dim = 'ftime', memb_dim = 'member', ncores = NULL) { # Input checks - # data - if (!is.array(data1)) { - dim(data1) <- c(length(data1)) - names(dim(data1)) <- time_dim + ## data1 and data2 + if (!is.array(data1) | !is.array(data2)) { + stop("Parameters 'data1' and 'data2' must be arrays.") } - if (!is.array(data2)) { - dim(data2) <- c(length(data2)) - names(dim(data2)) <- time_dim + if (is.null(names(dim(data1))) | is.null(names(dim(data2)))) { + stop("Parameters 'data1' and 'data2' must have named dimensions.") } - # dates - if (!is.null(dates1) & !is.null(dates2)) { - if (is.null(dim(dates1))) { - warning("Dimensions in 'dates1' element are missed and ", - "all data would be used.") - dim(dates1) <- length(dates1) - names(dim(dates1)) <- time_dim + ## time_dim + if (!is.character(time_dim)) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data1)) | !time_dim %in% names(dim(data2))) { + stop("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", + "names.") + } + ## memb_dim + data1dims <- names(dim(data1)) + data2dims <- names(dim(data2)) + if (!is.null(memb_dim)) { + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' must be a character string.") } - if (is.null(dim(dates2))) { - warning("Dimensions in 'dates2' element are missed and ", - "all data would be used.") - dim(dates2) <- length(dates2) - names(dim(dates2)) <- time_dim + if (!memb_dim %in% names(dim(data1)) & !memb_dim %in% names(dim(data2))) { + stop("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. ", + "Set it to NULL if there is no member dimension.") + } + if ((memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2)))) { + if (dim(data1)[memb_dim] != dim(data2)[memb_dim]) { + if (dim(data1)[memb_dim] == 1) { + data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) + } else if (dim(data2)[memb_dim] == 1) { + data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) + } else { + memb_dim1 <- dim(data1)[memb_dim] + data1 <- Apply(list(data1), target_dims = memb_dim, + fun = function(x, memb_rep) { + return(rep(x, each = memb_rep)) + }, memb_rep = dim(data2)[memb_dim], + output_dims = memb_dim, ncores = ncores)$output1 + data2 <- Apply(list(data2), target_dims = memb_dim, + fun = function(x, memb_rep) { + return(rep(x, memb_rep)) + }, memb_rep = memb_dim1, + output_dims = memb_dim, ncores = ncores)$output1 + } + } } - data1 <- SelectPeriodOnData(data1, dates = dates1, start = start1, - end = end1, time_dim = time_dim, ncores = ncores) } + ## data1 and data2 (2) + name_data1 <- sort(names(dim(data1))) + name_data2 <- sort(names(dim(data2))) - # Check if data2 has dimension sdate_dim and it should be added to data1: - if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && - !sdate_dim %in% names(dim(data1))) { - dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) - names(dim(data1)) <- c(time_dim, sdate_dim) - } - # Check if data1 has dimension sdate_dim and it should be added to data2: - if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && - !sdate_dim %in% names(dim(data2))) { - dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) - names(dim(data2)) <- c(time_dim, sdate_dim) + name_data1 <- name_data1[-which(name_data1 %in% c(time_dim, memb_dim))] + name_data2 <- name_data2[-which(name_data2 %in% c(time_dim, memb_dim))] + + if (!identical(length(name_data1), length(name_data2)) | + !identical(dim(data1)[name_data1], dim(data2)[name_data2])) { + stop(paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.")) } - # Check if data1 needs to be extended to the length of the dimensions of data2: - if (length(dim(data2)) != length(dim(data1))) { - dif_dims <- which(names(dim(data2)) %in% names(dim(data1)) == FALSE) - if (length(dif_dims) > 0) { - for (i in dif_dims) { - data1 <- .insertdim(data1, posdim = i, lendim = dim(data2)[i], - name = names(dim(data2))[i]) + ## dates1 + if (!is.null(start1) & !is.null(end1)) { + if (is.null(dates1)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else if (!all(c(is.list(start1), is.list(end1)))) { + warning("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") + } else { + if (!is.null(dim(dates1))) { + data1 <- SelectPeriodOnData(data = data1, dates = dates1, start = start1, + end = end1, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates1' must have named dimensions if 'start' and ", + "'end' are not NULL. All 'data1' will be used.") } } } - # Check if data2 needs to be extended to the length of the dimensions of data1: - if (length(dim(data1)) != length(dim(data2))) { - dif_dims <- which(names(dim(data1)) %in% names(dim(data2)) == FALSE) - if (length(dif_dims) > 0) { - for (i in dif_dims) { - data2 <- .insertdim(data2, posdim = i, lendim = dim(data1)[i], - name = names(dim(data1))[i]) + ## dates2 + if (!is.null(start2) & !is.null(end2)) { + if (is.null(dates2)) { + warning("Parameter 'dates2' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else if (!all(c(is.list(start2), is.list(end2)))) { + warning("Parameter 'start2' and 'end2' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") + } else { + if (!is.null(dim(dates2))) { + data2 <- SelectPeriodOnData(data = data2, dates = dates2, start = start2, + end = end2, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates2' must have named dimensions if 'start2' and ", + "'end2' are not NULL. All 'data2' will be used.") } } } - if (!is.null(dates2)) { - data2 <- SelectPeriodOnData(data2, dates = dates2, start = start2, - end = end2, time_dim = time_dim, ncores = ncores) - } + data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 + + if (all(names(dim(data1)) %in% data1dims)) { + pos <- match(data1dims, names(dim(data1))) + data1 <- aperm(data1, pos) + } else if (all(names(dim(data1)) %in% data2dims)) { + pos <- match(data2dims, names(dim(data1))) + data1 <- aperm(data1, pos) + } return(data1) } - - diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index 9f9a3b977ce032bb44b6eb7724574738992e5d65..bbca8c4faf98efbb29aa8cdffd560ea0585e91a9 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -7,64 +7,82 @@ CST_MergeRefToExp( data1, data2, - start1, - end1, - start2, - end2, + start1 = NULL, + end1 = NULL, + start2 = NULL, + end2 = NULL, time_dim = "ftime", - sdate_dim = "sdate", + memb_dim = "member", ncores = NULL ) } \arguments{ -\item{data1}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data1}{An 's2dv_cube' object with the element 'data' being a +multidimensional array with named dimensions. All dimensions must be +equal to 'data2' dimensions except for the ones specified with 'memb_dim' +and 'time_dim'. If 'start1' and 'end1' are used to subset a period, the +Dates must be stored in element '$attrs$Dates' of the object. Dates must +have same time dimensions as element 'data'.} -\item{data2}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data2}{An 's2dv_cube' object with the element 'data' being a +multidimensional array of named dimensions matching the dimensions of +parameter 'data1'. All dimensions must be equal to 'data1' except for the +ones specified with 'memb_dim' and 'time_dim'. If 'start2' and 'end2' are +used to subset a period, the Dates must be stored in element '$attrs$Dates' +of the object. Dates must have same time dimensions as element 'data'.} \item{start1}{A list to define the initial date of the period to select from -data1 by providing a list of two elements: the initial date of the period +'data1' by providing a list of two elements: the initial date of the period and the initial month of the period.} \item{end1}{A list to define the final date of the period to select from -data1 by providing a list of two elements: the final day of the period and +'data1' by providing a list of two elements: the final day of the period and the final month of the period.} \item{start2}{A list to define the initial date of the period to select from -data2 by providing a list of two elements: the initial date of the period +'data2' by providing a list of two elements: the initial date of the period and the initial month of the period.} \item{end2}{A list to define the final date of the period to select from -data2 by providing a list of two elements: the final day of the period and +'data2' by providing a list of two elements: the final day of the period and the final month of the period.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name -matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested period.} +dimension that will be used to combine the two arrays. By default, it is set +to 'ftime'. Also, it will be used to subset the data in a requested +period.} -\item{sdate_dim}{A character string indicating the name of the dimension in -which the initialization dates are stored.} +\item{memb_dim}{A character string indicating the name of the member +dimension. If the data are not ensemble ones, set as NULL. The default +value is 'member'.} \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } \value{ -A 's2dv_cube' object containing the indicator in the element - \code{data}. +An 's2dv_cube' object containing the indicator in the element +\code{data}. The element \code{data} will be a multidimensional array created +from the combination of 'data1' and 'data2'. The resulting array will contain +the following dimensions: the original dimensions of the input data, which are +common to both arrays and for the 'time_dim' dimension, the sum of the +corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, +regarding member dimension, two different situations can occur: (1) in the +case that one of the arrays does not have member dimension or is equal to 1, +the result will contain the repeated values of itself; (2) in the case that +both arrays have member dimension and is greater than 1, all combinations of +member dimension will be returned. The other elements of the 's2dv_cube' will +be updated with the combined information of both datasets. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to -merge past observations, or other references, to the forecast (or hindcast) -to compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. If the forecast simulation doesn't cover the required period because it -is initialized too early (e.g.: Initialization on November 1st the forecast -covers until the beginning of June next year), a climatology (or other -references) could be added at the end of the forecast lead time to cover the -desired period (e.g.: until the end of summer). +merge past observations, or other references, to the forecast (or hindcast) to +compute the indicator. If the forecast simulation doesn't cover the required +period because it is initialized too early (e.g.: Initialization on November +1st the forecast covers until the beginning of June next year), a climatology +(or other references) could be added at the end of the forecast lead time to +cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), @@ -73,7 +91,7 @@ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) dim(data_dates) <- c(ftime = 154, sdate = 2) data <- NULL -data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member= 2)) +data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) data$attrs$Dates<- data_dates class(data) <- 's2dv_cube' ref_dates <- seq(as.Date("01-01-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index e6b40c8c57fdff75e2e16ed51aa3c1d6c7d38d76..e22b52d03277ea497ed333b2fdcfa8120fb95281 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -6,82 +6,104 @@ \usage{ MergeRefToExp( data1, - dates1, - start1, - end1, data2, - dates2, - start2, - end2, + dates1 = NULL, + dates2 = NULL, + start1 = NULL, + end1 = NULL, + start2 = NULL, + end2 = NULL, time_dim = "ftime", - sdate_dim = "sdate", + memb_dim = "member", ncores = NULL ) } \arguments{ -\item{data1}{A multidimensional array with named dimensions.} +\item{data1}{A multidimensional array with named dimensions. All dimensions +must be equal to 'data2' dimensions except for the ones specified with +'memb_dim' and 'time_dim'.} -\item{dates1}{A vector of dates or a multidimensional array of dates with -named dimensions matching the dimensions on parameter 'data1'.} +\item{data2}{A multidimensional array of named dimensions matching the +dimensions of parameter 'data1'. All dimensions must be equal to 'data1' +except for the ones specified with 'memb_dim' and 'time_dim'.} -\item{start1}{A list to define the initial date of the period to select from -data1 by providing a list of two elements: the initial date of the period -and the initial month of the period.} +\item{dates1}{A multidimensional array of dates with named dimensions matching +the temporal dimensions of parameter 'data1'. The common dimensions must be +equal to 'data1' dimensions.} -\item{end1}{A list to define the final date of the period to select from -data1 by providing a list of two elements: the final day of the period and -the final month of the period.} +\item{dates2}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data2'. The common dimensions must be +equal to 'data2' dimensions.} -\item{data2}{A multidimensional array with named dimensions.} +\item{start1}{A list to define the initial date of the period to select from +'data1' by providing a list of two elements: the initial date of the period +and the initial month of the period. The initial date of the period must be +included in the 'dates1' array.} -\item{dates2}{A vector of dates or a multidimensional array of dates with -named dimensions matching the dimensions on parameter 'data2'.} +\item{end1}{A list to define the final date of the period to select from +'data1' by providing a list of two elements: the final day of the period and +the final month of the period. The final date of the period must be +included in the 'dates1' array.} \item{start2}{A list to define the initial date of the period to select from -data2 by providing a list of two elements: the initial date of the period -and the initial month of the period.} +'data2' by providing a list of two elements: the initial date of the period +and the initial month of the period. The initial date of the period must be +included in the 'dates2' array.} \item{end2}{A list to define the final date of the period to select from -data2 by providing a list of two elements: the final day of the period and -the final month of the period.} +'data2' by providing a list of two elements: the final day of the period and +the final month of the period. The final date of the period must be +included in the 'dates2' array.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name -matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested +dimension that will be used to combine the two arrays. By default, it is set +to 'ftime'. Also, it will be used to subset the data in a requested period.} -\item{sdate_dim}{A character string indicating the name of the dimension in -which the initialization dates are stored.} +\item{memb_dim}{A character string indicating the name of the member +dimension. If the 'data1' and 'data2' have no member dimension, set it as +NULL. It is set as 'member' by default.} \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } \value{ -A multidimensional array with named dimensions. +A multidimensional array created from the combination of 'data1' and +'data2'. The resulting array will contain the following dimensions: the +original dimensions of the input data, which are common to both arrays and for +the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' +and 'data2'. If 'memb_dim' is not null, regarding member dimension, two +different situations can occur: (1) in the case that one of the arrays does +not have member dimension or is equal to 1, the result will contain the +repeated values of itself; (2) in the case that both arrays have member +dimension and is greater than 1, all combinations of member dimension will be +returned. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to -merge past observations, or other reference, to the forecast (or hindcast) to -compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. +merge past observations, or other references, to the forecast (or hindcast) to +compute the indicator. If the forecast simulation doesn't cover the required +period because it is initialized too early (e.g.: Initialization on November +1st the forecast covers until the beginning of June next year), a climatology +(or other references) could be added at the end of the forecast lead time to +cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1993","\%d-\%m-\%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) -dim(data_dates) <- c(time = 154, sdate = 2) +dim(data_dates) <- c(ftime = 154, sdate = 2) ref_dates <- seq(as.Date("01-01-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day") -dim(ref_dates) <- c(time = 350, sdate = 2) -ref <- array(1001:1700, c(time = 350, sdate = 2)) -data <- array(1:(2*154*2), c(time = 154, sdate = 2, member= 2)) +dim(ref_dates) <- c(ftime = 350, sdate = 2) +ref <- array(1001:1700, c(ftime = 350, sdate = 2)) +data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), end1 = list(30, 6), data2 = data, dates2 = data_dates, start2 = list(1, 7), end = list(21, 9), - time_dim = 'time') + time_dim = 'ftime') } diff --git a/tests/testthat/test-MergeRefToExp.R b/tests/testthat/test-MergeRefToExp.R index adbdfd6f42f1994d0bdf3ba5626df49b1a11cc7d..57cd42596e55f208d729a044550aacbeeb915380 100644 --- a/tests/testthat/test-MergeRefToExp.R +++ b/tests/testthat/test-MergeRefToExp.R @@ -1,6 +1,190 @@ ########################################################################### -test_that("Sanity checks", { +# cube1 +dates_data1 <- c(seq(as.Date("11-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("20-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("11-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("20-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) +dim(dates_data1) <- c(ftime = 10, sdate = 2) +cube1 <- NULL +cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member = 2)) +cube1$attrs$Dates <- dates_data1 +class(cube1) <- 's2dv_cube' +ref_dates1 <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) +dim(ref_dates1) <- c(ftime = 10, sdate = 2) +cube_ref <- NULL +cube_ref$data <- array(1001:1020, c(ftime = 10, sdate = 2)) +cube_ref$attrs$Dates <- ref_dates1 +class(cube_ref) <- 's2dv_cube' +start1 <- list(3, 7) +end1 <- list(10, 7) +start2 <- list(11, 7) +end2 <- list(15, 7) + +# dat1 +ref1 <- array(1001:1020, c(ftime = 10, sdate = 2, member = 1)) +data1 <- array(1:40, c(ftime = 10, sdate = 2, member = 2)) + + +# dat2 +ref2 <- array(1001:1015, c(ftime = 5, sdate = 1, member = 3)) +data2 <- array(1:6, c(ftime = 3, sdate = 1, member = 2)) + +########################################################################### +test_that("1. Input checks", { + # 's2dv_cube' + expect_error( + CST_MergeRefToExp('a'), + "Parameter 'data1' must be of the class 's2dv_cube'." + ) + expect_error( + CST_MergeRefToExp(cube1, array(10)), + "Parameter 'data2' must be of the class 's2dv_cube'." + ) + # data + expect_error( + MergeRefToExp(10, 10), + "Parameters 'data1' and 'data2' must be arrays." + ) + expect_error( + MergeRefToExp(array(10), array(10)), + "Parameters 'data1' and 'data2' must have named dimensions." + ) + # time_dim + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, time_dim = 'time'), + paste0("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", + "names.") + ) + # memb_dim + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, memb_dim = 'time'), + paste0("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. ", + "Set it to NULL if there is no member dimension.") + ) + # common dimensions + expect_error( + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, dat = 3)), + data2 = array(1:16, c(sdate = 2, ftime = 2, var = 4)), + memb_dim = NULL), + paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.") + ) + expect_error( + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, dat = 1)), + data2 = array(1:16, c(sdate = 2, ftime = 2)), + memb_dim = NULL), + paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.") + ) + # dates + expect_warning( + MergeRefToExp(data1 = array(1:4, c(sdate = 2, ftime = 2, lat = 1)), + data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 1)), + memb_dim = NULL, start1 = list(1, 1), end1 = list(3, 1), + start2 = NULL, end2 = NULL), + paste0("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + ) + expect_warning( + MergeRefToExp(data1 = ref1, + data2 = data1, dates1 = ref_dates1, dates2 = dates_data1, + start1 = c(3, 7), end1 = end1, + start2 = start2, end2 = end2), + paste0("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") + ) + expect_warning( + MergeRefToExp(data1 = ref1, + data2 = data1, dates1 = as.vector(ref_dates1), + dates2 = dates_data1, start1 = start1, end1 = end1, + start2 = start2, end2 = end2), + paste0("Parameter 'dates1' must have named dimensions if 'start' and ", + "'end' are not NULL. All 'data1' will be used.") + ) +}) + +########################################################################### + +test_that("2. Output checks: CST_MergeRefToExp", { + res1 <- CST_MergeRefToExp(data1 = cube_ref, data2 = cube1, + start1 = start1, end1 = end1, + start2 = start2, end2 = end2) + # dims + expect_equal( + dim(res1$data), + res1$dims + ) + # coords + expect_equal( + names(dim(res1$data)), + names(res1$coords) + ) + # Dates + expect_equal( + dim(res1$data)[c('ftime', 'sdate')], + dim(res1$attrs$Dates) + ) + # data + expect_equal( + res1$data[1:8,,1], + res1$data[1:8,,2] + ) +}) + +########################################################################### + +test_that("3. Output checks: MergeRefToExp", { + # Minimum dimensions + expect_equal( + MergeRefToExp(data1 = array(1:2, c(ftime = 2)), + data2 = array(1, c(ftime = 1)), memb_dim = NULL), + array(c(1,2,1), dim = c(ftime = 3)) + ) + # res2 + res2 <- MergeRefToExp(data1 = ref1, data2 = data1) + ## dims + expect_equal( + dim(res2), + c(ftime = 20, sdate = 2, member = 2) + ) + ## data + expect_equal( + res2[,1,], + array(c(1001:1010, 1:10, 1001:1010, 21:30), dim = c(ftime = 20, member = 2)) + ) + # res3: multiple different members + res3 <- MergeRefToExp(data1 = ref2, data2 = data2) + ## dims + expect_equal( + dim(res3), + c(ftime = 8, sdate = 1, member = 6) + ) + expect_equal( + as.vector(res3[1:5, 1, ]), + c(rep(1001:1005, 2), rep(1006:1010, 2), rep(1011:1015, 2)) + ) + expect_equal( + as.vector(res3[6:8, 1, ]), + rep(c(1:3, 4:6), 3) + ) +}) + +########################################################################### + +test_that("3. Output checks: Dates", { data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-12-1993", "%d-%m-%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), @@ -19,12 +203,12 @@ test_that("Sanity checks", { data$attrs$Dates <- data_dates class(data) <- 's2dv_cube' -suppressWarnings( - expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), - end1 = list(30, 6), start2 = list(1, 7), - end2 = list(21, 9))$attrs$Dates, - SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) -) + suppressWarnings( + expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), + end1 = list(30, 6), start2 = list(1, 7), + end2 = list(21, 9))$attrs$Dates, + SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) + ) output <- array(c(1172:1181, 1:83, 1537:1546, 155:237, 1172:1181, 309:391, 1537:1546, 463:545), c(ftime = 93, sdate = 2, member = 2)) @@ -33,14 +217,15 @@ suppressWarnings( CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), end1 = list(30, 6), start2 = list(1, 7), end2 = list(21, 9))$data, - output) + output + ) # issue 13: One lead time data_dates <- c(as.Date("01-06-1993", "%d-%m-%Y", tz = 'UTC'), - as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-06-1994", "%d-%m-%Y", tz = 'UTC'), - as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) + as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) dim(data_dates) <- c(ftime = 2, sdate = 2) ref_dates <- c(as.Date("01-05-1993", "%d-%m-%Y", tz = 'UTC'), @@ -63,7 +248,6 @@ suppressWarnings( as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) dim(res_dates) <- c(ftime = 3, sdate = 2) - expect_equal( CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(1, 5), end1 = list(31, 5), start2 = list(1, 6), @@ -81,11 +265,11 @@ suppressWarnings( end2 = list(31, 7))$data, output ) - }) -test_that("Seasonal", { +########################################################################### +test_that("4. Test Seasonal", { dates <- NULL hcst.inityear <- 1993 hcst.endyear <- 2017 @@ -93,12 +277,11 @@ test_that("Seasonal", { dates <- c(dates, format(seq(as.Date(paste0("01-04-",year), "%d-%m-%Y", tz = 'UTC'), as.Date(paste0("01-11-",year), "%d-%m-%Y", - tz = 'UTC'), "day"), - "%Y-%m-%d")) + tz = 'UTC'), "day"), "%Y-%m-%d")) } dates <- as.Date(dates, tz = 'UTC') - dim.dates <- c(ftime=215, sweek = 1, sday = 1, - sdate=(hcst.endyear-hcst.inityear)+1) + dim.dates <- c(ftime = 215, sweek = 1, sday = 1, + sdate = (hcst.endyear - hcst.inityear) + 1) dim(dates) <- dim.dates ref <- NULL ref$data <- array(1:(215*25), c(ftime = 215, sdate = 25))