From b4708bf08aa5beff500752987dc1654bf82040b1 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 23 Nov 2022 15:49:51 +0100 Subject: [PATCH 01/13] Develop dat_dim parameter for BiasCorrection and update Analogs documentation --- R/CST_BiasCorrection.R | 197 +++++++++++++++++------ man/Analogs.Rd | 72 +++++---- man/BiasCorrection.Rd | 18 ++- man/CST_Analogs.Rd | 51 +++--- man/CST_BiasCorrection.Rd | 19 ++- tests/testthat/test-CST_BiasCorrection.R | 155 +++++++++++++++++- 6 files changed, 392 insertions(+), 120 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 20b51082..408bbb41 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -7,18 +7,27 @@ #' #'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} #' function, containing the seasonal forecast experiment data in the element -#' named \code{$data} +#' named \code{$data} with at least time and member dimensions. #'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. +#' function, containing the observed data in the element named \code{$data} +#' with at least time dimension. #'@param exp_cor An object of class \code{s2dv_cube} as returned by -#' \code{CST_Load} function, containing the seasonl forecast experiment to be -#' corrected. If it is NULL, the 'exp' forecast will be corrected. +#' \code{CST_Load} function, containing the seasonal forecast experiment to be +#' corrected with at least time dimension. If it is NULL, the 'exp' forecast +#' will be corrected. The dimensions must be the same as 'exp' except +#' dataset dimension. If there is only one corrected dataset, it should not +#' have dataset dimension. If there is a corresponding corrected dataset for +#' each 'exp' forecast, the dataset dimension must have the same length as in +#' 'exp'. The default value is NULL. #'@param na.rm A logical value indicating whether missing values should be #' stripped before the computation proceeds, by default it is set to FALSE. #'@param memb_dim A character string indicating the name of the member #' dimension. By default, it is set to 'member'. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is NULL. #'@return An object of class \code{s2dv_cube} containing the bias corrected @@ -45,8 +54,8 @@ #'@import multiApply #'@export CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, - memb_dim = 'member', sdate_dim = 'sdate', - ncores = NULL) { + memb_dim = 'member', sdate_dim = 'sdate', + dat_dim = NULL, ncores = NULL) { if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") @@ -62,7 +71,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } BiasCorrected <- BiasCorrection(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, - memb_dim = memb_dim, sdate_dim = sdate_dim, + memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores) pos <- match(dimnames, names(dim(BiasCorrected))) @@ -92,19 +101,25 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, #'standard deviation and mean to that of the reference dataset. #' #'@param exp A multidimensional array with named dimensions containing the -#' seasonal forecast experiment data with at least 'member' and 'sdate' -#' dimensions. +#' seasonal forecast experiment data with at least time and member dimensions. #'@param obs A multidimensional array with named dimensions containing the -#' observed data with at least 'sdate' dimension. +#' observed data with at least time dimension. #'@param exp_cor A multidimensional array with named dimensions containing the -#' seasonl forecast experiment to be corrected. If it is NULL, the 'exp' -#' forecast will be corrected. +#' seasonal forecast experiment to be corrected with at least time and member +#' dimension. If it is NULL, the 'exp' forecast will be corrected. The +#' dimensions must be the same as 'exp' except dataset dimension. If there is +#' only one corrected dataset, it should not have dataset dimension. If there +#' is a corresponding corrected dataset for each 'exp' forecast, the dataset +#' dimension must have the same length as in 'exp'. The default value is NULL. #'@param na.rm A logical value indicating whether missing values should be #' stripped before the computation proceeds, by default it is set to FALSE. #'@param memb_dim A character string indicating the name of the member #' dimension. By default, it is set to 'member'. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is NULL. #' @@ -127,7 +142,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, #'@export BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, memb_dim = 'member', sdate_dim = 'sdate', - ncores = NULL) { + dat_dim = NULL, ncores = NULL) { # Check inputs ## exp, obs if (!is.array(exp) || !is.numeric(exp)) { @@ -174,6 +189,57 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, if (dim(obs)[memb_dim] != 1) { stop("If parameter 'obs' has dimension 'memb_dim' its length must be equal to 1.") } + } else { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = memb_dim) + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and exp_cor (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(exp_cor)) { + name_exp_cor <- sort(names(dim(exp_cor))) + name_exp <- sort(names(dim(exp))) + if (!is.null(dat_dim)) { + if (dat_dim %in% exp_cordims) { + if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) { + stop(paste0("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.")) + } + name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)] + target_dims_cor <- c(memb_dim, sdate_dim, dat_dim) + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + name_exp <- name_exp[-which(name_exp == dat_dim)] + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + if (!identical(length(name_exp), length(name_exp_cor)) | + !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { + stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all dimensions except 'dat_dim' if there is ", + "only one reference dataset.")) + } } ## na.rm if (!is.logical(na.rm)) { @@ -191,57 +257,94 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, stop("Parameter 'ncores' must be either NULL or a positive integer.") } } - - target_dims_obs <- sdate_dim - if (memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(memb_dim, target_dims_obs) - } if (is.null(exp_cor)) { BiasCorrected <- Apply(data = list(var_obs = obs, var_exp = exp), - target_dims = list(target_dims_obs, - c(memb_dim, sdate_dim)), - fun = .sbc, + target_dims = list(c(memb_dim, sdate_dim, dat_dim), + c(memb_dim, sdate_dim, dat_dim)), + fun = .sbc, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores)$output1 } else { BiasCorrected <- Apply(data = list(var_obs = obs, var_exp = exp, var_cor = exp_cor), - target_dims = list(target_dims_obs, - c(memb_dim, sdate_dim), - c(memb_dim, sdate_dim)), - fun = .sbc, - output_dims = c(memb_dim, sdate_dim), + target_dims = list(c(memb_dim, sdate_dim, dat_dim), + c(memb_dim, sdate_dim, dat_dim), + target_dims_cor), + fun = .sbc, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores)$output1 } + if (!is.null(dat_dim)) { + pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), + names(dim(BiasCorrected))) + BiasCorrected <- aperm(BiasCorrected, pos) + } + return(BiasCorrected) } -.sbc <- function(var_obs, var_exp , var_cor = NULL, na.rm = FALSE) { +.sbc <- function(var_obs, var_exp, var_cor = NULL, dat_dim = NULL, na.rm = FALSE) { - ntime <- dim(var_exp)[2] - corrected <- NA * var_exp + # exp: [memb, sdate, (dat)] + # obs: [memb, sdate, (dat)] + # ref: [memb, sdate, (dat)] or NULL - if (is.null(var_cor)) { - for (t in 1:ntime) { - # parameters - sd_obs <- sd(var_obs[-t], na.rm = na.rm) - sd_exp <- sd(var_exp[, -t], na.rm = na.rm) - clim_exp <- mean(var_exp[, -t], na.rm = na.rm) - clim_obs <- mean(var_obs[-t], na.rm = na.rm) - - # bias corrected forecast - corrected[, t] <- ((var_exp[, t] - clim_exp) * (sd_obs / sd_exp)) + clim_obs + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + var_exp <- InsertDim(var_exp, posdim = 3, lendim = 1, name = 'dataset') + var_obs <- InsertDim(var_obs, posdim = 3, lendim = 1, name = 'dataset') + if (!is.null(var_cor)) { + var_cor <- InsertDim(var_cor, posdim = 3, lendim = 1, name = 'dataset') } } else { - # parameters - sd_obs <- sd(var_obs, na.rm = na.rm) - sd_exp <- sd(var_exp, na.rm = na.rm) - clim_exp <- mean(var_exp, na.rm = na.rm) - clim_obs <- mean(var_obs, na.rm = na.rm) - - # bias corrected forecast - corrected <- ((var_cor - clim_exp) * (sd_obs / sd_exp)) + clim_obs + nexp <- as.numeric(dim(var_exp)[dat_dim]) + nobs <- as.numeric(dim(var_obs)[dat_dim]) + } + + if (!is.null(var_cor)) { + if (length(dim(var_cor)) == 2) { # ref: [memb, sdate] + cor_dat_dim <- FALSE + } else { + cor_dat_dim <- TRUE + } + } + + ntime <- dim(var_exp)[2] + corrected <- array(dim = c(dim(var_exp)[1:2], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + if (is.null(var_cor)) { + for (t in 1:ntime) { + # parameters + sd_obs <- sd(var_obs[, -t, j], na.rm = na.rm) + sd_exp <- sd(var_exp[, -t, i], na.rm = na.rm) + clim_exp <- mean(var_exp[, -t, i], na.rm = na.rm) + clim_obs <- mean(var_obs[, -t, j], na.rm = na.rm) + + # bias corrected forecast + corrected[, t, i, j] <- ((var_exp[, t, i] - clim_exp) * (sd_obs / sd_exp)) + clim_obs + } + } else { + # parameters + sd_obs <- sd(var_obs[, , j], na.rm = na.rm) + sd_exp <- sd(var_exp[, , i], na.rm = na.rm) + clim_exp <- mean(var_exp[, , i], na.rm = na.rm) + clim_obs <- mean(var_obs[, , j], na.rm = na.rm) + + # bias corrected forecast + if (isTRUE(cor_dat_dim)) { + corrected[, , i, j] <- ((var_cor[, , i] - clim_exp) * (sd_obs / sd_exp)) + clim_obs + } else { + corrected[, , i, j] <- ((var_cor - clim_exp) * (sd_obs / sd_exp)) + clim_obs + } + } + } + } + + if (is.null(dat_dim)) { + dim(corrected) <- dim(var_exp)[1:2] } return(corrected) diff --git a/man/Analogs.Rd b/man/Analogs.Rd index fc26a552..a7addc73 100644 --- a/man/Analogs.Rd +++ b/man/Analogs.Rd @@ -20,56 +20,56 @@ Analogs( region = NULL, nAnalogs = NULL, AnalogsInfo = FALSE, - ncores = 1 + ncores = NULL ) } \arguments{ -\item{expL}{an array of N named dimensions containing the experimental field +\item{expL}{An array of N named dimensions containing the experimental field on the large scale for which the analog is aimed. This field is used to in all the criterias. If parameter 'expVar' is not provided, the function will return the expL analog. The element 'data' in the 's2dv_cube' object must -have, at least, latitudinal and longitudinal dimensions. The object is expect -to be already subset for the desired large scale region.} +have, at least, latitudinal and longitudinal dimensions. The object is +expect to be already subset for the desired large scale region.} -\item{obsL}{an array of N named dimensions containing the observational field +\item{obsL}{An array of N named dimensions containing the observational field on the large scale. The element 'data' in the 's2dv_cube' object must have the same latitudinal and longitudinal dimensions as parameter 'expL' and a single temporal dimension with the maximum number of available observations.} -\item{time_obsL}{a character string indicating the date of the observations +\item{time_obsL}{A character string indicating the date of the observations in the format "dd/mm/yyyy". Reference time to search for analogs.} -\item{time_expL}{an array of N named dimensions (coinciding with time +\item{time_expL}{An array of N named dimensions (coinciding with time dimensions in expL) of character string(s) indicating the date(s) of the experiment in the format "dd/mm/yyyy". Time(s) to find the analogs.} -\item{lonL}{a vector containing the longitude of parameter 'expL'.} +\item{lonL}{A vector containing the longitude of parameter 'expL'.} -\item{latL}{a vector containing the latitude of parameter 'expL'.} +\item{latL}{A vector containing the latitude of parameter 'expL'.} -\item{expVar}{an array of N named dimensions containing the experimental +\item{expVar}{An array of N named dimensions containing the experimental field on the local scale, usually a different variable to the parameter 'expL'. If it is not NULL (by default, NULL), the returned field by this function will be the analog of parameter 'expVar'.} -\item{obsVar}{an array of N named dimensions containing the field of the +\item{obsVar}{An array of N named dimensions containing the field of the same variable as the passed in parameter 'expVar' for the same region.} \item{criteria}{a character string indicating the criteria to be used for the selection of analogs: -\itemize{ -\item{Large_dist} minimum Euclidean distance in the large scale pattern; -\item{Local_dist} minimum Euclidean distance in the large scale pattern -and minimum Euclidean distance in the local scale pattern; and -\item{Local_cor} minimum Euclidean distance in the large scale pattern, -minimum Euclidean distance in the local scale pattern and highest -correlation in the local variable to downscale.}} - -\item{excludeTime}{an array of N named dimensions (coinciding with time + \itemize{\item{Large_dist} minimum Euclidean distance in the large scale pattern; + \item{Local_dist} minimum Euclidean distance in the large scale pattern + and minimum Euclidean distance in the local scale pattern; and + \item{Local_cor} minimum Euclidean distance in the large scale pattern, + minimum Euclidean distance in the local scale pattern and highest + correlation in the local variable to downscale.}} + +\item{excludeTime}{An array of N named dimensions (coinciding with time dimensions in expL) of character string(s) indicating the date(s) of the observations in the format "dd/mm/yyyy" to be excluded during the search of -analogs. It can be NULL but if expL is not a forecast (time_expL contained in -time_obsL),by default time_expL will be removed during the search of analogs.} +analogs. It can be NULL but if expL is not a forecast (time_expL contained +in time_obsL), by default time_expL will be removed during the search of +analogs.} \item{lonVar}{a vector containing the longitude of parameter 'expVar'.} @@ -88,23 +88,25 @@ NULL for 'Local_dist' and 'Local_cor' the default value will be set at the length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just the best analog.} -\item{AnalogsInfo}{TRUE to get a list with two elements: 1) the downscaled -field and 2) the AnalogsInfo which contains: a) the number of the best +\item{AnalogsInfo}{A logical value. If it is TRUE it returns a list +with two elements: 1) the downscaled field and +2) the AnalogsInfo which contains: a) the number of the best analogs, b) the corresponding value of the metric used in the selected criteria (distance values for Large_dist and Local_dist,correlation values -for Local_cor), c)dates of the analogs). The analogs are listed in decreasing -order, the first one is the best analog (i.e if the selected criteria is -Local_cor the best analog will be the one with highest correlation, while for -Large_dist criteria the best analog will be the day with minimum Euclidean -distance). Set to FALSE to get a single analog, the best analog, for instance -for downscaling.} +for Local_cor), c)dates of the analogs). The analogs are listed in +decreasing order, the first one is the best analog (i.e if the selected +criteria is Local_cor the best analog will be the one with highest +correlation, while for Large_dist criteria the best analog will be the day +with minimum Euclidean distance). Set to FALSE to get a single analog, the +best analog, for instance for downscaling.} \item{ncores}{the number of cores to use in parallel computation.} } \value{ -AnalogsFields, dowscaled values of the best analogs for the criteria -selected. If AnalogsInfo is set to TRUE the function also returns a -list with the dowsncaled field and the Analogs Information. +An array with the dowscaled values of the best analogs for the criteria +selected. If 'AnalogsInfo' is set to TRUE it returns a list with an array +of the dowsncaled field and the analogs information in elements 'analogs', +'metric' and 'dates'. } \description{ This function perform a downscaling using Analogs. To compute @@ -156,7 +158,7 @@ downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, obsVar = obs.pr, obsSLP <- c(rnorm(1:1980), expSLP * 1.5) dim(obsSLP) <- c(lat = 4, lon = 5, time = 100) time_obsSLP <- paste(rep("01", 100), rep("01", 100), 1920 : 2019, sep = "-") -downscale_field<- Analogs(expL = expSLP, obsL = obsSLP, time_obsSLP, +downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, time_obsSLP, nAnalogs = 5, time_expL = "01-01-2003", AnalogsInfo = TRUE, excludeTime = "01-01-2003") @@ -170,7 +172,7 @@ downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, obsVar = obs.pr, # Example 5: Downscaling using criteria 'Local_dist' and 2 variables: # analogs of local scale using criteria 2 -region=c(lonmin = -1 ,lonmax = 2, latmin = 30, latmax = 33) +region = c(lonmin = -1 ,lonmax = 2, latmin = 30, latmax = 33) Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, obsVar = obs.pr, criteria = "Local_dist", lonL = seq(-1, 5, 1.5),latL = seq(30, 35, 1.5), diff --git a/man/BiasCorrection.Rd b/man/BiasCorrection.Rd index d944a32f..04be3e25 100644 --- a/man/BiasCorrection.Rd +++ b/man/BiasCorrection.Rd @@ -11,20 +11,24 @@ BiasCorrection( na.rm = FALSE, memb_dim = "member", sdate_dim = "sdate", + dat_dim = NULL, ncores = NULL ) } \arguments{ \item{exp}{A multidimensional array with named dimensions containing the -seasonal forecast experiment data with at least 'member' and 'sdate' -dimensions.} +seasonal forecast experiment data with at least time and member dimensions.} \item{obs}{A multidimensional array with named dimensions containing the -observed data with at least 'sdate' dimension.} +observed data with at least time dimension.} \item{exp_cor}{A multidimensional array with named dimensions containing the -seasonl forecast experiment to be corrected. If it is NULL, the 'exp' -forecast will be corrected.} +seasonal forecast experiment to be corrected with at least time and member +dimension. If it is NULL, the 'exp' forecast will be corrected. The +dimensions must be the same as 'exp' except dataset dimension. If there is +only one corrected dataset, it should not have dataset dimension. If there +is a corresponding corrected dataset for each 'exp' forecast, the dataset +dimension must have the same length as in 'exp'. The default value is NULL.} \item{na.rm}{A logical value indicating whether missing values should be stripped before the computation proceeds, by default it is set to FALSE.} @@ -35,6 +39,10 @@ dimension. By default, it is set to 'member'.} \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + \item{ncores}{An integer that indicates the number of cores for parallel computations using multiApply function. The default value is NULL.} } diff --git a/man/CST_Analogs.Rd b/man/CST_Analogs.Rd index 7a67b0fb..16c71522 100644 --- a/man/CST_Analogs.Rd +++ b/man/CST_Analogs.Rd @@ -20,30 +20,30 @@ CST_Analogs( ) } \arguments{ -\item{expL}{an 's2dv_cube' object containing the experimental field on the +\item{expL}{An 's2dv_cube' object containing the experimental field on the large scale for which the analog is aimed. This field is used to in all the criterias. If parameter 'expVar' is not provided, the function will return the expL analog. The element 'data' in the 's2dv_cube' object must have, at least, latitudinal and longitudinal dimensions. The object is expect to be already subset for the desired large scale region.} -\item{obsL}{an 's2dv_cube' object containing the observational field on the +\item{obsL}{An 's2dv_cube' object containing the observational field on the large scale. The element 'data' in the 's2dv_cube' object must have the same latitudinal and longitudinal dimensions as parameter 'expL' and a temporal dimension with the maximum number of available observations.} -\item{expVar}{an 's2dv_cube' object containing the experimental field on the +\item{expVar}{An 's2dv_cube' object containing the experimental field on the local scale, usually a different variable to the parameter 'expL'. If it is not NULL (by default, NULL), the returned field by this function will be the analog of parameter 'expVar'.} -\item{obsVar}{an 's2dv_cube' containing the field of the same variable as the +\item{obsVar}{An 's2dv_cube' containing the field of the same variable as the passed in parameter 'expVar' for the same region.} -\item{region}{a vector of length four indicating the minimum longitude, +\item{region}{A vector of length four indicating the minimum longitude, the maximum longitude, the minimum latitude and the maximum latitude.} -\item{criteria}{a character string indicating the criteria to be used for the +\item{criteria}{A character string indicating the criteria to be used for the selection of analogs: \itemize{ \item{Large_dist} minimum Euclidean distance in the large scale pattern; @@ -55,21 +55,21 @@ correlation in the local variable to downscale.} Criteria 'Large_dist' is recommended for CST_Analogs, for an advanced use of the criterias 'Local_dist' and 'Local_cor' use 'Analogs' function.} -\item{excludeTime}{an array of N named dimensions (coinciding with time +\item{excludeTime}{An array of N named dimensions (coinciding with time dimensions in expL)of character string(s) indicating the date(s) of the observations in the format "dd/mm/yyyy" to be excluded during the search of analogs. It can be NULL but if expL is not a forecast (time_expL contained in time_obsL), by default time_expL will be removed during the search of analogs.} -\item{time_expL}{a character string indicating the date of the experiment +\item{time_expL}{A character string indicating the date of the experiment in the same format than time_obsL (i.e. "yyyy-mm-dd"). By default it is NULL and dates are taken from element \code{$Dates$start} from expL.} -\item{time_obsL}{a character string indicating the date of the observations +\item{time_obsL}{A character string indicating the date of the observations in the date format (i.e. "yyyy-mm-dd"). By default it is NULL and dates are taken from element \code{$Dates$start} from obsL.} -\item{nAnalogs}{number of Analogs to be selected to apply the criterias +\item{nAnalogs}{Number of Analogs to be selected to apply the criterias 'Local_dist' or 'Local_cor'. This is not the necessary the number of analogs that the user can get, but the number of events with minimum distance in which perform the search of the best Analog. The default value for the @@ -79,22 +79,24 @@ NULL for 'Local_dist' and 'Local_cor' the default value will be set at the length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just the best analog.} -\item{AnalogsInfo}{TRUE to get a list with two elements: 1) the downscaled -field and 2) the AnalogsInfo which contains: a) the number of the best -analogs, b) the corresponding value of the metric used in the selected -criteria (distance values for Large_dist and Local_dist,correlation values -for Local_cor), c)dates of the analogs). The analogs are listed in decreasing -order, the first one is the best analog (i.e if the selected criteria is -Local_cor the best analog will be the one with highest correlation, while for -Large_dist criteria the best analog will be the day with minimum Euclidean -distance). Set to FALSE to get a single analog, the best analog, for instance -for downscaling.} +\item{AnalogsInfo}{A logical value. TRUE to get a list with two elements: +1) the downscaled field and 2) the AnalogsInfo which contains: +a) the number of the best analogs, b) the corresponding value of the metric +used in the selected criteria (distance values for Large_dist and Local_dist, +correlation values for Local_cor), c)dates of the analogs). The analogs are +listed in decreasing order, the first one is the best analog (i.e if the +selected criteria is Local_cor the best analog will be the one with highest +correlation, while for Large_dist criteria the best analog will be the day +with minimum Euclidean distance). Set to FALSE to get a single analog, the +best analog, for instance for downscaling.} \item{ncores}{The number of cores to use in parallel computation} } \value{ -An 'array' object containing the dowscaled values of the best -analogs. +An 's2dv_cube' object containing an array with the dowscaled values of +the best analogs in element 'data'. If 'AnalogsInfo' is TRUE, 'data' is a list +with an array of the downscaled fields and the analogs information in +elements 'analogs', 'metric' and 'dates'. } \description{ This function perform a downscaling using Analogs. To compute @@ -124,10 +126,10 @@ function within 'CSTools' package. } \examples{ expL <- rnorm(1:200) -dim(expL) <- c(member=10,lat = 4, lon = 5) +dim(expL) <- c(member = 10,lat = 4, lon = 5) obsL <- c(rnorm(1:180),expL[1,,]*1.2) dim(obsL) <- c(time = 10,lat = 4, lon = 5) -time_obsL <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") +time_obsL <- paste(rep("01", 10), rep("01", 10), 1994:2003, sep = "-") time_expL <- time_obsL[1] lon <- seq(-1,5,1.5) lat <- seq(30,35,1.5) @@ -137,6 +139,7 @@ obsL <- s2dv_cube(data = obsL, lat = lat, lon = lon, Dates = list(start = time_obsL, end = time_obsL)) region <- c(min(lon), max(lon), min(lat), max(lat)) downscaled_field <- CST_Analogs(expL = expL, obsL = obsL, region = region) + } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index fb96babd..ce7df579 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -11,20 +11,27 @@ CST_BiasCorrection( na.rm = FALSE, memb_dim = "member", sdate_dim = "sdate", + dat_dim = NULL, ncores = NULL ) } \arguments{ \item{exp}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element -named \code{$data}} +named \code{$data} with at least time and member dimensions.} \item{obs}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} -function, containing the observed data in the element named \code{$data}.} +function, containing the observed data in the element named \code{$data} +with at least time dimension.} \item{exp_cor}{An object of class \code{s2dv_cube} as returned by -\code{CST_Load} function, containing the seasonl forecast experiment to be -corrected. If it is NULL, the 'exp' forecast will be corrected.} +\code{CST_Load} function, containing the seasonal forecast experiment to be +corrected with at least time dimension. If it is NULL, the 'exp' forecast +will be corrected. The dimensions must be the same as 'exp' except +dataset dimension. If there is only one corrected dataset, it should not +have dataset dimension. If there is a corresponding corrected dataset for +each 'exp' forecast, the dataset dimension must have the same length as in +'exp'. The default value is NULL.} \item{na.rm}{A logical value indicating whether missing values should be stripped before the computation proceeds, by default it is set to FALSE.} @@ -35,6 +42,10 @@ dimension. By default, it is set to 'member'.} \item{sdate_dim}{A character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + \item{ncores}{An integer that indicates the number of cores for parallel computations using multiApply function. The default value is NULL.} } diff --git a/tests/testthat/test-CST_BiasCorrection.R b/tests/testthat/test-CST_BiasCorrection.R index 826fcf12..62740f0d 100644 --- a/tests/testthat/test-CST_BiasCorrection.R +++ b/tests/testthat/test-CST_BiasCorrection.R @@ -18,16 +18,12 @@ attr(obs, 'class') <- 's2dv_cube' exp1 <- list(data = array(1:20, dim = c(time = 20))) class(exp1) <- 's2dv_cube' - obs1 <- list(data = array(1:20, dim = c(time = 20))) class(obs1) <- 's2dv_cube' - exp1_2 <- list(data = array(1:20, dim = c(20))) class(exp1_2) <- 's2dv_cube' - obs1_2 <- list(data = array(1:20, dim = c(20))) class(obs1_2) <- 's2dv_cube' - exp_cor1 <- list(data = array(1:20, dim = c(20))) class(exp_cor1) <- 's2dv_cube' @@ -49,9 +45,35 @@ obs4 <- array(1:200, dim = c(time = 5, members = 1, lat = 2, lon = 5)) obs4_1 <- obs4 obs4_1[1,1,1,1] <- NA +# dat5 +set.seed(1) +exp5 <- array(rnorm(80), dim = c(member = 2, sdate = 10, lat = 2, dataset = 2)) +exp5_1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) +set.seed(2) +obs5 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) +obs5_1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +exp_cor5 <- array(rnorm(20), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat6 +set.seed(1) +exp6 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +exp6_1 <- array(exp6, dim = c(member = 2, sdate = 10, dataset = 1)) +exp6_2 <- exp6_1 +exp6_2[1] <- NA +set.seed(2) +obs6 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) +obs6_1 <- array(obs6, dim = c(member = 1, sdate = 10, dataset = 1)) +obs6_2 <- obs6_1 +obs6_2[c(1, 3)] <- NA +set.seed(3) +exp_cor6 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + + + ############################################## -test_that("1. Inpput checks", { +test_that("1. Input checks", { # s2dv_cube expect_error( CST_BiasCorrection(exp = 1), @@ -121,6 +143,41 @@ test_that("1. Inpput checks", { BiasCorrection(exp = exp3, obs = obs3_3), paste0("If parameter 'obs' has dimension 'memb_dim' its length must be equal to 1.") ) + ## dat_dim + expect_error( + BiasCorrection(exp = exp3, obs = obs3, dat_dim = 1), + paste0("Parameter 'dat_dim' must be a character string.") + ) + expect_error( + BiasCorrection(exp = exp3, obs = obs3, dat_dim = 'dataset'), + paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + ) + ## exp, obs, and exp_cor (2) + expect_error( + BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), + dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + ) + expect_error( + BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + dat_dim = 'dataset'), + paste0("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + ) + expect_error( + BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, lon = 1)), + dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all dimensions except 'dat_dim' if there is ", + "only one reference dataset.") + ) ## na.rm expect_warning( CST_BiasCorrection(exp = exp, obs = obs, na.rm = 1), @@ -182,3 +239,91 @@ test_that("2. Output checks: dat1", { ) ) }) + +############################################## + +test_that("3. Output checks: dat5", { + expect_equal( + dim(BiasCorrection(exp5, obs5, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 2, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + dim(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 2, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + as.vector(BiasCorrection(exp5, obs5, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.1466060, -0.9764600, 0.6914021, 0.9330733, 0.6567210, -0.3036642), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.21682367, 0.03815268, 0.09778966, 1.20997987, -1.30893321, 1.37258011), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp5[, , , 1], obs5[, , 1], memb_dim = 'member'))[1:5], + as.vector(BiasCorrection(exp5, obs5, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]) + ) + expect_equal( + as.vector(BiasCorrection(exp5[, , , 1], obs5[, , 1], exp_cor5, memb_dim = 'member'))[1:5], + as.vector(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]) + ) + expect_equal( + as.vector(BiasCorrection(exp5, obs5, exp_cor5, memb_dim = 'member', dat_dim = 'dataset', na.rm = TRUE))[1:5], + c(-1.0318284, -0.3098404, 0.2847780, -1.2369666, 0.2168237), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("4. Output checks: dat6", { + expect_equal( + dim(BiasCorrection(exp6, obs6)), + c(member = 2, sdate = 10) + ) + expect_equal( + as.vector(BiasCorrection(exp6, obs6))[1:5], + c(-0.5430181, 0.2807323, -0.9954539, 1.9298249, 0.1466060), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp6, obs6, exp_cor6))[1:5], + c(-1.0318284, -0.3098404, 0.2847780, -1.2369666, 0.2168237), + tolerance = 0.0001 + ) + expect_equal( + dim(BiasCorrection(exp6_1, obs6_1, dat_dim = 'dataset')), + c(member = 2, sdate = 10, nexp = 1, nobs = 1) + ) + expect_equal( + as.vector(BiasCorrection(exp6_1, obs6_1, dat_dim = 'dataset')), + as.vector(BiasCorrection(exp6, obs6)), + tolerance = 0.0001 + ) + expect_equal( + as.vector(BiasCorrection(exp6_1, obs6_1, exp_cor6, dat_dim = 'dataset')), + as.vector(BiasCorrection(exp6, obs6, exp_cor6)), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(BiasCorrection(exp6_1, obs6_2, dat_dim = 'dataset')) + ), + rep(as.numeric(NA), 20) + ) + expect_equal( + suppressWarnings( + as.vector(BiasCorrection(exp6_1, obs6_2, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.2644706, -0.8392515, 0.6458045, 0.8511290, 0.5959483, -0.2908764), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(BiasCorrection(exp6_2, obs6_2, exp_cor6, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.14077312, -0.02076059, 0.03315629, 1.03867041, -1.23864029, 1.18567478), + tolerance = 0.0001 + ) +}) -- GitLab From 3db1ead272dc777e6679c8ab5b4d7d026acfe0ad Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 25 Nov 2022 17:52:24 +0100 Subject: [PATCH 02/13] Develop dat_dim for Calibration() --- R/CST_Calibration.R | 768 +++++++++++++++++--------- man/CST_Calibration.Rd | 77 ++- man/Calibration.Rd | 149 +++-- tests/testthat/test-CST_Calibration.R | 306 +++++++++- 4 files changed, 945 insertions(+), 355 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index f03d5d05..df5aaebc 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -2,22 +2,65 @@ #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}. +#'@description Equivalent to function \code{Calibration} but for objects of +#'class \code{s2dv_cube}. #' -#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal hindcast experiment data in the element named \code{$data}. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead. -#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}. -#'@param exp_cor an optional object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. -#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used. -#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. -#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. -#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. -#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. -#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. -#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. -#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. -#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. -#'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object. +#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} +#' function, containing the seasonal hindcast experiment data in the element +#' named \code{$data}. The hindcast is used to calibrate the forecast in case +#' the forecast is provided; if not, the same hindcast will be calibrated +#' instead. +#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} +#' function, containing the observed data in the element named \code{$data}. +#'@param exp_cor An optional object of class \code{s2dv_cube} as returned by +#' \code{CST_Load} function, containing the seasonal forecast experiment data +#' in the element named \code{$data}. If the forecast is provided, it will be +#' calibrated using the hindcast and observations; if not, the hindcast will +#' be calibrated instead. The dimensions must be the same as 'exp' except +#' dataset dimension. If there is only one corrected dataset, it should not +#' have dataset dimension. If there is a corresponding corrected dataset for +#' each 'exp' forecast, the dataset dimension must have the same length as in +#' 'exp'. The default value is NULL. +#'@param cal.method A character string indicating the calibration method used, +#' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or +#' \code{rpc-based}. Default value is \code{mse_min}. +#'@param eval.method A character string indicating the sampling method used, it +#' can be either \code{in-sample} or \code{leave-one-out}. Default value is the +#' \code{leave-one-out} cross validation. In case the forecast is provided, any +#' chosen eval.method is over-ruled and a third option is used. +#'@param multi.model A boolean that is used only for the \code{mse_min} +#' method. If multi-model ensembles or ensembles of different sizes are used, +#' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences +#' between the two approaches are generally small but may become large when +#' using small ensemble sizes. Using multi.model when the calibration method is +#' \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. +#'@param na.fill A boolean that indicates what happens in case calibration is +#' not possible or will yield unreliable results. This happens when three or +#' less forecasts-observation pairs are available to perform the training phase +#' of the calibration. By default \code{na.fill} is set to true such that NA +#' values will be returned. If \code{na.fill} is set to false, the uncorrected +#' data will be returned. +#'@param na.rm A boolean that indicates whether to remove the NA values or not. +#' The default value is \code{TRUE}. See Details section for further +#' information about its use and compatibility with \code{na.fill}. +#'@param apply_to A character string that indicates whether to apply the +#' calibration to all the forecast (\code{"all"}) or only to those where the +#' correlation between the ensemble mean and the observations is statistically +#' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha A numeric value indicating the significance level for the +#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#'@param memb_dim A character string indicating the name of the member dimension. +#' By default, it is set to 'member'. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param ncores An integer that indicates the number of cores for parallel +#' computations using multiApply function. The default value is one. +#'@return An object of class \code{s2dv_cube} containing the calibrated forecasts +#' in the element \code{$data} with the same dimensions as the one in the exp +#' object. #' #'@importFrom s2dv InsertDim #'@import abind @@ -61,79 +104,144 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { - - if(!missing(multi.model) & !(cal.method == "mse_min")){ - warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) - } - - if(is.null(exp_cor)){ #exp will be used to calibrate and will also be calibrated: "calibrate hindcast" - if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { + memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL) { + + if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") + "as output by CSTools::CST_Load.") + } + if (!is.null(exp_cor)) { + if (!inherits(exp_cor, "s2dv_cube")) { + stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") } - exp$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = NULL, - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - memb_dim = memb_dim, sdate_dim = sdate_dim, - ncores = ncores) + dimnames <- names(dim(exp_cor$data)) + # if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" + # if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the + # train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; + # no need for "leave-one-out" or "in-sample") + eval.method = "hindcast-vs-forecast" + + } else { + dimnames <- names(dim(exp$data)) + } + + if (!missing(multi.model) & !(cal.method == "mse_min")) { + warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) + } + + Calibration <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, + memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, ncores = ncores) + + pos <- match(dimnames, names(dim(Calibration))) + Calibration <- aperm(Calibration, pos) + + if (is.null(exp_cor)) { + exp$data <- Calibration exp$Datasets <- c(exp$Datasets, obs$Datasets) exp$source_files <- c(exp$source_files, obs$source_files) - - return(exp) + + return(exp) - }else{ #if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" - eval.method = "hindcast-vs-forecast" #if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; no need for "leave-one-out" or "in-sample") - if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube") || !inherits(exp_cor, "s2dv_cube")) { - stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - exp_cor$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - memb_dim = memb_dim, sdate_dim = sdate_dim, - ncores = ncores) - exp_cor$Datasets <- c(exp_cor$Datasets, obs$Datasets) - exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) - - return(exp_cor) + } else { + exp_cor$data <- Calibration + exp_cor$Datasets <- c(exp_cor$Datasets, exp$Datasets, obs$Datasets) + exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) - } + return(exp_cor) + } } - - #'Forecast Calibration #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Five types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). -#'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. -#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x -#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 +#'@description Five types of member-by-member bias correction can be performed. +#'The \code{"bias"} method corrects the bias only, the \code{"evmos"} method +#'applies a variance inflation technique to ensure the correction of the bias +#'and the correspondence of variance between forecast and observation (Van +#'Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods +#'\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast +#'variance and the ensemble spread as described in Doblas-Reyes et al. (2005) +#'and Van Schaeybroeck and Vannitsem (2015), respectively. While the +#'\code{"mse_min"} method minimizes a constrained mean-squared error using three +#'parameters, the \code{"crps_min"} method features four parameters and +#'minimizes the Continuous Ranked Probability Score (CRPS). The +#'\code{"rpc-based"} method adjusts the forecast variance ensuring that the +#'ratio of predictable components (RPC) is equal to one, as in Eade et al. +#'(2014). +#'@description Both in-sample or our out-of-sample (leave-one-out cross +#'validation) calibration are possible. +#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +#'success of multi-model ensembles in seasonal forecasting-II calibration and +#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +#'predictions underestimate the predictability of the read world? Geophysical +#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +#'through linear regression. Nonlinear Processes in Geophysics, 18(2), +#'147. doi:10.5194/npg-18-147-2011 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +#'post-processing using member-by-member approaches: theoretical aspects. +#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +#'doi:10.1002/qj.2397 #' -#'@param exp a multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal hindcast experiment data. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead. -#'@param obs a multidimensional array with named dimensions (at least 'sdate') containing the observed data. -#'@param exp_cor an optional multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal forecast experiment data. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. -#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used. -#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. -#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. -#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. -#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. -#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. -#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. -#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. -#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. -#'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. +#'@param exp A multidimensional array with named dimensions (at least 'sdate' +#' and 'member') containing the seasonal hindcast experiment data. The hindcast +#' is used to calibrate the forecast in case the forecast is provided; if not, +#' the same hindcast will be calibrated instead. +#'@param obs A multidimensional array with named dimensions (at least 'sdate') +#' containing the observed data. +#'@param exp_cor An optional multidimensional array with named dimensions (at +#' least 'sdate' and 'member') containing the seasonal forecast experiment +#' data. If the forecast is provided, it will be calibrated using the hindcast +#' and observations; if not, the hindcast will be calibrated instead. The +#' dimensions must be the same as 'exp' except dataset dimension. If there is +#' only one corrected dataset, it should not have dataset dimension. If there +#' is a corresponding corrected dataset for each 'exp' forecast, the dataset +#' dimension must have the same length as in 'exp'. The default value is NULL. +#'@param cal.method A character string indicating the calibration method used, +#' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} +#' or \code{rpc-based}. Default value is \code{mse_min}. +#'@param eval.method A character string indicating the sampling method used, +#' can be either \code{in-sample} or \code{leave-one-out}. Default value is +#' the \code{leave-one-out} cross validation. In case the forecast is +#' provided, any chosen eval.method is over-ruled and a third option is +#' used. +#'@param multi.model A boolean that is used only for the \code{mse_min} +#' method. If multi-model ensembles or ensembles of different sizes are used, +#' it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences +#' between the two approaches are generally small but may become large when +#' using small ensemble sizes. Using multi.model when the calibration method +#' is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. +#'@param na.fill A boolean that indicates what happens in case calibration is +#' not possible or will yield unreliable results. This happens when three or +#' less forecasts-observation pairs are available to perform the training phase +#' of the calibration. By default \code{na.fill} is set to true such that NA +#' values will be returned. If \code{na.fill} is set to false, the uncorrected +#' data will be returned. +#'@param na.rm A boolean that indicates whether to remove the NA values or +#' not. The default value is \code{TRUE}. +#'@param apply_to A character string that indicates whether to apply the +#' calibration to all the forecast (\code{"all"}) or only to those where the +#' correlation between the ensemble mean and the observations is statistically +#' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha A numeric value indicating the significance level for the +#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#'@param memb_dim A character string indicating the name of the member +#' dimension. By default, it is set to 'member'. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param ncores An integer that indicates the number of cores for parallel +#' computation using multiApply function. The default value is NULL (one core). +#'@return an array containing the calibrated forecasts with the same dimensions +#'as the \code{exp} array. #' #'@importFrom s2dv InsertDim MeanDims Reorder #'@import abind @@ -141,9 +249,16 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@importFrom ClimProjDiags Subset #' #'@seealso \code{\link{CST_Load}} -#' -#'@details -#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration. +#' +#'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to +#'indicate how the function has to handle the NA values. The \code{na.fill} +#'parameter checks whether there are more than three forecast-observations pairs +#'to perform the computation. In case there are three or less pairs, the +#'computation is not carried out, and the value returned by the function depends +#'on the value of this parameter (either NA if \code{na.fill == TRUE} or the +#'uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} +#'is used to indicate the function whether to remove the missing values during +#'the computation of the parameters needed to perform the calibration. #' #'@examples #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -153,87 +268,144 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'a <- Calibration(exp = mod1, obs = obs1) #'str(a) #'@export -Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", - eval.method = "leave-one-out", +Calibration <- function(exp, obs, exp_cor = NULL, + cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { - - dim.exp <- dim(exp) - amt.dims.exp <- length(dim.exp) - dim.obs <- dim(obs) - amt.dims.obs <- length(dim.obs) - dim.names.exp <- names(dim.exp) - dim.names.obs <- names(dim.obs) - if(!is.null(exp_cor)){ - dim.exp_cor <- dim(exp_cor) - amt.dims.exp_cor <- length(dim.exp_cor) - dim.names.exp_cor <- names(dim.exp_cor) - } - if (is.null(memb_dim) || !is.character(memb_dim)) { - stop("Parameter 'memb_dim' should be a character string indicating the", - "name of the dimension where members are stored in 'exp'.") + memb_dim = 'member', sdate_dim = 'sdate', dat_dim = NULL, ncores = NULL) { + + # Check inputs + ## exp, obs + if (!is.array(exp) || !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") } - if (length(memb_dim) > 1) { - memb_dim <- memb_dim[1] - warning("Parameter 'memb_dim' has length greater than 1 and only", - " the first element will be used.") - } - - if (is.null(sdate_dim) || !is.character(sdate_dim)) { + if (!is.array(obs) || !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + expdims <- names(dim(exp)) + obsdims <- names(dim(obs)) + if (is.null(expdims)) { + stop("Parameter 'exp' must have dimension names.") + } + if (is.null(obsdims)) { + stop("Parameter 'obs' must have dimension names.") + } + if (any(is.na(exp))) { + warning("Parameter 'exp' contains NA values.") + } + if (any(is.na(obs))) { + warning("Parameter 'obs' contains NA values.") + } + ## exp_cor + if (!is.null(exp_cor)) { + expcordims <- names(dim(exp_cor)) + if (is.null(expcordims)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (any(is.na(exp_cor))) { + warning("Parameter 'exp_cor' contains NA values.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## sdate_dim and memb_dim + if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' should be a character string indicating the", - "name of the dimension where start dates are stored in 'exp'.") + "name of the dimension where start dates are stored in 'exp'.") } if (length(sdate_dim) > 1) { sdate_dim <- sdate_dim[1] warning("Parameter 'sdate_dim' has length greater than 1 and only", " the first element will be used.") } - target.dim.names.exp <- c(memb_dim, sdate_dim) - target.dim.names.obs <- sdate_dim - - if (!all(target.dim.names.exp %in% dim.names.exp)) { - stop("Parameter 'exp' must have the dimensions defined in memb_dim ", - "and sdate_dim.") + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' should be a character string indicating the", + "name of the dimension where members are stored in 'exp'.") } - - if(!is.null(exp_cor)){ - if (!all(target.dim.names.exp %in% dim.names.exp_cor)) { - stop("Parameter 'exp_cor' must have the dimensions defined in memb_dim ", - "and sdate_dim.") - } + if (length(memb_dim) > 1) { + memb_dim <- memb_dim[1] + warning("Parameter 'memb_dim' has length greater than 1 and only", + " the first element will be used.") } - if (!all(c(sdate_dim) %in% dim.names.obs)) { + target_dims_exp <- c(memb_dim, sdate_dim, dat_dim) + target_dims_obs <- c(sdate_dim, dat_dim) + + if (!all(target_dims_exp %in% expdims)) { + stop("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") + } + if (!all(target_dims_obs %in% obsdims)) { stop("Parameter 'obs' must have the dimension defined in sdate_dim ", "parameter.") } - - if (any(is.na(exp))) { - warning("Parameter 'exp' contains NA values.") - } - - if(!is.null(exp_cor)){ - if (any(is.na(exp_cor))) { - warning("Parameter 'exp_cor' contains NA values.") + if (memb_dim %in% obsdims) { + if (dim(obs)[memb_dim] != 1) { + warning("Parameter 'obs' has dimension 'memb_dim' with length larger than 1. Only the first member dimension will be used.") } + obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") } - - if (any(is.na(obs))) { - warning("Parameter 'obs' contains NA values.") + ## exp, obs, and exp_cor (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] } - - if (memb_dim %in% names(dim(obs))) { - obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.")) } - data.set.sufficiently.large.out <- - Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, - fun = .data.set.sufficiently.large)$output1 + if (!is.null(exp_cor)) { + name_exp_cor <- sort(names(dim(exp_cor))) + name_exp <- sort(names(dim(exp))) + if (!is.null(dat_dim)) { + if (dat_dim %in% expcordims) { + if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) { + stop(paste0("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.")) + } + name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)] + target_dims_cor <- c(memb_dim, sdate_dim, dat_dim) + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + name_exp <- name_exp[-which(name_exp == dat_dim)] + } else { + target_dims_cor <- c(memb_dim, sdate_dim) + } + if (!identical(length(name_exp), length(name_exp_cor)) | + !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { + stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all dimensions except 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + ## data.set.sufficiently.large + data.set.sufficiently.large.out <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + ncores = ncores, + fun = .data.set.sufficiently.large)$output1 - if(!all(data.set.sufficiently.large.out)){ - if(na.fill){ + if (!all(data.set.sufficiently.large.out)) { + if (na.fill) { warning("Some forecast data could not be corrected due to data lack", " and is replaced with NA values") } else { @@ -241,10 +413,18 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", " and is replaced with uncorrected values") } } - + ## na.rm if (!na.rm %in% c(TRUE,FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE.") } + if (length(na.rm) > 1) { + na.rm <- na.rm[1] + warning("Paramter 'na.rm' has length greater than 1, and only the fist element is used.") + } + ## cal.method, apply_to, alpha + if (!any(cal.method %in% c('bias', 'evmos', 'mse_min', 'crps_min', 'rpc-based'))) { + stop("Parameter 'cal.method' must be a character string indicating the calibration method used.") + } if (cal.method == 'rpc-based') { if (is.null(apply_to)) { apply_to <- 'sign' @@ -261,156 +441,195 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", } } } - - if(is.null(exp_cor)){ - calibrated <- Apply(data = list(exp = exp, obs = obs), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp)[dexes] - }else{ + ## eval.method + if (!any(eval.method %in% c('in-sample', 'leave-one-out', 'hindcast-vs-forecast'))) { + stop("Parameter 'eval.method' must be a character string indicating the sampling method used ('in-sample', 'leave-one-out' or 'hindcast-vs-forecast').") + } + ## multi.model + if (!multi.model %in% c(TRUE,FALSE)) { + stop("Parameter 'multi.model' must be TRUE or FALSE.") + } + + if (is.null(exp_cor)) { + calibrated <- Apply(data = list(exp = exp, obs = obs), dat_dim = dat_dim, + cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), + ncores = ncores, fun = .cal)$output1 + } else { calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), - cal.method = cal.method, - eval.method = eval.method, - multi.model = multi.model, - na.fill = na.fill, na.rm = na.rm, - apply_to = apply_to, alpha = alpha, - target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs, exp_cor = target.dim.names.exp), - ncores = ncores, output_dims = target.dim.names.exp, - fun = .cal)$output1 - dexes <- match(names(dim(exp_cor)), names(dim(calibrated))) - calibrated <- aperm(calibrated, dexes) - dimnames(calibrated) <- dimnames(exp_cor)[dexes] - } + dat_dim = dat_dim, cal.method = cal.method, eval.method = eval.method, + multi.model = multi.model, na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + target_dims = list(exp = target_dims_exp, obs = target_dims_obs, + exp_cor = target_dims_cor), + ncores = ncores, fun = .cal)$output1 + } + if (!is.null(dat_dim)) { + pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), + names(dim(calibrated))) + calibrated <- aperm(calibrated, pos) + } return(calibrated) } -.data.set.sufficiently.large <- function(exp, obs){ +.data.set.sufficiently.large <- function(exp, obs) { amt.min.samples <- 3 amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) return(amt.good.pts > amt.min.samples) } -.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor){ - if(eval.method == "leave-one-out"){ +.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor) { + if (eval.method == "leave-one-out") { dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-x]))) - } else if (eval.method == "in-sample"){ + } else if (eval.method == "in-sample") { dexes.lst <- list(list(eval.dexes = seq(1, amt.points), train.dexes = seq(1, amt.points))) - } else if (eval.method == "hindcast-vs-forecast"){ + } else if (eval.method == "hindcast-vs-forecast") { dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), train.dexes = seq(1, amt.points))) } else { - stop(paste0("unknown sampling method: ",eval.method)) + stop(paste0("unknown sampling method: ", eval.method)) } return(dexes.lst) } -.cal <- function(exp, obs, exp_cor = NULL, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { - if(is.null(exp_cor)){ - exp_cor <- exp ## generate a copy of exp so that the same function can run - ## when exp_cor is provided and when it's not - } - obs <- as.vector(obs) - dims.fc <- dim(exp) - dims.fc_cor <- dim(exp_cor) ## new line - amt.mbr <- dims.fc[1] - amt.sdate <- dims.fc[2] - amt.sdate_cor <- dims.fc_cor[2] ## new line - var.cor.fc <- NA * exp_cor ## modified line (exp_cor instead of exp); - ## in case of exp_cor not provided, at this point exp_cor - ## is already the same as exp so no change - names(dim(var.cor.fc)) <- dims.fc - - if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ - if(na.fill){ - return(var.cor.fc) - } else { - var.cor.fc[] <- exp[] - return(var.cor.fc) - } - } - eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate, - amt.points_cor = amt.sdate_cor) - amt.resamples <- length(eval.train.dexeses) - for (i.sample in seq(1, amt.resamples)) { - # defining training (tr) and evaluation (ev) subsets - eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes - train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes - - fc.ev <- exp_cor[ , eval.dexes, drop = FALSE] ## modified line (exp_cor instead of exp) - ## fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) - fc.tr <- exp[ , train.dexes] - obs.tr <- obs[train.dexes , drop = FALSE] - - if(cal.method == "bias"){ - var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) - } else if(cal.method == "evmos"){ # forecast correction implemented - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate value for regression parameters - init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) - } else if (cal.method == "mse_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate value for regression parameters - init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) - } else if (cal.method == "crps_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) - #calculate initial value for regression parameters - init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) - init.par[3] <- sqrt(init.par[3]) - #calculate regression parameters on training dataset - optim.tmp <- optim(par = init.par, - fn = .calc.crps.opt, - gr = .calc.crps.grad.opt, - quant.obs.fc = quant.obs.fc.tr, - na.rm = na.rm, - method = "BFGS") - - mbm.par <- optim.tmp$par - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) - } else if (cal.method == 'rpc-based') { - ens_mean.ev <- multiApply::Apply(data = fc.ev, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_mean.tr <- multiApply::Apply(data = fc.tr, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread - exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) - var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance - var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) - var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations - r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean - if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { - ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr - var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, - order = names(dims.fc)) - dim(var.cor.fc) <- dims.fc - } else { ## no significant -> replacing with observed climatology - var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) - } +.cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", eval.method = "leave-one-out", + multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL) { + + # exp: [memb_dim, sdate_dim, (dat)] + # obs: [sdate_dim (dat)] + # exp_cor: [memb, sdate, (dat)] or NULL + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + exp <- InsertDim(exp, posdim = 3, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(exp_cor)) { + exp_cor <- exp + cor_dat_dim <- TRUE + } else { + if (length(dim(exp_cor)) == 2) { # ref: [memb, sdate] + cor_dat_dim <- FALSE } else { - stop("unknown calibration method: ",cal.method) + cor_dat_dim <- TRUE + } + } + + return_data_na <- FALSE + for (i in 1:nexp) { + for (j in 1:nobs) { + if (!.data.set.sufficiently.large(exp = exp[, , i], obs = obs[, j])) { + return_data_na <- TRUE + var.cor.fc <- NA*exp[, , i] + if (!na.fill) { + var.cor.fc[, , i] <- exp[, , i] + } + } + } + } + + + expdims <- dim(exp) + expdims_cor <- dim(exp_cor) + memb <- expdims[1] # memb + sdate <- expdims[2] # sdate + sdate_cor <- expdims_cor[2] + + if (!return_data_na) { + var.cor.fc <- array(dim = c(dim(exp)[1:2], nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + obs_data <- as.vector(obs[, j]) + exp_data <- exp[, , i] + dim(exp_data) <- dim(exp)[1:2] + if (cor_dat_dim) { + expcor_data <- exp_cor[, , i] + dim(expcor_data) = dim(exp_cor)[1:2] + } else { + expcor_data <- exp_cor + } + eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate, + amt.points_cor = sdate_cor) + amt.resamples <- length(eval.train.dexeses) + for (i.sample in seq(1, amt.resamples)) { + + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes + + fc.ev <- expcor_data[ , eval.dexes, drop = FALSE] + fc.tr <- exp_data[ , train.dexes] + obs.tr <- obs_data[train.dexes , drop = FALSE] + + if (cal.method == "bias") { + var.cor.fc[ , eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + } else if (cal.method == "evmos") { + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + var.cor.fc[ , eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "mse_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) + var.cor.fc[ , eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "crps_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) + init.par[3] <- sqrt(init.par[3]) + optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, + quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS") + mbm.par <- optim.tmp$par + var.cor.fc[ , eval.dexes, i, j] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) + } else if (cal.method == 'rpc-based') { + ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean + ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean + ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 ## Ensemble spread + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) + var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', + use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) + ## Correlation between observations and the ensemble mean + if (!is.null(apply_to)) { + if ((apply_to == 'all') || (apply_to == 'sign' && + cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes, i, j] <- s2dv::Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, + ens_mean_cal = ens_mean_cal), + target_dims = names(sdate), fun = .CalibrationMembersRPC, + var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(expdims)) + } + } else { + var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) + } + } else { + stop("unknown calibration method: ", cal.method) + } + } + } } } + + if (is.null(dat_dim)) { + dim(var.cor.fc) <- dim(exp)[1:2] + } + return(var.cor.fc) } -.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations + +#function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant <- function(obs, fc, na.rm) { amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) @@ -430,7 +649,8 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +#extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) @@ -451,8 +671,8 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } - -.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts +#function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant <- function(fc, na.rm) { amt.mbr <- dim(fc)[1] fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) @@ -480,8 +700,8 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts - +#extended function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant.ext <- function(fc, na.rm){ amt.mbr <- dim(fc)[1] repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) @@ -495,10 +715,9 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", } #Below are the core or elementary functions to calculate the regression parameters for the different methods -.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){ +.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm) { par.out <- rep(NA, 3) - - if(multi.model){ + if (multi.model) { par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt) } else { par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd) @@ -508,12 +727,14 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", return(par.out) } + .calc.evmos.par <- function(quant.obs.fc, na.rm){ par.out <- rep(NA, 2) par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } + #Below are the core or elementary functions to calculate the functions necessary for the minimization of crps .calc.crps.opt <- function(par, quant.obs.fc, na.rm){ return( @@ -525,10 +746,9 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", ) } -.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){ - sgn1 <- with(quant.obs.fc,sign(obs.per.ens - - (par[1] + par[2] * fc.ens.av.per.ens + - ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) +.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm) { + sgn1 <- with(quant.obs.fc,sign(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens)) sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4])) deriv.par1 <- mean(sgn1, na.rm = na.rm) @@ -543,7 +763,7 @@ Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", } #Below are the core or elementary functions to correct the evaluation set based on the regression parameters -.correct.evmos.fc <- function(fc, par, na.rm){ +.correct.evmos.fc <- function(fc, par, na.rm) { quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc)) } diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 73cac8ab..6affe1d6 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -17,41 +17,86 @@ CST_Calibration( alpha = NULL, memb_dim = "member", sdate_dim = "sdate", - ncores = 1 + dat_dim = NULL, + ncores = NULL ) } \arguments{ -\item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal hindcast experiment data in the element named \code{$data}. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead.} +\item{exp}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} +function, containing the seasonal hindcast experiment data in the element +named \code{$data}. The hindcast is used to calibrate the forecast in case +the forecast is provided; if not, the same hindcast will be calibrated +instead.} -\item{obs}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}.} +\item{obs}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} +function, containing the observed data in the element named \code{$data}.} -\item{exp_cor}{an optional object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead.} +\item{exp_cor}{An optional object of class \code{s2dv_cube} as returned by +\code{CST_Load} function, containing the seasonal forecast experiment data +in the element named \code{$data}. If the forecast is provided, it will be +calibrated using the hindcast and observations; if not, the hindcast will +be calibrated instead. The dimensions must be the same as 'exp' except +dataset dimension. If there is only one corrected dataset, it should not +have dataset dimension. If there is a corresponding corrected dataset for +each 'exp' forecast, the dataset dimension must have the same length as in +'exp'. The default value is NULL.} -\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.} +\item{cal.method}{A character string indicating the calibration method used, +can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or +\code{rpc-based}. Default value is \code{mse_min}.} -\item{eval.method}{is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used.} +\item{eval.method}{A character string indicating the sampling method used, it +can be either \code{in-sample} or \code{leave-one-out}. Default value is the +\code{leave-one-out} cross validation. In case the forecast is provided, any +chosen eval.method is over-ruled and a third option is used.} -\item{multi.model}{is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} +\item{multi.model}{A boolean that is used only for the \code{mse_min} +method. If multi-model ensembles or ensembles of different sizes are used, +it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences +between the two approaches are generally small but may become large when +using small ensemble sizes. Using multi.model when the calibration method is +\code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} -\item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} +\item{na.fill}{A boolean that indicates what happens in case calibration is +not possible or will yield unreliable results. This happens when three or +less forecasts-observation pairs are available to perform the training phase +of the calibration. By default \code{na.fill} is set to true such that NA +values will be returned. If \code{na.fill} is set to false, the uncorrected +data will be returned.} -\item{na.rm}{is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}.} +\item{na.rm}{A boolean that indicates whether to remove the NA values or not. +The default value is \code{TRUE}. See Details section for further +information about its use and compatibility with \code{na.fill}.} -\item{apply_to}{is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} +\item{apply_to}{A character string that indicates whether to apply the +calibration to all the forecast (\code{"all"}) or only to those where the +correlation between the ensemble mean and the observations is statistically +significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} -\item{alpha}{is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} +\item{alpha}{A numeric value indicating the significance level for the +correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} -\item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} +\item{memb_dim}{A character string indicating the name of the member dimension. +By default, it is set to 'member'.} -\item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} +\item{sdate_dim}{A character string indicating the name of the start date +dimension. By default, it is set to 'sdate'.} -\item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + +\item{ncores}{An integer that indicates the number of cores for parallel +computations using multiApply function. The default value is one.} } \value{ -an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object. +An object of class \code{s2dv_cube} containing the calibrated forecasts + in the element \code{$data} with the same dimensions as the one in the exp + object. } \description{ -Equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}. +Equivalent to function \code{Calibration} but for objects of +class \code{s2dv_cube}. } \examples{ # Example 1: diff --git a/man/Calibration.Rd b/man/Calibration.Rd index 8b0b5231..a1cbdee9 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -17,46 +17,109 @@ Calibration( alpha = NULL, memb_dim = "member", sdate_dim = "sdate", - ncores = 1 + dat_dim = NULL, + ncores = NULL ) } \arguments{ -\item{exp}{a multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal hindcast experiment data. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead.} - -\item{obs}{a multidimensional array with named dimensions (at least 'sdate') containing the observed data.} - -\item{exp_cor}{an optional multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal forecast experiment data. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead.} - -\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.} - -\item{eval.method}{is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used.} - -\item{multi.model}{is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} - -\item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} - -\item{na.rm}{is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}.} - -\item{apply_to}{is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} - -\item{alpha}{is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} - -\item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} - -\item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} - -\item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} +\item{exp}{A multidimensional array with named dimensions (at least 'sdate' +and 'member') containing the seasonal hindcast experiment data. The hindcast +is used to calibrate the forecast in case the forecast is provided; if not, +the same hindcast will be calibrated instead.} + +\item{obs}{A multidimensional array with named dimensions (at least 'sdate') +containing the observed data.} + +\item{exp_cor}{An optional multidimensional array with named dimensions (at +least 'sdate' and 'member') containing the seasonal forecast experiment +data. If the forecast is provided, it will be calibrated using the hindcast +and observations; if not, the hindcast will be calibrated instead. The +dimensions must be the same as 'exp' except dataset dimension. If there is +only one corrected dataset, it should not have dataset dimension. If there +is a corresponding corrected dataset for each 'exp' forecast, the dataset +dimension must have the same length as in 'exp'. The default value is NULL.} + +\item{cal.method}{A character string indicating the calibration method used, +can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} +or \code{rpc-based}. Default value is \code{mse_min}.} + +\item{eval.method}{A character string indicating the sampling method used, +can be either \code{in-sample} or \code{leave-one-out}. Default value is +the \code{leave-one-out} cross validation. In case the forecast is +provided, any chosen eval.method is over-ruled and a third option is +used.} + +\item{multi.model}{A boolean that is used only for the \code{mse_min} +method. If multi-model ensembles or ensembles of different sizes are used, +it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences +between the two approaches are generally small but may become large when +using small ensemble sizes. Using multi.model when the calibration method +is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result.} + +\item{na.fill}{A boolean that indicates what happens in case calibration is +not possible or will yield unreliable results. This happens when three or +less forecasts-observation pairs are available to perform the training phase +of the calibration. By default \code{na.fill} is set to true such that NA +values will be returned. If \code{na.fill} is set to false, the uncorrected +data will be returned.} + +\item{na.rm}{A boolean that indicates whether to remove the NA values or +not. The default value is \code{TRUE}.} + +\item{apply_to}{A character string that indicates whether to apply the +calibration to all the forecast (\code{"all"}) or only to those where the +correlation between the ensemble mean and the observations is statistically +significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} + +\item{alpha}{A numeric value indicating the significance level for the +correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} + +\item{memb_dim}{A character string indicating the name of the member +dimension. By default, it is set to 'member'.} + +\item{sdate_dim}{A character string indicating the name of the start date +dimension. By default, it is set to 'sdate'.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + +\item{ncores}{An integer that indicates the number of cores for parallel +computation using multiApply function. The default value is NULL (one core).} } \value{ -an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. +an array containing the calibrated forecasts with the same dimensions +as the \code{exp} array. } \description{ -Five types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). - -Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. +Five types of member-by-member bias correction can be performed. +The \code{"bias"} method corrects the bias only, the \code{"evmos"} method +applies a variance inflation technique to ensure the correction of the bias +and the correspondence of variance between forecast and observation (Van +Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods +\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast +variance and the ensemble spread as described in Doblas-Reyes et al. (2005) +and Van Schaeybroeck and Vannitsem (2015), respectively. While the +\code{"mse_min"} method minimizes a constrained mean-squared error using three +parameters, the \code{"crps_min"} method features four parameters and +minimizes the Continuous Ranked Probability Score (CRPS). The +\code{"rpc-based"} method adjusts the forecast variance ensuring that the +ratio of predictable components (RPC) is equal to one, as in Eade et al. +(2014). + +Both in-sample or our out-of-sample (leave-one-out cross +validation) calibration are possible. } \details{ -Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration. +Both the \code{na.fill} and \code{na.rm} parameters can be used to +indicate how the function has to handle the NA values. The \code{na.fill} +parameter checks whether there are more than three forecast-observations pairs +to perform the computation. In case there are three or less pairs, the +computation is not carried out, and the value returned by the function depends +on the value of this parameter (either NA if \code{na.fill == TRUE} or the +uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} +is used to indicate the function whether to remove the missing values during +the computation of the parameters needed to perform the calibration. } \examples{ mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -67,13 +130,23 @@ a <- Calibration(exp = mod1, obs = obs1) str(a) } \references{ -Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x - -Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 - -Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 - -Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 +Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +success of multi-model ensembles in seasonal forecasting-II calibration and +combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x + +Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +predictions underestimate the predictability of the read world? Geophysical +Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 + +Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +through linear regression. Nonlinear Processes in Geophysics, 18(2), +147. doi:10.5194/npg-18-147-2011 + +Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +post-processing using member-by-member approaches: theoretical aspects. +Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +doi:10.1002/qj.2397 } \seealso{ \code{\link{CST_Load}} diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index e832a539..bab3aad5 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -1,43 +1,295 @@ -context("Generic tests") -test_that("Sanity checks", { +context("CSTools::CST_Calibration tests") + +############################################## + +# dat +library(zeallot) +c(exp, obs) %<-% lonlat_temp + +# dat1 +exp1 <- list(data = array(1:20, dim = c(time = 20))) +class(exp1) <- 's2dv_cube' +obs1 <- list(data = array(1:20, dim = c(time = 20))) +class(obs1) <- 's2dv_cube' +exp1_2 <- list(data = array(1:20, dim = c(20))) +class(exp1_2) <- 's2dv_cube' +obs1_2 <- list(data = array(1:20, dim = c(20))) +class(obs1_2) <- 's2dv_cube' +exp_cor1 <- list(data = array(1:20, dim = c(20))) +class(exp_cor1) <- 's2dv_cube' + +# dat2 +exp2 <- exp +exp2$data[1, 2, 1, 1, 1, 1] <- NA +obs2 <- obs +obs2$data[1, 1, 2, 1, 1, 1] <- NA + +# dat3 +set.seed(1) +exp3 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) +exp3_1 <- array(rnorm(20), dim = c(member = 10, sdate = 10, lat = 2)) +set.seed(2) +obs3 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) +obs3_1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +exp_cor3 <- array(rnorm(20), dim = c(member = 10, sdate = 10, lat = 2)) + +# dat4 +set.seed(1) +exp4 <- array(rnorm(200), dim = c(member = 10, sdate = 20)) +exp4_1 <- array(exp4, dim = c(member = 10, sdate = 20, dataset = 1)) +exp4_2 <- exp4_1 +exp4_2[1] <- NA +set.seed(2) +obs4 <- array(rnorm(200), dim = c(sdate = 20)) +obs4_1 <- array(obs4, dim = c(sdate = 20, dataset = 1)) +obs4_2 <- obs4_1 +obs4_2[c(1, 3)] <- NA +set.seed(3) +exp_cor4 <- array(rnorm(200), dim = c(member = 10, sdate = 20)) + +############################################## + +test_that("1. Input checks", { + # s2dv_cube expect_error( CST_Calibration(exp = 1), - "Parameter 'exp' and 'obs' must be of the class 's2dv_cube', " + paste0("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") ) expect_error( - CST_Calibration(obs = 1), - c("argument \"exp\" is missing, with no default") + CST_Calibration(exp = exp, obs = 1), + paste0("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + ) + expect_error( + CST_Calibration(exp = exp1), + 'argument "obs" is missing, with no default' + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1, exp_cor = 1), + paste0("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + ) + # exp and obs + expect_error( + CST_Calibration(exp = exp1_2, obs = obs1), + "Parameter 'exp' must have dimension names." + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1_2), + "Parameter 'obs' must have dimension names." ) - library(zeallot) - c(exp, obs) %<-% lonlat_temp - cal <- CST_Calibration(exp = exp, obs = obs) - expect_equal(length(cal), 9) - expect_equal(as.numeric(dim(cal$data)), as.numeric(dim(exp$data))) - expect_equal(cal$lat, exp$lat) - expect_equal(cal$lat, obs$lat) - expect_equal(cal$lon, exp$lon) - expect_equal(cal$lon, obs$lon) - # expect_error( - # CST_Calibration(exp = exp, obs = exp), - # "The length of the dimension 'member' in the component 'data' " - # ) - - exp2 <- exp - exp2$data[1, 2, 1, 1, 1, 1] <- NA expect_warning( - CST_Calibration(exp = exp2, obs = obs), + CST_Calibration(exp = exp2, obs = obs2), "Parameter 'exp' contains NA values." ) - - obs2 <- obs - obs2$data[1, 1, 2, 1, 1, 1] <- NA expect_warning( CST_Calibration(exp = exp, obs = obs2), "Parameter 'obs' contains NA values." ) - expect_warning( CST_Calibration(exp = exp2, obs = obs2), - "Parameter 'obs' contains NA values", "Parameter 'exp' contains NA values." + "Parameter 'obs' contains NA values", + "Parameter 'exp' contains NA values." + ) + # exp_cor + expect_error( + CST_Calibration(exp = exp1, obs = obs1, exp_cor = exp_cor1, sdate_dim = 'time'), + "Parameter 'exp_cor' must have dimension names." + ) + # sdate_dim, memb_dim + expect_error( + CST_Calibration(exp = exp1, obs = obs1, sdate_dim = 1), + paste0("Parameter 'sdate_dim' should be a character string indicating the", + "name of the dimension where start dates are stored in 'exp'.") + ) + expect_error( + CST_Calibration(exp = exp, obs = obs, sdate_dim = 'time'), + paste0("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") + ) + expect_error( + Calibration(exp = array(1:20, dim = c(time = 1, member = 1)), + obs = array(1:20, dim = c(time = 2, member = 1)), sdate_dim = 'time'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1, sdate_dim = 'time'), + paste0("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") + ) + ## dat_dim + expect_error( + Calibration(exp = exp3, obs = obs3, dat_dim = 1), + paste0("Parameter 'dat_dim' must be a character string.") + ) + ## exp, obs, and exp_cor (2) + expect_error( + Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), + dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") + ) + expect_error( + Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + dat_dim = 'dataset'), + paste0("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + ) + expect_error( + Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), + obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, lon = 1)), dat_dim = 'dataset'), + paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all dimensions except 'dat_dim' if there is ", + "only one reference dataset.") + ) + ## na.rm + expect_warning( + CST_Calibration(exp = exp, obs = obs, na.rm = c(T,F)), + "Paramter 'na.rm' has length greater than 1, and only the fist element is used." + ) + # ncores + expect_error( + CST_Calibration(exp = exp, obs = obs, ncores = TRUE), + "Parameter 'ncores' must be either NULL or a positive integer." + ) +}) +############################################## + +test_that("2. Output checks: dat1", { + expect_error( + CST_Calibration(exp = 1), + "Parameter 'exp' and 'obs' must be of the class 's2dv_cube', " + ) + expect_error( + CST_Calibration(obs = 1), + c("argument \"exp\" is missing, with no default") + ) + cal <- CST_Calibration(exp = exp, obs = obs) + expect_equal( + length(cal), + 9 + ) + expect_equal( + as.numeric(dim(cal$data)), + as.numeric(dim(exp$data)) + ) + expect_equal( + cal$lat, + exp$lat + ) + expect_equal( + cal$lat, + obs$lat + ) + expect_equal( + cal$lon, + exp$lon + ) + expect_equal( + cal$lon, + obs$lon + ) + expect_equal( + dim(cal$data), + c(dataset = 1, member =15, sdate = 6, ftime = 3, lat = 22, lon = 53) + ) + expect_equal( + as.vector(cal$data)[1:5], + c(280.8678, 281.1716, 280.3992, 282.6034, 281.6749), + tolerance = 0.0001 ) }) + +############################################## + +test_that("3. Output checks: dat3", { + expect_equal( + dim(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + dim(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) + expect_equal( + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.5462052, -0.6882557, 0.7157284, 0.9850565, 0.8105716, -0.1353346), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + c(0.6156021, 0.4377728, 0.4971288, 1.6040794, -0.9029668, 1.7659136), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3[, , , 1], obs3[, , 1], memb_dim = 'member'))[1:5], + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3[, , , 1], obs3[, , 1], exp_cor3, memb_dim = 'member'))[1:5], + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset')[, , , 1, 1][1:5]) + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', na.rm = TRUE))[1:5], + c(-0.6271675, 0.0914192, 0.6832362, -0.8313392, 0.6156021), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("4. Output checks: dat4", { + expect_equal( + dim(Calibration(exp4, obs4)), + c(member = 10, sdate = 20) + ) + expect_equal( + as.vector(Calibration(exp4, obs4))[1:5], + c(-0.6372505, 0.3261267, -0.8860036, 2.0048627, 0.4995904), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, exp_cor4))[1:5], + c(-0.8226572, -0.0265896, 0.6290395, -1.0488434, 0.5541127), + tolerance = 0.0001 + ) + expect_equal( + dim(Calibration(exp4_1, obs4_1, dat_dim = 'dataset')), + c(member = 10, sdate = 20, nexp = 1, nobs = 1) + ) + expect_equal( + as.vector(Calibration(exp4_1, obs4_1, dat_dim = 'dataset')), + as.vector(Calibration(exp4, obs4)), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4_1, obs4_1, exp_cor4, dat_dim = 'dataset')), + as.vector(Calibration(exp4, obs4, exp_cor4)), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(Calibration(exp4_1, obs4_2, dat_dim = 'dataset', na.rm = FALSE)) + ), + rep(as.numeric(NA), 200) + ) + expect_equal( + suppressWarnings( + as.vector(Calibration(exp4_1, obs4_2, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.4343443, -0.9018916, 0.6178439, 0.9093767, 0.7205064, -0.3033850), + tolerance = 0.0001 + ) + expect_equal( + suppressWarnings( + as.vector(Calibration(exp4_2, obs4_2, exp_cor4, dat_dim = 'dataset', na.rm = T))[5:10] + ), + c(0.4590841, 0.2665937, 0.3308434, 1.5290565, -1.1846831, 1.7042331), + tolerance = 0.0001 + ) +}) -- GitLab From 14680676b17da0385e15babb890819c4f977ce7f Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 25 Nov 2022 18:06:02 +0100 Subject: [PATCH 03/13] Fix pipeline --- R/CST_Calibration.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index df5aaebc..d515fe6a 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -383,12 +383,12 @@ Calibration <- function(exp, obs, exp_cor = NULL, } else { target_dims_cor <- c(memb_dim, sdate_dim) } - if (!identical(length(name_exp), length(name_exp_cor)) | - !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { - stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all dimensions except 'dat_dim' if there is ", - "only one reference dataset.")) - } + # if (!identical(length(name_exp), length(name_exp_cor)) | + # !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { + # stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + # "all dimensions except 'dat_dim' if there is ", + # "only one reference dataset.")) + # } } ## ncores if (!is.null(ncores)) { -- GitLab From 48223ae63afbcad86f0aaf698ed8e5038ab74592 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 25 Nov 2022 18:19:12 +0100 Subject: [PATCH 04/13] Fix pipeline --- R/CST_Calibration.R | 16 ++++++++-------- man/CST_Calibration.Rd | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index d515fe6a..ef37cd68 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -84,9 +84,9 @@ #' #'# Example 2: #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -#'mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7) +#'mod2 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) #'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7) +#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) #'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) #'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) #'lon <- seq(0, 30, 5) @@ -383,12 +383,12 @@ Calibration <- function(exp, obs, exp_cor = NULL, } else { target_dims_cor <- c(memb_dim, sdate_dim) } - # if (!identical(length(name_exp), length(name_exp_cor)) | - # !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { - # stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - # "all dimensions except 'dat_dim' if there is ", - # "only one reference dataset.")) - # } + if (!identical(length(name_exp), length(name_exp_cor)) | + !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { + stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all dimensions except 'dat_dim' if there is ", + "only one reference dataset.")) + } } ## ncores if (!is.null(ncores)) { diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 6affe1d6..e039de08 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -115,9 +115,9 @@ str(a) # Example 2: mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7) +mod2 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7) +dim(mod2) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) lon <- seq(0, 30, 5) -- GitLab From 50d461c9c69a7ea618caabb45c800e3065638ee8 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 28 Nov 2022 17:43:41 +0100 Subject: [PATCH 05/13] Correct dat_dim development for method rcp-based and added tests --- R/CST_Calibration.R | 53 ++++++++++--------- man/CST_Calibration.Rd | 4 +- man/Calibration.Rd | 10 ++-- tests/testthat/test-CST_Calibration.R | 74 +++++++++++++++++++-------- 4 files changed, 89 insertions(+), 52 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index ef37cd68..6c23ef83 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -84,9 +84,9 @@ #' #'# Example 2: #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -#'mod2 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) +#'mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7) #'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7) #'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) #'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) #'lon <- seq(0, 30, 5) @@ -117,7 +117,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", } dimnames <- names(dim(exp_cor$data)) # if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" - # if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the + # if exp_cor is provided, eval.method is overruled (because if exp_cor is provided, the # train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; # no need for "leave-one-out" or "in-sample") eval.method = "hindcast-vs-forecast" @@ -198,9 +198,8 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@param exp_cor An optional multidimensional array with named dimensions (at #' least 'sdate' and 'member') containing the seasonal forecast experiment #' data. If the forecast is provided, it will be calibrated using the hindcast -#' and observations; if not, the hindcast will be calibrated instead. The -#' dimensions must be the same as 'exp' except dataset dimension. If there is -#' only one corrected dataset, it should not have dataset dimension. If there +#' and observations; if not, the hindcast will be calibrated instead. If there +#' is only one corrected dataset, it should not have dataset dimension. If there #' is a corresponding corrected dataset for each 'exp' forecast, the dataset #' dimension must have the same length as in 'exp'. The default value is NULL. #'@param cal.method A character string indicating the calibration method used, @@ -240,8 +239,10 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computation using multiApply function. The default value is NULL (one core). -#'@return an array containing the calibrated forecasts with the same dimensions -#'as the \code{exp} array. +#' +#'@return An array containing the calibrated forecasts with the same dimensions +#'as the \code{exp} array. If \code{exp_cor} is provided the returned array will +#'be with the same dimensions as \code{exp_cor}. #' #'@importFrom s2dv InsertDim MeanDims Reorder #'@import abind @@ -298,6 +299,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, } ## exp_cor if (!is.null(exp_cor)) { + eval.method = "hindcast-vs-forecast" expcordims <- names(dim(exp_cor)) if (is.null(expcordims)) { stop("Parameter 'exp_cor' must have dimension names.") @@ -379,15 +381,15 @@ Calibration <- function(exp, obs, exp_cor = NULL, } else { target_dims_cor <- c(memb_dim, sdate_dim) } - name_exp <- name_exp[-which(name_exp == dat_dim)] } else { target_dims_cor <- c(memb_dim, sdate_dim) } + name_exp <- name_exp[-which(name_exp %in% target_dims_exp)] + name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)] if (!identical(length(name_exp), length(name_exp_cor)) | !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all dimensions except 'dat_dim' if there is ", - "only one reference dataset.")) + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.")) } } ## ncores @@ -530,7 +532,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, for (j in 1:nobs) { if (!.data.set.sufficiently.large(exp = exp[, , i], obs = obs[, j])) { return_data_na <- TRUE - var.cor.fc <- NA*exp[, , i] + if (cor_dat_dim) { + var.cor.fc <- NA * exp_cor[, , i] + } else { + var.cor.fc <- NA * exp_cor[, i] + } if (!na.fill) { var.cor.fc[, , i] <- exp[, , i] } @@ -546,7 +552,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, sdate_cor <- expdims_cor[2] if (!return_data_na) { - var.cor.fc <- array(dim = c(dim(exp)[1:2], nexp = nexp, nobs = nobs)) + var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { obs_data <- as.vector(obs[, j]) @@ -565,7 +571,6 @@ Calibration <- function(exp, obs, exp_cor = NULL, eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes - fc.ev <- expcor_data[ , eval.dexes, drop = FALSE] fc.tr <- exp_data[ , train.dexes] obs.tr <- obs_data[train.dexes , drop = FALSE] @@ -599,16 +604,14 @@ Calibration <- function(exp, obs, exp_cor = NULL, r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean - if (!is.null(apply_to)) { - if ((apply_to == 'all') || (apply_to == 'sign' && - cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { - ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr - var.cor.fc[ , eval.dexes, i, j] <- s2dv::Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, - ens_mean_cal = ens_mean_cal), - target_dims = names(sdate), fun = .CalibrationMembersRPC, - var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, - order = names(expdims)) - } + if ((apply_to == 'all') || (apply_to == 'sign' && + cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes, i, j] <- s2dv::Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, + ens_mean_cal = ens_mean_cal), + target_dims = names(sdate), fun = .CalibrationMembersRPC, + var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(expdims)[1:2]) } else { var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) } @@ -621,7 +624,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (is.null(dat_dim)) { - dim(var.cor.fc) <- dim(exp)[1:2] + dim(var.cor.fc) <- dim(exp_cor)[1:2] } return(var.cor.fc) diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index e039de08..6affe1d6 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -115,9 +115,9 @@ str(a) # Example 2: mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) -mod2 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) +mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7) dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -dim(mod2) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7) obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) lon <- seq(0, 30, 5) diff --git a/man/Calibration.Rd b/man/Calibration.Rd index a1cbdee9..d2b49327 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -33,9 +33,8 @@ containing the observed data.} \item{exp_cor}{An optional multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal forecast experiment data. If the forecast is provided, it will be calibrated using the hindcast -and observations; if not, the hindcast will be calibrated instead. The -dimensions must be the same as 'exp' except dataset dimension. If there is -only one corrected dataset, it should not have dataset dimension. If there +and observations; if not, the hindcast will be calibrated instead. If there +is only one corrected dataset, it should not have dataset dimension. If there is a corresponding corrected dataset for each 'exp' forecast, the dataset dimension must have the same length as in 'exp'. The default value is NULL.} @@ -88,8 +87,9 @@ The default value is NULL.} computation using multiApply function. The default value is NULL (one core).} } \value{ -an array containing the calibrated forecasts with the same dimensions -as the \code{exp} array. +An array containing the calibrated forecasts with the same dimensions +as the \code{exp} array. If \code{exp_cor} is provided the returned array will +be with the same dimensions as \code{exp_cor}. } \description{ Five types of member-by-member bias correction can be performed. diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index bab3aad5..803c8fbe 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -27,12 +27,12 @@ obs2$data[1, 1, 2, 1, 1, 1] <- NA # dat3 set.seed(1) exp3 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) -exp3_1 <- array(rnorm(20), dim = c(member = 10, sdate = 10, lat = 2)) +exp3_1 <- array(rnorm(200), dim = c(member = 10, sdate = 10, lat = 2)) set.seed(2) obs3 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) obs3_1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) -exp_cor3 <- array(rnorm(20), dim = c(member = 10, sdate = 10, lat = 2)) +exp_cor3 <- array(rnorm(200), dim = c(member = 10, sdate = 10, lat = 2)) # dat4 set.seed(1) @@ -41,7 +41,7 @@ exp4_1 <- array(exp4, dim = c(member = 10, sdate = 20, dataset = 1)) exp4_2 <- exp4_1 exp4_2[1] <- NA set.seed(2) -obs4 <- array(rnorm(200), dim = c(sdate = 20)) +obs4 <- array(rnorm(20), dim = c(sdate = 20)) obs4_1 <- array(obs4, dim = c(sdate = 20, dataset = 1)) obs4_2 <- obs4_1 obs4_2[c(1, 3)] <- NA @@ -80,19 +80,11 @@ test_that("1. Input checks", { CST_Calibration(exp = exp1, obs = obs1_2), "Parameter 'obs' must have dimension names." ) - expect_warning( - CST_Calibration(exp = exp2, obs = obs2), - "Parameter 'exp' contains NA values." - ) - expect_warning( - CST_Calibration(exp = exp, obs = obs2), - "Parameter 'obs' contains NA values." - ) expect_warning( CST_Calibration(exp = exp2, obs = obs2), "Parameter 'obs' contains NA values", "Parameter 'exp' contains NA values." - ) + ) # exp_cor expect_error( CST_Calibration(exp = exp1, obs = obs1, exp_cor = exp_cor1, sdate_dim = 'time'), @@ -142,12 +134,11 @@ test_that("1. Input checks", { expect_error( Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), - exp_cor = array(1:6, c(sdate = 3, member = 1, lon = 1)), dat_dim = 'dataset'), + exp_cor = array(1:6, c(sdate = 3, member = 2, lon = 2)), dat_dim = 'dataset'), paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all dimensions except 'dat_dim' if there is ", - "only one reference dataset.") + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") ) - ## na.rm + # na.rm expect_warning( CST_Calibration(exp = exp, obs = obs, na.rm = c(T,F)), "Paramter 'na.rm' has length greater than 1, and only the fist element is used." @@ -157,6 +148,11 @@ test_that("1. Input checks", { CST_Calibration(exp = exp, obs = obs, ncores = TRUE), "Parameter 'ncores' must be either NULL or a positive integer." ) + # cal.method + expect_error( + as.vector(Calibration(exp4, obs4, cal.method = 'biass')), + "Parameter 'cal.method' must be a character string indicating the calibration method used." + ) }) ############################################## @@ -223,7 +219,7 @@ test_that("3. Output checks: dat3", { ) expect_equal( as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset'))[5:10], - c(0.6156021, 0.4377728, 0.4971288, 1.6040794, -0.9029668, 1.7659136), + c(0.4908992, 0.3054365, 0.3673404, 1.5218074, -1.0928551, 1.6905885), tolerance = 0.0001 ) expect_equal( @@ -237,7 +233,7 @@ test_that("3. Output checks: dat3", { ) expect_equal( as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', na.rm = TRUE))[1:5], - c(-0.6271675, 0.0914192, 0.6832362, -0.8313392, 0.6156021), + c(-0.80521700, -0.05578454, 0.56143657, -1.01815286, 0.49089916), tolerance = 0.0001 ) }) @@ -255,7 +251,7 @@ test_that("4. Output checks: dat4", { ) expect_equal( as.vector(Calibration(exp4, obs4, exp_cor4))[1:5], - c(-0.8226572, -0.0265896, 0.6290395, -1.0488434, 0.5541127), + c(-0.88657225, -0.08128503, 0.58193721, -1.11537810, 0.50614269), tolerance = 0.0001 ) expect_equal( @@ -289,7 +285,45 @@ test_that("4. Output checks: dat4", { suppressWarnings( as.vector(Calibration(exp4_2, obs4_2, exp_cor4, dat_dim = 'dataset', na.rm = T))[5:10] ), - c(0.4590841, 0.2665937, 0.3308434, 1.5290565, -1.1846831, 1.7042331), + c(0.4583975, 0.2645130, 0.3292279, 1.5361189, -1.1972745, 1.7125642), + tolerance = 0.0001 + ) +}) +############################################## +test_that("5. Output checks: dat4", { + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'bias'))[1:7], + c(-0.4039514, 0.4061457, -0.6131262, 1.8177832, 0.5520101, -0.5979660, 0.7099314), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'evmos'))[1:7], + c(-0.4860875, 0.4252999, -0.7214164, 2.0134410, 0.5894025, -0.7043606, 0.7670694), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'mse_min', multi.model = TRUE))[1:7], + c(-0.5932137, 0.3231407, -0.8298251, 1.9199372, 0.4881377, -0.8126764, 0.6667729), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'crps_min'))[1:7], + c(-0.6381684, 0.3261889, -0.8871746, 2.0066329, 0.4998291, -0.8691275, 0.6878220), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign'))[1:7], + c(-0.8597528, 0.1036243, -1.1085060, 1.7823603, 0.2770880, -1.0904773, 0.4648899), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, exp_cor4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign'))[1:7], + c(-1.0464936, -0.2412064, 0.4220158, -1.2752995, 0.3462213, 0.1469362, 0.2134538), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp4, obs4, eval.method = "in-sample"))[1:5], + c(-0.7119142, 0.2626203, -0.9635483, 1.9607986, 0.4380930), tolerance = 0.0001 ) }) -- GitLab From 6b5eb3d4c0b49303b567efe6d92cbf1ee881b18d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 29 Nov 2022 10:32:07 +0100 Subject: [PATCH 06/13] Add tests, improve documentation and improve code --- R/CST_Calibration.R | 207 +++++++++++++++++--------- man/CST_Calibration.Rd | 87 ++++++++--- man/Calibration.Rd | 8 +- tests/testthat/test-CST_Calibration.R | 45 +++++- 4 files changed, 245 insertions(+), 102 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 6c23ef83..34fb8809 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -2,25 +2,40 @@ #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Equivalent to function \code{Calibration} but for objects of -#'class \code{s2dv_cube}. +#'@description Five types of member-by-member bias correction can be performed. +#'The \code{"bias"} method corrects the bias only, the \code{"evmos"} method +#'applies a variance inflation technique to ensure the correction of the bias +#'and the correspondence of variance between forecast and observation (Van +#'Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods +#'\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast +#'variance and the ensemble spread as described in Doblas-Reyes et al. (2005) +#'and Van Schaeybroeck and Vannitsem (2015), respectively. While the +#'\code{"mse_min"} method minimizes a constrained mean-squared error using three +#'parameters, the \code{"crps_min"} method features four parameters and +#'minimizes the Continuous Ranked Probability Score (CRPS). The +#'\code{"rpc-based"} method adjusts the forecast variance ensuring that the +#'ratio of predictable components (RPC) is equal to one, as in Eade et al. +#'(2014). +#'@description It is equivalent to function \code{Calibration} but for objects +#'of class \code{s2dv_cube}. #' #'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the seasonal hindcast experiment data in the element -#' named \code{$data}. The hindcast is used to calibrate the forecast in case -#' the forecast is provided; if not, the same hindcast will be calibrated -#' instead. +#' function with at least 'sdate' and 'member' dimensions, containing the +#' seasonal hindcast experiment data in the element named \code{data}. The +#' hindcast is used to calibrate the forecast in case the forecast is provided; +#' if not, the same hindcast will be calibrated instead. #'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. +#' function with at least 'sdate' dimension, containing the observed data in +#' the element named \code{$data}. #'@param exp_cor An optional object of class \code{s2dv_cube} as returned by -#' \code{CST_Load} function, containing the seasonal forecast experiment data -#' in the element named \code{$data}. If the forecast is provided, it will be -#' calibrated using the hindcast and observations; if not, the hindcast will -#' be calibrated instead. The dimensions must be the same as 'exp' except -#' dataset dimension. If there is only one corrected dataset, it should not -#' have dataset dimension. If there is a corresponding corrected dataset for -#' each 'exp' forecast, the dataset dimension must have the same length as in -#' 'exp'. The default value is NULL. +#' \code{CST_Load} function with at least 'sdate' and 'member' dimensions, +#' containing the seasonal forecast experiment data in the element named +#' \code{data}. If the forecast is provided, it will be calibrated using the +#' hindcast and observations; if not, the hindcast will be calibrated instead. +#' If there is only one corrected dataset, it should not have dataset dimension. +#' If there is a corresponding corrected dataset for each 'exp' forecast, the +#' dataset dimension must have the same length as in 'exp'. The default value +#' is NULL. #'@param cal.method A character string indicating the calibration method used, #' can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or #' \code{rpc-based}. Default value is \code{mse_min}. @@ -48,7 +63,8 @@ #' correlation between the ensemble mean and the observations is statistically #' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. #'@param alpha A numeric value indicating the significance level for the -#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to +#' == "sign"}. #'@param memb_dim A character string indicating the name of the member dimension. #' By default, it is set to 'member'. #'@param sdate_dim A character string indicating the name of the start date @@ -58,15 +74,39 @@ #' The default value is NULL. #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is one. -#'@return An object of class \code{s2dv_cube} containing the calibrated forecasts -#' in the element \code{$data} with the same dimensions as the one in the exp -#' object. -#' -#'@importFrom s2dv InsertDim -#'@import abind +#' +#'@return An object of class \code{s2dv_cube} containing the calibrated +#'forecasts in the element \code{data} with the same dimensions as in the 'exp' +#'object. If 'exp_cor' is provided the returned array will be with the same +#'dimensions as 'exp_cor'. #' +#'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to +#'indicate how the function has to handle the NA values. The \code{na.fill} +#'parameter checks whether there are more than three forecast-observations pairs +#'to perform the computation. In case there are three or less pairs, the +#'computation is not carried out, and the value returned by the function depends +#'on the value of this parameter (either NA if \code{na.fill == TRUE} or the +#'uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} +#'is used to indicate the function whether to remove the missing values during +#'the computation of the parameters needed to perform the calibration. +#' +#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +#'success of multi-model ensembles in seasonal forecasting-II calibration and +#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +#'predictions underestimate the predictability of the read world? Geophysical +#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +#'through linear regression. Nonlinear Processes in Geophysics, 18(2), +#'147. doi:10.5194/npg-18-147-2011 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +#'post-processing using member-by-member approaches: theoretical aspects. +#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +#'doi:10.1002/qj.2397 +#' #'@seealso \code{\link{CST_Load}} -#' +#' #'@examples #'# Example 1: #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -99,8 +139,11 @@ #'attr(exp_cor, 'class') <- 's2dv_cube' #'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") #'str(a) +#' +#'@importFrom s2dv InsertDim Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export - CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, @@ -174,20 +217,6 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'(2014). #'@description Both in-sample or our out-of-sample (leave-one-out cross #'validation) calibration are possible. -#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the -#'success of multi-model ensembles in seasonal forecasting-II calibration and -#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x -#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., -#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate -#'predictions underestimate the predictability of the read world? Geophysical -#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing -#'through linear regression. Nonlinear Processes in Geophysics, 18(2), -#'147. doi:10.5194/npg-18-147-2011 -#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble -#'post-processing using member-by-member approaches: theoretical aspects. -#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. -#'doi:10.1002/qj.2397 #' #'@param exp A multidimensional array with named dimensions (at least 'sdate' #' and 'member') containing the seasonal hindcast experiment data. The hindcast @@ -229,7 +258,8 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #' correlation between the ensemble mean and the observations is statistically #' significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. #'@param alpha A numeric value indicating the significance level for the -#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#' correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == +#' "sign"}. #'@param memb_dim A character string indicating the name of the member #' dimension. By default, it is set to 'member'. #'@param sdate_dim A character string indicating the name of the start date @@ -241,15 +271,8 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #' computation using multiApply function. The default value is NULL (one core). #' #'@return An array containing the calibrated forecasts with the same dimensions -#'as the \code{exp} array. If \code{exp_cor} is provided the returned array will -#'be with the same dimensions as \code{exp_cor}. -#' -#'@importFrom s2dv InsertDim MeanDims Reorder -#'@import abind -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#' -#'@seealso \code{\link{CST_Load}} +#'as the 'exp' array. If 'exp_cor' is provided the returned array will +#'be with the same dimensions as 'exp_cor'. #' #'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to #'indicate how the function has to handle the NA values. The \code{na.fill} @@ -261,6 +284,23 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'is used to indicate the function whether to remove the missing values during #'the computation of the parameters needed to perform the calibration. #' +#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +#'success of multi-model ensembles in seasonal forecasting-II calibration and +#'combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +#'Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +#'predictions underestimate the predictability of the read world? Geophysical +#'Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +#'through linear regression. Nonlinear Processes in Geophysics, 18(2), +#'147. doi:10.5194/npg-18-147-2011 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +#'post-processing using member-by-member approaches: theoretical aspects. +#'Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +#'doi:10.1002/qj.2397 +#' +#'@seealso \code{\link{CST_Load}} +#' #'@examples #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) #'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) @@ -268,6 +308,10 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) #'a <- Calibration(exp = mod1, obs = obs1) #'str(a) +#' +#'@importFrom s2dv InsertDim Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", eval.method = "leave-one-out", @@ -499,11 +543,12 @@ Calibration <- function(exp, obs, exp_cor = NULL, return(dexes.lst) } -.cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", eval.method = "leave-one-out", - multi.model = FALSE, na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL) { +.cal <- function(exp, obs, exp_cor = NULL, dat_dim = NULL, cal.method = "mse_min", + eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL) { - # exp: [memb_dim, sdate_dim, (dat)] - # obs: [sdate_dim (dat)] + # exp: [memb, sdate, (dat)] + # obs: [sdate (dat)] # exp_cor: [memb, sdate, (dat)] or NULL if (is.null(dat_dim)) { @@ -517,6 +562,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (is.null(exp_cor)) { + # generate a copy of exp exp_cor <- exp cor_dat_dim <- TRUE } else { @@ -568,7 +614,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, amt.points_cor = sdate_cor) amt.resamples <- length(eval.train.dexeses) for (i.sample in seq(1, amt.resamples)) { - + # defining training (tr) and evaluation (ev) subsets + # fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes fc.ev <- expcor_data[ , eval.dexes, drop = FALSE] @@ -577,9 +624,14 @@ Calibration <- function(exp, obs, exp_cor = NULL, if (cal.method == "bias") { var.cor.fc[ , eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + # forecast correction implemented } else if (cal.method == "evmos") { + # forecast correction implemented + # ensemble and observational characteristics quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + # calculate value for regression parameters init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + # correct evaluation subset var.cor.fc[ , eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "mse_min") { quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) @@ -589,30 +641,38 @@ Calibration <- function(exp, obs, exp_cor = NULL, quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) init.par[3] <- sqrt(init.par[3]) + # calculate regression parameters on training dataset optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, - quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS") + quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS") mbm.par <- optim.tmp$par var.cor.fc[ , eval.dexes, i, j] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) } else if (cal.method == 'rpc-based') { - ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean - ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 ## Ensemble spread - exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) - var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance - var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) - var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations + # Ensemble mean + ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + # Ensemble spread + ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 + # Mean (climatology) + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) + # Ensemble mean variance + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) + # Variance of ensemble members about ensemble mean (= spread) + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) + # Variance in the observations + var_obs.tr <- var(obs.tr, na.rm = na.rm) + # Correlation between observations and the ensemble mean r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', - use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) - ## Correlation between observations and the ensemble mean + use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr - var.cor.fc[ , eval.dexes, i, j] <- s2dv::Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, - ens_mean_cal = ens_mean_cal), + var.cor.fc[ , eval.dexes, i, j] <- Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, + ens_mean_cal = ens_mean_cal), target_dims = names(sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, - order = names(expdims)[1:2]) + order = names(expdims)[1:2]) } else { + # no significant -> replacing with observed climatology var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) } } else { @@ -630,8 +690,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, return(var.cor.fc) } - -#function to calculate different quantities of a series of ensemble forecasts and corresponding observations +# Function to calculate different quantities of a series of ensemble forecasts and corresponding observations .calc.obs.fc.quant <- function(obs, fc, na.rm) { amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) @@ -652,7 +711,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, ) } -#extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +# Extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations .calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ amt.mbr <- dim(fc)[1] obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) @@ -674,7 +733,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, ) } -#function to calculate different quantities of a series of ensemble forecasts +# Function to calculate different quantities of a series of ensemble forecasts .calc.fc.quant <- function(fc, na.rm) { amt.mbr <- dim(fc)[1] fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) @@ -703,7 +762,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, ) } -#extended function to calculate different quantities of a series of ensemble forecasts +# Extended function to calculate different quantities of a series of ensemble forecasts .calc.fc.quant.ext <- function(fc, na.rm){ amt.mbr <- dim(fc)[1] repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) @@ -717,7 +776,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, ) } -#Below are the core or elementary functions to calculate the regression parameters for the different methods +# Below are the core or elementary functions to calculate the regression parameters for the different methods .calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm) { par.out <- rep(NA, 3) if (multi.model) { @@ -738,7 +797,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, return(par.out) } -#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps +# Below are the core or elementary functions to calculate the functions necessary for the minimization of crps .calc.crps.opt <- function(par, quant.obs.fc, na.rm){ return( with(quant.obs.fc, @@ -765,7 +824,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4)) } -#Below are the core or elementary functions to correct the evaluation set based on the regression parameters +# Below are the core or elementary functions to correct the evaluation set based on the regression parameters .correct.evmos.fc <- function(fc, par, na.rm) { quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc)) diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 6affe1d6..865d9f13 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -23,23 +23,24 @@ CST_Calibration( } \arguments{ \item{exp}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} -function, containing the seasonal hindcast experiment data in the element -named \code{$data}. The hindcast is used to calibrate the forecast in case -the forecast is provided; if not, the same hindcast will be calibrated -instead.} +function with at least 'sdate' and 'member' dimensions, containing the +seasonal hindcast experiment data in the element named \code{data}. The +hindcast is used to calibrate the forecast in case the forecast is provided; +if not, the same hindcast will be calibrated instead.} \item{obs}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} -function, containing the observed data in the element named \code{$data}.} +function with at least 'sdate' dimension, containing the observed data in +the element named \code{$data}.} \item{exp_cor}{An optional object of class \code{s2dv_cube} as returned by -\code{CST_Load} function, containing the seasonal forecast experiment data -in the element named \code{$data}. If the forecast is provided, it will be -calibrated using the hindcast and observations; if not, the hindcast will -be calibrated instead. The dimensions must be the same as 'exp' except -dataset dimension. If there is only one corrected dataset, it should not -have dataset dimension. If there is a corresponding corrected dataset for -each 'exp' forecast, the dataset dimension must have the same length as in -'exp'. The default value is NULL.} +\code{CST_Load} function with at least 'sdate' and 'member' dimensions, +containing the seasonal forecast experiment data in the element named +\code{data}. If the forecast is provided, it will be calibrated using the +hindcast and observations; if not, the hindcast will be calibrated instead. +If there is only one corrected dataset, it should not have dataset dimension. +If there is a corresponding corrected dataset for each 'exp' forecast, the +dataset dimension must have the same length as in 'exp'. The default value +is NULL.} \item{cal.method}{A character string indicating the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or @@ -74,7 +75,8 @@ correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} \item{alpha}{A numeric value indicating the significance level for the -correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} +correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to +== "sign"}.} \item{memb_dim}{A character string indicating the name of the member dimension. By default, it is set to 'member'.} @@ -90,13 +92,40 @@ The default value is NULL.} computations using multiApply function. The default value is one.} } \value{ -An object of class \code{s2dv_cube} containing the calibrated forecasts - in the element \code{$data} with the same dimensions as the one in the exp - object. +An object of class \code{s2dv_cube} containing the calibrated +forecasts in the element \code{data} with the same dimensions as in the 'exp' +object. If 'exp_cor' is provided the returned array will be with the same +dimensions as 'exp_cor'. } \description{ -Equivalent to function \code{Calibration} but for objects of -class \code{s2dv_cube}. +Five types of member-by-member bias correction can be performed. +The \code{"bias"} method corrects the bias only, the \code{"evmos"} method +applies a variance inflation technique to ensure the correction of the bias +and the correspondence of variance between forecast and observation (Van +Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods +\code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast +variance and the ensemble spread as described in Doblas-Reyes et al. (2005) +and Van Schaeybroeck and Vannitsem (2015), respectively. While the +\code{"mse_min"} method minimizes a constrained mean-squared error using three +parameters, the \code{"crps_min"} method features four parameters and +minimizes the Continuous Ranked Probability Score (CRPS). The +\code{"rpc-based"} method adjusts the forecast variance ensuring that the +ratio of predictable components (RPC) is equal to one, as in Eade et al. +(2014). + +It is equivalent to function \code{Calibration} but for objects +of class \code{s2dv_cube}. +} +\details{ +Both the \code{na.fill} and \code{na.rm} parameters can be used to +indicate how the function has to handle the NA values. The \code{na.fill} +parameter checks whether there are more than three forecast-observations pairs +to perform the computation. In case there are three or less pairs, the +computation is not carried out, and the value returned by the function depends +on the value of this parameter (either NA if \code{na.fill == TRUE} or the +uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} +is used to indicate the function whether to remove the missing values during +the computation of the parameters needed to perform the calibration. } \examples{ # Example 1: @@ -130,6 +159,26 @@ attr(obs, 'class') <- 's2dv_cube' attr(exp_cor, 'class') <- 's2dv_cube' a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") str(a) + +} +\references{ +Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the +success of multi-model ensembles in seasonal forecasting-II calibration and +combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x + +Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., +Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate +predictions underestimate the predictability of the read world? Geophysical +Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 + +Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing +through linear regression. Nonlinear Processes in Geophysics, 18(2), +147. doi:10.5194/npg-18-147-2011 + +Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble +post-processing using member-by-member approaches: theoretical aspects. +Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. +doi:10.1002/qj.2397 } \seealso{ \code{\link{CST_Load}} diff --git a/man/Calibration.Rd b/man/Calibration.Rd index d2b49327..6871329a 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -71,7 +71,8 @@ correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} \item{alpha}{A numeric value indicating the significance level for the -correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} +correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == +"sign"}.} \item{memb_dim}{A character string indicating the name of the member dimension. By default, it is set to 'member'.} @@ -88,8 +89,8 @@ computation using multiApply function. The default value is NULL (one core).} } \value{ An array containing the calibrated forecasts with the same dimensions -as the \code{exp} array. If \code{exp_cor} is provided the returned array will -be with the same dimensions as \code{exp_cor}. +as the 'exp' array. If 'exp_cor' is provided the returned array will +be with the same dimensions as 'exp_cor'. } \description{ Five types of member-by-member bias correction can be performed. @@ -128,6 +129,7 @@ obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) a <- Calibration(exp = mod1, obs = obs1) str(a) + } \references{ Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 803c8fbe..9d4465af 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -5,6 +5,8 @@ context("CSTools::CST_Calibration tests") # dat library(zeallot) c(exp, obs) %<-% lonlat_temp +exp$data <- ClimProjDiags::Subset(exp$data, c('lat', 'lon'), list(1:5, 1:5)) +obs$data <- ClimProjDiags::Subset(obs$data, c('lat', 'lon'), list(1:5, 1:5)) # dat1 exp1 <- list(data = array(1:20, dim = c(time = 20))) @@ -192,7 +194,7 @@ test_that("2. Output checks: dat1", { ) expect_equal( dim(cal$data), - c(dataset = 1, member =15, sdate = 6, ftime = 3, lat = 22, lon = 53) + c(dataset = 1, member =15, sdate = 6, ftime = 3, lat = 5, lon = 5) ) expect_equal( as.vector(cal$data)[1:5], @@ -239,7 +241,38 @@ test_that("3. Output checks: dat3", { }) ############################################## -test_that("4. Output checks: dat4", { + +test_that("4. Output checks: dat3", { + expect_equal( + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'bias'))[1:7], + c(-0.3984805, 0.4116167, -0.6076553, 1.8232541, 0.5574811, -0.5924950, 0.7154024), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'evmos'))[1:7], + c(-0.9631369, -0.2290479, 0.3755366, -1.1717133, 0.3064434, 0.1247777, 0.1854143), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'mse_min', multi.model = TRUE))[5:10], + c(0.5364620, -0.6412113, 0.6981868, 0.9551252, 0.7886670, -0.1137257), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'crps_min'))[5:10], + c(0.4975732, 0.3078346, 0.3717602, 1.5607014, -1.1259947, 1.7316233), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Calibration(exp3, obs3, exp_cor3, memb_dim = 'member', dat_dim = 'dataset', cal.method = 'rpc-based', alpha = 0.05, apply_to = 'all'))[5:10], + c(0.4178384, 0.2323757, 0.2942796, 1.4487467, -1.1659159, 1.6175277), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("5. Output checks: dat4", { expect_equal( dim(Calibration(exp4, obs4)), c(member = 10, sdate = 20) @@ -250,7 +283,7 @@ test_that("4. Output checks: dat4", { tolerance = 0.0001 ) expect_equal( - as.vector(Calibration(exp4, obs4, exp_cor4))[1:5], + as.vector(Calibration(exp4, obs4, exp_cor4, eval.method = "hindcast-vs-forecast"))[1:5], c(-0.88657225, -0.08128503, 0.58193721, -1.11537810, 0.50614269), tolerance = 0.0001 ) @@ -290,7 +323,7 @@ test_that("4. Output checks: dat4", { ) }) ############################################## -test_that("5. Output checks: dat4", { +test_that("6. Output checks: dat4", { expect_equal( as.vector(Calibration(exp4, obs4, cal.method = 'bias'))[1:7], c(-0.4039514, 0.4061457, -0.6131262, 1.8177832, 0.5520101, -0.5979660, 0.7099314), @@ -316,8 +349,8 @@ test_that("5. Output checks: dat4", { c(-0.8597528, 0.1036243, -1.1085060, 1.7823603, 0.2770880, -1.0904773, 0.4648899), tolerance = 0.0001 ) - expect_equal( - as.vector(Calibration(exp4, obs4, exp_cor4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign'))[1:7], + expect_equal( # not equal!! + as.vector(Calibration(exp4, obs4, exp_cor4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign', eval.method = "hindcast-vs-forecast"))[1:7], c(-1.0464936, -0.2412064, 0.4220158, -1.2752995, 0.3462213, 0.1469362, 0.2134538), tolerance = 0.0001 ) -- GitLab From d1c0e8f9d58c7d25cd297bb43119820d38495b38 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 29 Nov 2022 10:52:34 +0100 Subject: [PATCH 07/13] Improve documentation of the returned value --- R/CST_Calibration.R | 17 +++++++++++------ man/CST_Calibration.Rd | 8 +++++--- man/Calibration.Rd | 9 ++++++--- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 34fb8809..21e2f9b8 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -76,9 +76,11 @@ #' computations using multiApply function. The default value is one. #' #'@return An object of class \code{s2dv_cube} containing the calibrated -#'forecasts in the element \code{data} with the same dimensions as in the 'exp' -#'object. If 'exp_cor' is provided the returned array will be with the same -#'dimensions as 'exp_cor'. +#'forecasts in the element \code{data} with the dimensions nexp, nobs and same +#'dimensions as in the 'exp' object. nexp is the number of experiment +#'(i.e., 'dat_dim' in exp), and nobs is the number of observation (i.e., +#''dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If 'exp_cor' +#'is provided the returned array will be with the same dimensions as 'exp_cor'. #' #'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to #'indicate how the function has to handle the NA values. The \code{na.fill} @@ -270,9 +272,12 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'@param ncores An integer that indicates the number of cores for parallel #' computation using multiApply function. The default value is NULL (one core). #' -#'@return An array containing the calibrated forecasts with the same dimensions -#'as the 'exp' array. If 'exp_cor' is provided the returned array will -#'be with the same dimensions as 'exp_cor'. +#'@return An array containing the calibrated forecasts with the dimensions +#'nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of +#'experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'If 'exp_cor' is provided the returned array will be with the same dimensions as +#''exp_cor'. #' #'@details Both the \code{na.fill} and \code{na.rm} parameters can be used to #'indicate how the function has to handle the NA values. The \code{na.fill} diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 865d9f13..d99e5a2a 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -93,9 +93,11 @@ computations using multiApply function. The default value is one.} } \value{ An object of class \code{s2dv_cube} containing the calibrated -forecasts in the element \code{data} with the same dimensions as in the 'exp' -object. If 'exp_cor' is provided the returned array will be with the same -dimensions as 'exp_cor'. +forecasts in the element \code{data} with the dimensions nexp, nobs and same +dimensions as in the 'exp' object. nexp is the number of experiment +(i.e., 'dat_dim' in exp), and nobs is the number of observation (i.e., +'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If 'exp_cor' +is provided the returned array will be with the same dimensions as 'exp_cor'. } \description{ Five types of member-by-member bias correction can be performed. diff --git a/man/Calibration.Rd b/man/Calibration.Rd index 6871329a..cd66ad29 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -88,9 +88,12 @@ The default value is NULL.} computation using multiApply function. The default value is NULL (one core).} } \value{ -An array containing the calibrated forecasts with the same dimensions -as the 'exp' array. If 'exp_cor' is provided the returned array will -be with the same dimensions as 'exp_cor'. +An array containing the calibrated forecasts with the dimensions +nexp, nobs and same dimensions as in the 'exp' array. nexp is the number of +experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +If 'exp_cor' is provided the returned array will be with the same dimensions as +'exp_cor'. } \description{ Five types of member-by-member bias correction can be performed. -- GitLab From d87370b1b228d6b847ee01aacef3ee32e8ccd647 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 29 Nov 2022 12:10:58 +0100 Subject: [PATCH 08/13] Correct output dimensions order of CST_BiasCorrection and CST_Calibration --- R/CST_BiasCorrection.R | 6 +++--- R/CST_Calibration.R | 11 ++++++++--- tests/testthat/test-CST_BiasCorrection.R | 6 +++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 408bbb41..fff68d6b 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -73,9 +73,6 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, BiasCorrected <- BiasCorrection(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, na.rm = na.rm, ncores = ncores) - - pos <- match(dimnames, names(dim(BiasCorrected))) - BiasCorrected <- aperm(BiasCorrected, pos) if (is.null(exp_cor)) { exp$data <- BiasCorrected @@ -278,6 +275,9 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), names(dim(BiasCorrected))) BiasCorrected <- aperm(BiasCorrected, pos) + } else { + pos <- match(c(names(dim(exp))), names(dim(BiasCorrected))) + BiasCorrected <- aperm(BiasCorrected, pos) } return(BiasCorrected) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 21e2f9b8..4b841de4 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -180,9 +180,6 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", na.fill = na.fill, na.rm = na.rm, apply_to = apply_to, alpha = alpha, memb_dim = memb_dim, sdate_dim = sdate_dim, dat_dim = dat_dim, ncores = ncores) - pos <- match(dimnames, names(dim(Calibration))) - Calibration <- aperm(Calibration, pos) - if (is.null(exp_cor)) { exp$data <- Calibration exp$Datasets <- c(exp$Datasets, obs$Datasets) @@ -520,6 +517,14 @@ Calibration <- function(exp, obs, exp_cor = NULL, pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), names(dim(calibrated))) calibrated <- aperm(calibrated, pos) + } else { + if (is.null(exp_cor)) { + pos <- match(c(names(dim(exp))), names(dim(calibrated))) + calibrated <- aperm(calibrated, pos) + } else { + pos <- match(c(names(dim(exp_cor))), names(dim(calibrated))) + calibrated <- aperm(calibrated, pos) + } } return(calibrated) diff --git a/tests/testthat/test-CST_BiasCorrection.R b/tests/testthat/test-CST_BiasCorrection.R index 62740f0d..85e1b4a5 100644 --- a/tests/testthat/test-CST_BiasCorrection.R +++ b/tests/testthat/test-CST_BiasCorrection.R @@ -216,15 +216,15 @@ test_that("2. Output checks: dat1", { ) expect_equal( round(BiasCorrection(exp = exp3, obs = obs3, exp_cor = exp3), 2), - array(c(2.66, 4.27, 3.2, 4.8, 3.73, 5.34), c(member = 2, sdate = 3)) + array(c(2.66, 3.2, 3.73, 4.27, 4.8, 5.34), c(sdate = 3, member = 2)) ) expect_equal( round(BiasCorrection(exp = exp3, obs = obs3_2, exp_cor = exp3), 2), - array(c(2.66, 4.27, 3.2, 4.8, 3.73, 5.34), c(member = 2, sdate = 3)) + array(c(2.66, 3.2, 3.73, 4.27, 4.8, 5.34), c(sdate = 3, member = 2)) ) expect_equal( dim(BiasCorrection(exp = exp4, obs = obs4, sdate_dim = 'time', memb_dim = 'members')), - c(members = 5, time = 5, lat = 2, lon = 5) + c(time = 5, members = 5, lat = 2, lon = 5) ) suppressWarnings( expect_equal( -- GitLab From 02fe40dd27072d735bfb281306a0217ec2aa46a5 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 1 Dec 2022 17:58:06 +0100 Subject: [PATCH 09/13] Correct restriction when exp_cor is provided in CST_BiasCorrection --- R/CST_BiasCorrection.R | 25 +++++++++++++----------- man/BiasCorrection.Rd | 3 +-- man/CST_BiasCorrection.Rd | 3 +-- tests/testthat/test-CST_BiasCorrection.R | 7 +++---- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index fff68d6b..41019ff8 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -14,8 +14,7 @@ #'@param exp_cor An object of class \code{s2dv_cube} as returned by #' \code{CST_Load} function, containing the seasonal forecast experiment to be #' corrected with at least time dimension. If it is NULL, the 'exp' forecast -#' will be corrected. The dimensions must be the same as 'exp' except -#' dataset dimension. If there is only one corrected dataset, it should not +#' will be corrected. If there is only one corrected dataset, it should not #' have dataset dimension. If there is a corresponding corrected dataset for #' each 'exp' forecast, the dataset dimension must have the same length as in #' 'exp'. The default value is NULL. @@ -103,8 +102,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, #' observed data with at least time dimension. #'@param exp_cor A multidimensional array with named dimensions containing the #' seasonal forecast experiment to be corrected with at least time and member -#' dimension. If it is NULL, the 'exp' forecast will be corrected. The -#' dimensions must be the same as 'exp' except dataset dimension. If there is +#' dimension. If it is NULL, the 'exp' forecast will be corrected. If there is #' only one corrected dataset, it should not have dataset dimension. If there #' is a corresponding corrected dataset for each 'exp' forecast, the dataset #' dimension must have the same length as in 'exp'. The default value is NULL. @@ -227,15 +225,15 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } else { target_dims_cor <- c(memb_dim, sdate_dim) } - name_exp <- name_exp[-which(name_exp == dat_dim)] } else { target_dims_cor <- c(memb_dim, sdate_dim) } + name_exp <- name_exp[-which(name_exp %in% c(memb_dim, sdate_dim, dat_dim))] + name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)] if (!identical(length(name_exp), length(name_exp_cor)) | !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all dimensions except 'dat_dim' if there is ", - "only one reference dataset.")) + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.")) } } ## na.rm @@ -308,11 +306,12 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } else { cor_dat_dim <- TRUE } + corrected <- array(dim = c(dim(var_cor)[1:2], nexp = nexp, nobs = nobs)) + } else { + ntime <- dim(var_exp)[2] + corrected <- array(dim = c(dim(var_exp)[1:2], nexp = nexp, nobs = nobs)) } - ntime <- dim(var_exp)[2] - corrected <- array(dim = c(dim(var_exp)[1:2], nexp = nexp, nobs = nobs)) - for (i in 1:nexp) { for (j in 1:nobs) { if (is.null(var_cor)) { @@ -344,7 +343,11 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } if (is.null(dat_dim)) { - dim(corrected) <- dim(var_exp)[1:2] + if (!is.null(var_cor)) { + dim(corrected) <- dim(var_cor)[1:2] + } else { + dim(corrected) <- dim(var_exp)[1:2] + } } return(corrected) diff --git a/man/BiasCorrection.Rd b/man/BiasCorrection.Rd index 04be3e25..3cf20530 100644 --- a/man/BiasCorrection.Rd +++ b/man/BiasCorrection.Rd @@ -24,8 +24,7 @@ observed data with at least time dimension.} \item{exp_cor}{A multidimensional array with named dimensions containing the seasonal forecast experiment to be corrected with at least time and member -dimension. If it is NULL, the 'exp' forecast will be corrected. The -dimensions must be the same as 'exp' except dataset dimension. If there is +dimension. If it is NULL, the 'exp' forecast will be corrected. If there is only one corrected dataset, it should not have dataset dimension. If there is a corresponding corrected dataset for each 'exp' forecast, the dataset dimension must have the same length as in 'exp'. The default value is NULL.} diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index ce7df579..ca72d949 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -27,8 +27,7 @@ with at least time dimension.} \item{exp_cor}{An object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment to be corrected with at least time dimension. If it is NULL, the 'exp' forecast -will be corrected. The dimensions must be the same as 'exp' except -dataset dimension. If there is only one corrected dataset, it should not +will be corrected. If there is only one corrected dataset, it should not have dataset dimension. If there is a corresponding corrected dataset for each 'exp' forecast, the dataset dimension must have the same length as in 'exp'. The default value is NULL.} diff --git a/tests/testthat/test-CST_BiasCorrection.R b/tests/testthat/test-CST_BiasCorrection.R index 85e1b4a5..5310f307 100644 --- a/tests/testthat/test-CST_BiasCorrection.R +++ b/tests/testthat/test-CST_BiasCorrection.R @@ -172,11 +172,10 @@ test_that("1. Input checks", { expect_error( BiasCorrection(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 1)), - exp_cor = array(1:6, c(sdate = 3, member = 1, lon = 1)), + exp_cor = array(1:6, c(sdate = 3, member = 1, lon = 3)), dat_dim = 'dataset'), - paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all dimensions except 'dat_dim' if there is ", - "only one reference dataset.") + paste0("Parameter 'exp' and 'exp_cor' must have the same length of all common dimensions", + " except 'dat_dim', 'sdate_dim' and 'memb_dim'.") ) ## na.rm expect_warning( -- GitLab From 3c34353a6a68def514d4b266df8d77c46ffd82d6 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 2 Dec 2022 17:31:18 +0100 Subject: [PATCH 10/13] Correct code for different sdate in exp_cor --- R/CST_BiasCorrection.R | 15 +++++ R/CST_Calibration.R | 123 +++++++++++++++++++++++++++++++++++------ 2 files changed, 122 insertions(+), 16 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 41019ff8..1a8cf8b2 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -187,6 +187,17 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } else { obs <- InsertDim(obs, posdim = 1, lendim = 1, name = memb_dim) } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + exp_cor <- InsertDim(exp_cor, posdim = 1, lendim = 1, name = memb_dim) + exp_cor_remove_memb <- TRUE + } else { + exp_cor_remove_memb <- FALSE + } + } else { + exp_cor_remove_memb <- FALSE + } + ## dat_dim if (!is.null(dat_dim)) { if (!is.character(dat_dim) | length(dat_dim) > 1) { @@ -278,6 +289,10 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, BiasCorrected <- aperm(BiasCorrected, pos) } + if (exp_cor_remove_memb) { + dim(BiasCorrected) <- dim(BiasCorrected)[-which(names(dim(BiasCorrected)) == memb_dim)] + } + return(BiasCorrected) } diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 4b841de4..f97d105e 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -400,6 +400,16 @@ Calibration <- function(exp, obs, exp_cor = NULL, } obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + exp_cor <- InsertDim(exp_cor, posdim = 1, lendim = 1, name = memb_dim) + exp_cor_remove_memb <- TRUE + } else { + exp_cor_remove_memb <- FALSE + } + } else { + exp_cor_remove_memb <- FALSE + } ## exp, obs, and exp_cor (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) @@ -515,18 +525,18 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (!is.null(dat_dim)) { pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), - names(dim(calibrated))) + names(dim(calibrated))) calibrated <- aperm(calibrated, pos) } else { - if (is.null(exp_cor)) { - pos <- match(c(names(dim(exp))), names(dim(calibrated))) - calibrated <- aperm(calibrated, pos) - } else { - pos <- match(c(names(dim(exp_cor))), names(dim(calibrated))) - calibrated <- aperm(calibrated, pos) - } + pos <- match(c(names(dim(exp))), names(dim(calibrated))) + calibrated <- aperm(calibrated, pos) } + if (exp_cor_remove_memb) { + dim(calibrated) <- dim(calibrated)[-which(names(dim(calibrated)) == memb_dim)] + } + + return(calibrated) } @@ -583,29 +593,110 @@ Calibration <- function(exp, obs, exp_cor = NULL, } } + expdims <- dim(exp) + expdims_cor <- dim(exp_cor) + memb <- expdims[1] # memb + sdate <- expdims[2] # sdate + sdate_cor <- expdims_cor[2] + return_data_na <- FALSE + var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { - if (!.data.set.sufficiently.large(exp = exp[, , i], obs = obs[, j])) { + if (!.data.set.sufficiently.large(exp = exp[, , i, drop = FALSE], obs = obs[, j, drop = FALSE])) { return_data_na <- TRUE if (cor_dat_dim) { - var.cor.fc <- NA * exp_cor[, , i] + var.cor.fc[, , i, j] <- NA * exp_cor[, , i] } else { - var.cor.fc <- NA * exp_cor[, i] + var.cor.fc[, , i, j] <- NA * exp_cor[, i] } if (!na.fill) { var.cor.fc[, , i] <- exp[, , i] } } + obs_data <- as.vector(obs[, j]) + exp_data <- exp[, , i] + dim(exp_data) <- dim(exp)[1:2] + if (cor_dat_dim) { + expcor_data <- exp_cor[, , i] + dim(expcor_data) = dim(exp_cor)[1:2] + } else { + expcor_data <- exp_cor + } + eval.train.dexeses <- .make.eval.train.dexes(eval.method = eval.method, amt.points = sdate, + amt.points_cor = sdate_cor) + amt.resamples <- length(eval.train.dexeses) + for (i.sample in seq(1, amt.resamples)) { + # defining training (tr) and evaluation (ev) subsets + # fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes + fc.ev <- expcor_data[ , eval.dexes, drop = FALSE] + fc.tr <- exp_data[ , train.dexes] + obs.tr <- obs_data[train.dexes , drop = FALSE] + + if (cal.method == "bias") { + var.cor.fc[ , eval.dexes, i, j] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + # forecast correction implemented + } else if (cal.method == "evmos") { + # forecast correction implemented + # ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + # calculate value for regression parameters + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + # correct evaluation subset + var.cor.fc[ , eval.dexes, i, j] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "mse_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) + var.cor.fc[ , eval.dexes, i, j] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "crps_min") { + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) + init.par[3] <- sqrt(init.par[3]) + # calculate regression parameters on training dataset + optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, + quant.obs.fc = quant.obs.fc.tr, na.rm = na.rm, method = "BFGS") + mbm.par <- optim.tmp$par + var.cor.fc[ , eval.dexes, i, j] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) + } else if (cal.method == 'rpc-based') { + # Ensemble mean + ens_mean.ev <- Apply(data = fc.ev, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + ens_mean.tr <- Apply(data = fc.tr, target_dims = names(memb), fun = mean, na.rm = na.rm)$output1 + # Ensemble spread + ens_spread.tr <- Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(sdate), fun = "-")$output1 + # Mean (climatology) + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) + # Ensemble mean variance + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) + # Variance of ensemble members about ensemble mean (= spread) + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) + # Variance in the observations + var_obs.tr <- var(obs.tr, na.rm = na.rm) + # Correlation between observations and the ensemble mean + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', + use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) + if ((apply_to == 'all') || (apply_to == 'sign' && + cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes, i, j] <- Reorder(data = Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, + ens_mean_cal = ens_mean_cal), + target_dims = names(sdate), fun = .CalibrationMembersRPC, + var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(expdims)[1:2]) + } else { + # no significant -> replacing with observed climatology + var.cor.fc[ , eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) + } + } else { + stop("unknown calibration method: ", cal.method) + } + } } } - expdims <- dim(exp) - expdims_cor <- dim(exp_cor) - memb <- expdims[1] # memb - sdate <- expdims[2] # sdate - sdate_cor <- expdims_cor[2] + if (!return_data_na) { var.cor.fc <- array(dim = c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs)) -- GitLab From d8e1d4fe7ce0ff6444fe3ea78c2d52cf62928718 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 10 Jan 2023 18:06:59 +0100 Subject: [PATCH 11/13] Correct documentation and added a test for exp_cor with dat_dim --- R/CST_BiasCorrection.R | 11 ++++++----- R/CST_Calibration.R | 7 +------ man/CST_BiasCorrection.Rd | 6 +++++- man/CST_Calibration.Rd | 4 +--- tests/testthat/test-CST_BiasCorrection.R | 13 ++++++++++--- tests/testthat/test-CST_Calibration.R | 15 ++++++++++++--- 6 files changed, 35 insertions(+), 21 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 1a8cf8b2..8603646a 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -30,8 +30,12 @@ #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is NULL. #'@return An object of class \code{s2dv_cube} containing the bias corrected -#'forecasts with the same dimensions of the experimental data. -#' +#'forecasts with the dimensions nexp, nobs and same dimensions as in the 'exp' +#'object. nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is +#'the number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp +#'and nobs are omitted. If 'exp_cor' is provided the returned array will be with +#'the same dimensions as 'exp_cor'. +#' #'@references Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. #'Davis (2017). Seasonal climate prediction: a new source of information for #'the management of wind energy resources. Journal of Applied Meteorology and @@ -64,9 +68,6 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, stop("Parameter 'exp_cor' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - dimnames <- names(dim(exp_cor$data)) - } else { - dimnames <- names(dim(exp$data)) } BiasCorrected <- BiasCorrection(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index f97d105e..ef064b70 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -15,8 +15,7 @@ #'minimizes the Continuous Ranked Probability Score (CRPS). The #'\code{"rpc-based"} method adjusts the forecast variance ensuring that the #'ratio of predictable components (RPC) is equal to one, as in Eade et al. -#'(2014). -#'@description It is equivalent to function \code{Calibration} but for objects +#'(2014). It is equivalent to function \code{Calibration} but for objects #'of class \code{s2dv_cube}. #' #'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} @@ -160,15 +159,11 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - dimnames <- names(dim(exp_cor$data)) # if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" # if exp_cor is provided, eval.method is overruled (because if exp_cor is provided, the # train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; # no need for "leave-one-out" or "in-sample") eval.method = "hindcast-vs-forecast" - - } else { - dimnames <- names(dim(exp$data)) } if (!missing(multi.model) & !(cal.method == "mse_min")) { diff --git a/man/CST_BiasCorrection.Rd b/man/CST_BiasCorrection.Rd index ca72d949..48c593d0 100644 --- a/man/CST_BiasCorrection.Rd +++ b/man/CST_BiasCorrection.Rd @@ -50,7 +50,11 @@ computations using multiApply function. The default value is NULL.} } \value{ An object of class \code{s2dv_cube} containing the bias corrected -forecasts with the same dimensions of the experimental data. +forecasts with the dimensions nexp, nobs and same dimensions as in the 'exp' +object. nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is +the number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp +and nobs are omitted. If 'exp_cor' is provided the returned array will be with +the same dimensions as 'exp_cor'. } \description{ This function applies the simple bias adjustment technique diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index d99e5a2a..40825a22 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -113,9 +113,7 @@ parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. -(2014). - -It is equivalent to function \code{Calibration} but for objects +(2014). It is equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}. } \details{ diff --git a/tests/testthat/test-CST_BiasCorrection.R b/tests/testthat/test-CST_BiasCorrection.R index 5310f307..0a590ce4 100644 --- a/tests/testthat/test-CST_BiasCorrection.R +++ b/tests/testthat/test-CST_BiasCorrection.R @@ -48,10 +48,8 @@ obs4_1[1,1,1,1] <- NA # dat5 set.seed(1) exp5 <- array(rnorm(80), dim = c(member = 2, sdate = 10, lat = 2, dataset = 2)) -exp5_1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) set.seed(2) obs5 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) -obs5_1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) exp_cor5 <- array(rnorm(20), dim = c(member = 2, sdate = 10, lat = 2)) @@ -69,7 +67,8 @@ obs6_2[c(1, 3)] <- NA set.seed(3) exp_cor6 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) - +# dat7 +exp_cor7 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) ############################################## @@ -326,3 +325,11 @@ test_that("4. Output checks: dat6", { tolerance = 0.0001 ) }) + +############################################## +test_that("6. Output checks: dat4", { + expect_equal( + dim(BiasCorrection(exp5, obs5, exp_cor7, dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) +}) diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 9d4465af..78cb8b94 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -29,10 +29,8 @@ obs2$data[1, 1, 2, 1, 1, 1] <- NA # dat3 set.seed(1) exp3 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) -exp3_1 <- array(rnorm(200), dim = c(member = 10, sdate = 10, lat = 2)) set.seed(2) obs3 <- array(rnorm(60), dim = c(sdate = 10, lat = 2, dataset = 3)) -obs3_1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) exp_cor3 <- array(rnorm(200), dim = c(member = 10, sdate = 10, lat = 2)) @@ -50,6 +48,9 @@ obs4_2[c(1, 3)] <- NA set.seed(3) exp_cor4 <- array(rnorm(200), dim = c(member = 10, sdate = 20)) +# dat5 +exp_cor5 <- array(rnorm(400), dim = c(member = 10, sdate = 10, lat = 2, dataset = 2)) + ############################################## test_that("1. Input checks", { @@ -349,7 +350,7 @@ test_that("6. Output checks: dat4", { c(-0.8597528, 0.1036243, -1.1085060, 1.7823603, 0.2770880, -1.0904773, 0.4648899), tolerance = 0.0001 ) - expect_equal( # not equal!! + expect_equal( as.vector(Calibration(exp4, obs4, exp_cor4, cal.method = 'rpc-based', alpha = 0.5, apply_to = 'sign', eval.method = "hindcast-vs-forecast"))[1:7], c(-1.0464936, -0.2412064, 0.4220158, -1.2752995, 0.3462213, 0.1469362, 0.2134538), tolerance = 0.0001 @@ -360,3 +361,11 @@ test_that("6. Output checks: dat4", { tolerance = 0.0001 ) }) + +############################################## +test_that("6. Output checks: dat4", { + expect_equal( + dim(Calibration(exp3, obs3, exp_cor5, dat_dim = 'dataset')), + c(member = 10, sdate = 10, lat = 2, nexp = 2, nobs = 3) + ) +}) -- GitLab From a6aaa5b42a9239e6e410efbde7f9d81f401b8f1b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 10 Jan 2023 18:17:55 +0100 Subject: [PATCH 12/13] Correct documentation --- R/CST_BiasCorrection.R | 8 ++++++-- R/CST_Calibration.R | 6 +----- man/BiasCorrection.Rd | 8 ++++++-- man/CST_Calibration.Rd | 2 -- man/Calibration.Rd | 5 +---- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 8603646a..6039a463 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -119,8 +119,12 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, #'@param ncores An integer that indicates the number of cores for parallel #' computations using multiApply function. The default value is NULL. #' -#'@return An array containing the bias corrected forecasts with the same -#'dimensions of the experimental data. +#'@return An array containing the bias corrected forecasts with the dimensions +#'nexp, nobs and same dimensions as in the 'exp' object. nexp is the number of +#'experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +#''exp_cor' is provided the returned array will be with the same dimensions as +#''exp_cor'. #' #'@references Torralba, V., F.J. Doblas-Reyes, D. MacLeod, I. Christel and M. #'Davis (2017). Seasonal climate prediction: a new source of information for the diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index ef064b70..a13b650d 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -121,7 +121,6 @@ #'attr(exp, 'class') <- 's2dv_cube' #'attr(obs, 'class') <- 's2dv_cube' #'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") -#'str(a) #' #'# Example 2: #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -139,7 +138,6 @@ #'attr(obs, 'class') <- 's2dv_cube' #'attr(exp_cor, 'class') <- 's2dv_cube' #'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") -#'str(a) #' #'@importFrom s2dv InsertDim Reorder #'@import multiApply @@ -208,8 +206,7 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'minimizes the Continuous Ranked Probability Score (CRPS). The #'\code{"rpc-based"} method adjusts the forecast variance ensuring that the #'ratio of predictable components (RPC) is equal to one, as in Eade et al. -#'(2014). -#'@description Both in-sample or our out-of-sample (leave-one-out cross +#'(2014). Both in-sample or our out-of-sample (leave-one-out cross #'validation) calibration are possible. #' #'@param exp A multidimensional array with named dimensions (at least 'sdate' @@ -304,7 +301,6 @@ CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", #'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) #'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) #'a <- Calibration(exp = mod1, obs = obs1) -#'str(a) #' #'@importFrom s2dv InsertDim Reorder #'@import multiApply diff --git a/man/BiasCorrection.Rd b/man/BiasCorrection.Rd index 3cf20530..233639af 100644 --- a/man/BiasCorrection.Rd +++ b/man/BiasCorrection.Rd @@ -46,8 +46,12 @@ The default value is NULL.} computations using multiApply function. The default value is NULL.} } \value{ -An array containing the bias corrected forecasts with the same -dimensions of the experimental data. +An array containing the bias corrected forecasts with the dimensions +nexp, nobs and same dimensions as in the 'exp' object. nexp is the number of +experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +'exp_cor' is provided the returned array will be with the same dimensions as +'exp_cor'. } \description{ This function applies the simple bias adjustment technique diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 40825a22..2e8861e7 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -140,7 +140,6 @@ obs <- list(data = obs1, lat = lat, lon = lon) attr(exp, 'class') <- 's2dv_cube' attr(obs, 'class') <- 's2dv_cube' a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") -str(a) # Example 2: mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) @@ -158,7 +157,6 @@ attr(exp, 'class') <- 's2dv_cube' attr(obs, 'class') <- 's2dv_cube' attr(exp_cor, 'class') <- 's2dv_cube' a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") -str(a) } \references{ diff --git a/man/Calibration.Rd b/man/Calibration.Rd index cd66ad29..2248834b 100644 --- a/man/Calibration.Rd +++ b/man/Calibration.Rd @@ -109,9 +109,7 @@ parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. -(2014). - -Both in-sample or our out-of-sample (leave-one-out cross +(2014). Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. } \details{ @@ -131,7 +129,6 @@ dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) a <- Calibration(exp = mod1, obs = obs1) -str(a) } \references{ -- GitLab From 4b149e656230ded6ef15b42e66c971b55f8ca1bc Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 11 Jan 2023 11:04:04 +0100 Subject: [PATCH 13/13] Add some tests in test-CST_Calibration and fix .cal.obs.fc.quant --- R/CST_BiasCorrection.R | 18 +++---- R/CST_Calibration.R | 28 +++++----- tests/testthat/test-CST_Calibration.R | 74 ++++++++++++++++++++------- 3 files changed, 81 insertions(+), 39 deletions(-) diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 6039a463..da368db0 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -1,4 +1,4 @@ -#' Bias Correction based on the mean and standard deviation adjustment +#'Bias Correction based on the mean and standard deviation adjustment #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@description This function applies the simple bias adjustment technique @@ -66,7 +66,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, if (!is.null(exp_cor)) { if (!inherits(exp_cor, 's2dv_cube')) { stop("Parameter 'exp_cor' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") + "as output by CSTools::CST_Load.") } } @@ -90,7 +90,7 @@ CST_BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } } -#' Bias Correction based on the mean and standard deviation adjustment +#'Bias Correction based on the mean and standard deviation adjustment #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@description This function applies the simple bias adjustment technique @@ -224,8 +224,8 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", - " except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") } if (!is.null(exp_cor)) { name_exp_cor <- sort(names(dim(exp_cor))) @@ -233,8 +233,8 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, if (!is.null(dat_dim)) { if (dat_dim %in% exp_cordims) { if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) { - stop(paste0("If parameter 'exp_cor' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) + stop("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") } name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)] target_dims_cor <- c(memb_dim, sdate_dim, dat_dim) @@ -248,8 +248,8 @@ BiasCorrection <- function(exp, obs, exp_cor = NULL, na.rm = FALSE, name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)] if (!identical(length(name_exp), length(name_exp_cor)) | !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { - stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") } } ## na.rm diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index a13b650d..d0b547d1 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -387,7 +387,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (memb_dim %in% obsdims) { if (dim(obs)[memb_dim] != 1) { - warning("Parameter 'obs' has dimension 'memb_dim' with length larger than 1. Only the first member dimension will be used.") + warning("Parameter 'obs' has dimension 'memb_dim' with length larger", + " than 1. Only the first member dimension will be used.") } obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") } @@ -411,8 +412,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", - " except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of all ", + "dimensions except 'memb_dim' and 'dat_dim'.") } if (!is.null(exp_cor)) { name_exp_cor <- sort(names(dim(exp_cor))) @@ -420,8 +421,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, if (!is.null(dat_dim)) { if (dat_dim %in% expcordims) { if (!identical(dim(exp)[dat_dim], dim(exp_cor)[dat_dim])) { - stop(paste0("If parameter 'exp_cor' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) + stop("If parameter 'exp_cor' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") } name_exp_cor <- name_exp_cor[-which(name_exp_cor == dat_dim)] target_dims_cor <- c(memb_dim, sdate_dim, dat_dim) @@ -435,8 +436,8 @@ Calibration <- function(exp, obs, exp_cor = NULL, name_exp_cor <- name_exp_cor[-which(name_exp_cor %in% target_dims_cor)] if (!identical(length(name_exp), length(name_exp_cor)) | !identical(dim(exp)[name_exp], dim(exp_cor)[name_exp_cor])) { - stop(paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", - "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'exp_cor' must have the same length of ", + "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") } } ## ncores @@ -784,8 +785,11 @@ Calibration <- function(exp, obs, exp_cor = NULL, # Function to calculate different quantities of a series of ensemble forecasts and corresponding observations .calc.obs.fc.quant <- function(obs, fc, na.rm) { + if (is.null(dim(fc))) { + dim(fc) <- c(length(fc), 1) + } amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") obs.av <- mean(obs, na.rm = na.rm) @@ -806,7 +810,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, # Extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations .calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") obs.av <- mean(obs, na.rm = na.rm) @@ -831,7 +835,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm) - fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr) + fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm) fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm)) fc.dev <- fc - fc.ens.av.per.ens @@ -857,10 +861,10 @@ Calibration <- function(exp, obs, exp_cor = NULL, # Extended function to calculate different quantities of a series of ensemble forecasts .calc.fc.quant.ext <- function(fc, na.rm){ amt.mbr <- dim(fc)[1] - repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) + repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm) - spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) + spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr, name = 'amt.mbr') return( append(.calc.fc.quant(fc, na.rm = na.rm), diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 78cb8b94..35eccbe9 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -3,8 +3,9 @@ context("CSTools::CST_Calibration tests") ############################################## # dat -library(zeallot) -c(exp, obs) %<-% lonlat_temp +exp_obs <- lonlat_temp +exp <- exp_obs$exp +obs <- exp_obs$obs exp$data <- ClimProjDiags::Subset(exp$data, c('lat', 'lon'), list(1:5, 1:5)) obs$data <- ClimProjDiags::Subset(obs$data, c('lat', 'lon'), list(1:5, 1:5)) @@ -75,6 +76,10 @@ test_that("1. Input checks", { "as output by CSTools::CST_Load.") ) # exp and obs + expect_error( + Calibration(exp = 1, obs = obs1), + "Parameter 'exp' must be a numeric array." + ) expect_error( CST_Calibration(exp = exp1_2, obs = obs1), "Parameter 'exp' must have dimension names." @@ -84,41 +89,74 @@ test_that("1. Input checks", { "Parameter 'obs' must have dimension names." ) expect_warning( - CST_Calibration(exp = exp2, obs = obs2), + CST_Calibration(exp = exp2, obs = obs2, exp_cor = exp2), "Parameter 'obs' contains NA values", - "Parameter 'exp' contains NA values." + "Parameter 'exp' contains NA values.", + "Parameter 'exp_cor' contains NA values." ) # exp_cor expect_error( CST_Calibration(exp = exp1, obs = obs1, exp_cor = exp_cor1, sdate_dim = 'time'), "Parameter 'exp_cor' must have dimension names." ) + # dat_dim + expect_error( + Calibration(exp = exp3, obs = obs3, dat_dim = 1), + paste0("Parameter 'dat_dim' must be a character string.") + ) + expect_error( + CST_Calibration(exp = exp, obs = obs, dat_dim = 'dat'), + paste0("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + ) # sdate_dim, memb_dim expect_error( CST_Calibration(exp = exp1, obs = obs1, sdate_dim = 1), paste0("Parameter 'sdate_dim' should be a character string indicating the", "name of the dimension where start dates are stored in 'exp'.") ) + expect_warning( + CST_Calibration(exp = exp, obs = obs, sdate_dim = c('sdate', 'time')), + paste0("Parameter 'sdate_dim' has length greater than 1 and only", + " the first element will be used.") + ) + expect_error( + CST_Calibration(exp = exp1, obs = obs1, memb_dim = 1), + paste0("Parameter 'memb_dim' should be a character string indicating the", + "name of the dimension where members are stored in 'exp'.") + ) expect_error( CST_Calibration(exp = exp, obs = obs, sdate_dim = 'time'), paste0("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") ) - expect_error( - Calibration(exp = array(1:20, dim = c(time = 1, member = 1)), - obs = array(1:20, dim = c(time = 2, member = 1)), sdate_dim = 'time'), - paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", - " except 'memb_dim' and 'dat_dim'.") + expect_warning( + CST_Calibration(exp = exp, obs = obs, memb_dim = c('member', 'memb')), + paste0("Parameter 'memb_dim' has length greater than 1 and only", + " the first element will be used.") ) expect_error( CST_Calibration(exp = exp1, obs = obs1, sdate_dim = 'time'), paste0("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.") ) - ## dat_dim expect_error( - Calibration(exp = exp3, obs = obs3, dat_dim = 1), - paste0("Parameter 'dat_dim' must be a character string.") + Calibration(exp = array(1:20, dim = c(time = 1, member = 1)), + obs = array(1:20, dim = c(member = 1)), sdate_dim = 'time'), + paste0("Parameter 'obs' must have the dimension defined in sdate_dim ", + "parameter.") + ) + expect_warning( + Calibration(exp = array(abs(rnorm(400)), dim = c(sdate = 4, member = 10)), + obs = array(abs(rnorm(8)), dim = c(sdate = 4, member = 2))), + paste0("Parameter 'obs' has dimension 'memb_dim' with length larger", + " than 1. Only the first member dimension will be used.") + ) + # exp, obs, and exp_cor (2) + expect_error( + Calibration(exp = array(1:20, dim = c(time = 1, member = 1)), + obs = array(1:20, dim = c(time = 2, member = 1)), sdate_dim = 'time'), + paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") ) - ## exp, obs, and exp_cor (2) expect_error( Calibration(exp = array(1:6, c(sdate = 3, member = 2, dataset = 2, lon = 1)), obs = array(1:6, c(sdate = 3, member = 1, dataset = 3, lon = 2)), @@ -141,16 +179,16 @@ test_that("1. Input checks", { paste0("Parameter 'exp' and 'exp_cor' must have the same length of ", "all common dimensions except 'dat_dim', 'sdate_dim' and 'memb_dim'.") ) - # na.rm - expect_warning( - CST_Calibration(exp = exp, obs = obs, na.rm = c(T,F)), - "Paramter 'na.rm' has length greater than 1, and only the fist element is used." - ) # ncores expect_error( CST_Calibration(exp = exp, obs = obs, ncores = TRUE), "Parameter 'ncores' must be either NULL or a positive integer." ) + # na.rm + expect_warning( + CST_Calibration(exp = exp, obs = obs, na.rm = c(T,F)), + "Paramter 'na.rm' has length greater than 1, and only the fist element is used." + ) # cal.method expect_error( as.vector(Calibration(exp4, obs4, cal.method = 'biass')), -- GitLab