From 9b96f58464b9412a46304b60e5785749d2595b29 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 27 Dec 2022 12:15:45 +0100 Subject: [PATCH 1/4] Add randomwalk test; parameter ''ref', 'sig_method' and 'memb_dim' --- R/RMSSS.R | 257 ++++++++++++++++++++++++++++-------- man/RMSSS.Rd | 20 ++- tests/testthat/test-RMSSS.R | 86 +++++++++++- 3 files changed, 305 insertions(+), 58 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index 9c47da4..b8f411a 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -11,7 +11,7 @@ #'The RMSSS are computed along the time_dim dimension which should correspond #'to the start date dimension.\cr #'The p-value and significance test are optionally provided by an one-sided -#'Fisher test.\cr +#'Fisher test or Random Walk test.\cr #' #'@param exp A named numeric array of experimental data which contains at least #' two dimensions for dat_dim and time_dim. It can also be a vector with the @@ -23,10 +23,20 @@ #' dimension can be different. It can also be a vector with the same length as #' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will #' be 1. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiement, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is 'dataset'. #'@param time_dim A character string indicating the name of dimension along #' which the RMSSS are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: RMSSS = 0. The default value is TRUE. #'@param sign A logical value indicating whether to compute or not the @@ -34,6 +44,8 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -66,8 +78,9 @@ #'@import multiApply #'@importFrom stats pf #'@export -RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', - pval = TRUE, sign = FALSE, alpha = 0.05, ncores = NULL) { +RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', ncores = NULL) { # Check inputs ## exp and obs (1) @@ -99,6 +112,14 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', !all(names(dim(obs)) %in% names(dim(exp)))) { stop("Parameter 'exp' and 'obs' must have same dimension name.") } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -116,6 +137,23 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', " Set it as NULL if there is no dataset dimension.") } } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } ## pval if (!is.logical(pval) | length(pval) > 1) { stop("Parameter 'pval' must be one logical value.") @@ -128,6 +166,14 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_meothd' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + if (sig_method == "Random Walk" & pval == T) { + warning("p-value cannot be calculated by significance method 'Random Walk'.") + pval <- FALSE + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -138,66 +184,162 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim'.")) + "all dimension except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } } + if (dim(exp)[time_dim] <= 2) { stop("The length of time_dim must be more than 2 to compute RMSSS.") } ############################### - # Sort dimension - name_exp <- names(dim(exp)) - name_obs <- names(dim(obs)) - order_obs <- match(name_exp, name_obs) - obs <- Reorder(obs, order_obs) +# # Sort dimension +# name_exp <- names(dim(exp)) +# name_obs <- names(dim(obs)) +# order_obs <- match(name_exp, name_obs) +# obs <- Reorder(obs, order_obs) + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } + ############################### # Calculate RMSSS + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { + target_dims_ref <- c(time_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim)) + } - res <- Apply(list(exp, obs), - target_dims = list(c(time_dim, dat_dim), - c(time_dim, dat_dim)), + res <- Apply(data, + target_dims = target_dims, fun = .RMSSS, time_dim = time_dim, dat_dim = dat_dim, pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, ncores = ncores) return(res) } -.RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, - sign = FALSE, alpha = 0.05) { +.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { # exp: [sdate, (dat)] # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] nexp <- 1 nobs <- 1 - dim(exp) <- c(dim(exp), nexp = nexp) - dim(obs) <- c(dim(obs), nobs = nobs) + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + if (!is.null(ref)) { + dim(ref) <- c(dim(ref), dat = 1) + } +# ref_dat_dim <- FALSE + } else { # exp: [sdate, dat_exp] # obs: [sdate, dat_obs] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) + if (!is.null(ref)) { + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } + } } nsdate <- as.numeric(dim(exp)[1]) - - p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + + # RMS of forecast dif1 <- array(dim = c(nsdate, nexp, nobs)) names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + + # RMS of reference + if (!is.null(ref)) { + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nref, nobs)) + if (nexp != nref) { + # expand rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } + } else { + rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) +# rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( +# rms_ref), na.rm = TRUE) / 1000 + rms_ref <- Reorder(rms_ref, c(2, 1)) + #rms_ref above: [nexp, nobs] + } + + rmsss <- 1 - rms_exp / rms_ref + +################################################# + # if (conf) { # conflow <- (1 - conf.lev) / 2 # confhigh <- 1 - conflow @@ -205,45 +347,52 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', # conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) # } - # dif1 - for (i in 1:nobs) { - dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) - } - - rms1 <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) - rms2 <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs)) - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( - rms2), na.rm = TRUE) / 1000 - #rms2 above: [nobs] - rms2 <- array(rms2, dim = c(nobs = nobs, nexp = nexp)) - #rms2 above: [nobs, nexp] - rms2 <- Reorder(rms2, c(2, 1)) - #rms2 above: [nexp, nobs] + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } - # use rms1 and rms2 to calculate rmsss - rmsss <- 1 - rms1/rms2 - - ## pval and sign - if (pval || sign) { - eno1 <- Eno(dif1, time_dim) - eno2 <- Eno(obs, time_dim) - eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) - eno2 <- Reorder(eno2, c(2, 1)) - } + F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data rmsss to NA + rmsss[which(!tmp)] <- NA + } - # pval - if (pval || sign) { + } else if (sig_method == "Random Walk") { + #TODO: What if ref is NULL? + signif <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { - F.stat <- (eno2 * rms2^2 / (eno2- 1)) / ((eno1 * rms1^2 / (eno1- 1))) - tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 - p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) - if (sign) signif <- p_val <= alpha - # If there isn't enough valid data, return NA - p_val[which(!tmp)] <- NA - if (sign) signif[which(!tmp)] <- NA - - # change not significant rmsss to NA - rmsss[which(!tmp)] <- NA + # Error + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$signif + } + } } ################################### diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 05dfb1b..af892cb 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -7,11 +7,14 @@ RMSSS( exp, obs, + ref = NULL, time_dim = "sdate", dat_dim = "dataset", + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = "one-sided Fisher", ncores = NULL ) } @@ -28,12 +31,24 @@ dimension can be different. It can also be a vector with the same length as 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will be 1.} +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as 'exp' except +'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +not have dataset dimension. If there is corresponding reference for each +experiement, the dataset dimension must have the same length as in 'exp'. If +'ref' is NULL, the climatological forecast is used as reference forecast. +The default value is NULL.} + \item{time_dim}{A character string indicating the name of dimension along which the RMSSS are computed. The default value is 'sdate'.} \item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) dimension. The default value is 'dataset'.} +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +and 'ref' are already the ensemble mean. The default value is NULL.} + \item{pval}{A logical value indicating whether to compute or not the p-value of the test Ho: RMSSS = 0. The default value is TRUE.} @@ -44,6 +59,9 @@ FALSE.} \item{alpha}{A numeric of the significance level to be used in the statistical significance test. The default value is 0.05.} +\item{sig_method}{A character string indicating the significance method. The +options are "one-sided Fisher" (default) and "Random Walk".} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } @@ -77,7 +95,7 @@ of the array.\cr The RMSSS are computed along the time_dim dimension which should correspond to the start date dimension.\cr The p-value and significance test are optionally provided by an one-sided -Fisher test.\cr +Fisher test or Random Walk test.\cr } \examples{ set.seed(1) diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index 0a4916e..fa5ca33 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -6,7 +6,11 @@ context("s2dv::RMSSS tests") exp1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) set.seed(2) obs1 <- array(rnorm(6), dim = c(sdate = 3, dataset = 2)) - + set.seed(3) + ref1_1 <- array(rnorm(15), dim = c(sdate = 3, dataset = 5)) + ref1_2 <- exp1[, 3] + dim(ref1_2) <- c(sdate = 3) + # case 2 set.seed(3) exp2 <- array(rnorm(120), dim = c(time = 10, dat = 1, lon = 3, lat = 2, dataset = 2)) @@ -25,6 +29,15 @@ context("s2dv::RMSSS tests") set.seed(8) obs4 <- array(exp4 + rnorm(60) / 2, dim = dim(exp4)) + # case 5: memb_dim + set.seed(1) + exp5 <- array(rnorm(45), dim = c(sdate = 3, dataset = 5, member = 3)) + set.seed(2) + obs5 <- array(rnorm(3), dim = c(sdate = 3, dataset = 1, member = 1)) + set.seed(3) + ref5 <- array(rnorm(6), dim = c(sdate = 3, member = 2)) + + ############################################## test_that("1. Input checks", { @@ -47,7 +60,7 @@ test_that("1. Input checks", { "Parameter 'exp' and 'obs' must have dimension names." ) expect_error( - RMSSS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + RMSSS(array(1:10, dim = c(a = 3, c = 5)), array(1:4, dim = c(a = 3, b = 5)), time_dim = 'a', dat_dim = NULL), "Parameter 'exp' and 'obs' must have same dimension name" ) ## time_dim @@ -93,7 +106,7 @@ test_that("1. Input checks", { expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), - "Parameter 'exp' and 'obs' must have same length of all dimension except 'dat_dim'." + "Parameter 'exp' and 'obs' must have same length of all dimension except 'memb_dim' and 'dat_dim'." ) expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), @@ -141,6 +154,59 @@ test_that("2. Output checks: case 1", { tolerance = 0.00001 ) + #ref + res1_3 <- RMSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset') + expect_equal( + dim(res1_3$rmsss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_3$p.val), + c(nexp = 5, nobs = 2) + ) + expect_equal( + as.vector(res1_3$rmsss[2, ]), + c(-1.197432, -8.879809), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_3$p.val[2, ]), + c(0.8284354, 0.9898591), + tolerance = 0.0001 + ) + res1_4 <- RMSSS(exp1, obs1, ref = ref1_2, dat_dim = 'dataset', sign = T, alpha = 0.3) + expect_equal( + dim(res1_4$rmsss), + c(nexp = 5, nobs = 2) + ) + expect_equal( + dim(res1_4$sign), + c(nexp = 5, nobs = 2) + ) + expect_equal( + as.vector(res1_4$rmsss[2, ]), + c(-0.9249772, -0.5624465), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_4$p[2, ]), + c(0.7874844, 0.7094070), + tolerance = 0.0001 + ) + expect_equal( + as.vector(res1_4$sign), + c(rep(F, 5), T, rep(F, 4)) + ) + + # Random Walk + suppressWarnings({ + res1_5 <- RMSSS(exp1, obs1, ref = ref1_1, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T) + }) + expect_equal( + as.vector(res1_5$sign), + rep(F, 10) + ) + }) @@ -223,3 +289,17 @@ test_that("5. Output checks: case 4", { ) }) + +############################################## +test_that("6. Output checks: case 5", { + res5_1 <- RMSSS(exp5, obs5, ref = ref5, dat_dim = 'dataset', memb_dim = 'member') + res5_2 <- RMSSS(s2dv::MeanDims(exp5, 'member'), s2dv::MeanDims(obs5, 'member'), + ref = s2dv::MeanDims(ref5, 'member'), dat_dim = 'dataset') + expect_equal( + res5_1, + res5_2 + ) + + +}) + -- GitLab From fa23886d4b44ecaff9c2ccb95022103b7a7e3f76 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 27 Dec 2022 12:33:52 +0100 Subject: [PATCH 2/4] Correct typo --- R/RMSSS.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index b8f411a..6f1958f 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -168,7 +168,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { - stop("Parameter 'sig_meothd' must be one of 'one-sided Fisher' or 'Random Walk'.") + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") } if (sig_method == "Random Walk" & pval == T) { warning("p-value cannot be calculated by significance method 'Random Walk'.") -- GitLab From 21d0229da68f80185350b1651ad24d6304f26c06 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 29 Dec 2022 10:37:46 +0100 Subject: [PATCH 3/4] Correct random walk test; the inputs should be time series --- R/RPSS.R | 4 ++-- tests/testthat/test-RPSS.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/RPSS.R b/R/RPSS.R index 3d50d2b..a9377b2 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -399,14 +399,14 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j] - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp_mean[i, j], skill_B = rps_ref_mean[j])$signif + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, j])$signif } } } else { for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp_mean[i, j], skill_B = rps_ref_mean[i, j])$signif + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, i, j])$signif } } } diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index 7976905..fc616e6 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -343,7 +343,7 @@ test_that("4. Output checks: dat3", { ) expect_equal( as.vector(RPSS(exp3, obs3, dat_dim = 'dataset')$sign)[1:3], - c(FALSE, FALSE, FALSE), + c(FALSE, FALSE, TRUE), ) expect_equal( mean(RPSS(exp3, obs3, dat_dim = 'dataset', weights_exp = weights3, Fair = T)$rpss), -- GitLab From ede49bccff2115f616c9b52d6ed39abdff6c6141 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 29 Dec 2022 14:58:59 +0100 Subject: [PATCH 4/4] 'ref' can be or 1 --- R/RMSSS.R | 113 ++++++++++++++++++++++-------------- man/RMSSS.Rd | 12 ++-- tests/testthat/test-RMSSS.R | 7 +++ 3 files changed, 85 insertions(+), 47 deletions(-) diff --git a/R/RMSSS.R b/R/RMSSS.R index 6f1958f..d2ff486 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -24,11 +24,13 @@ #' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will #' be 1. #'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should -#' not have dataset dimension. If there is corresponding reference for each -#' experiement, the dataset dimension must have the same length as in 'exp'. If -#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivelant to 0.) #' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is 'dataset'. @@ -83,7 +85,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', sig_method = 'one-sided Fisher', ncores = NULL) { # Check inputs - ## exp and obs (1) + ## exp, obs, and ref (1) if (is.null(exp) | is.null(obs)) { stop("Parameter 'exp' and 'obs' cannot be NULL.") } @@ -113,10 +115,15 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', stop("Parameter 'exp' and 'obs' must have same dimension name.") } if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { - stop("Parameter 'ref' must have dimension names.") + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") } } @@ -230,6 +237,13 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', # obs <- Reorder(obs, order_obs) + ############################### + # Create ref array if needed + if (is.null(ref)) ref <- 0 + if (!is.array(ref)) { + ref <- array(data = ref, dim = dim(exp)) + } + ############################### ## Ensemble mean if (!is.null(memb_dim)) { @@ -242,22 +256,36 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', ############################### # Calculate RMSSS - if (!is.null(ref)) { # use "ref" as reference forecast - if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { - target_dims_ref <- c(time_dim, dat_dim) +# if (!is.null(ref)) { # use "ref" as reference forecast +# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { +# target_dims_ref <- c(time_dim, dat_dim) +# } else { +# target_dims_ref <- c(time_dim) +# } +# data <- list(exp = exp, obs = obs, ref = ref) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim), +# ref = target_dims_ref) +# } else { +# data <- list(exp = exp, obs = obs) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim)) +# } + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) } else { - target_dims_ref <- c(time_dim) + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) } - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim), - ref = target_dims_ref) } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim)) + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - + res <- Apply(data, target_dims = target_dims, fun = .RMSSS, @@ -275,31 +303,33 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL + if (is.null(ref)) { + ref <- array(data = 0, dim = dim(obs)) + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } + if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] nexp <- 1 nobs <- 1 + nref <- 1 # Add dat dim back temporarily dim(exp) <- c(dim(exp), dat = 1) dim(obs) <- c(dim(obs), dat = 1) - if (!is.null(ref)) { - dim(ref) <- c(dim(ref), dat = 1) - } -# ref_dat_dim <- FALSE + dim(ref) <- c(dim(ref), dat = 1) } else { # exp: [sdate, dat_exp] # obs: [sdate, dat_obs] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) - if (!is.null(ref)) { - if (dat_dim %in% names(dim(ref))) { - nref <- as.numeric(dim(ref)[2]) - } else { - dim(ref) <- c(dim(ref), dat = 1) - nref <- 1 - } + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 } } @@ -316,7 +346,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) # RMS of reference - if (!is.null(ref)) { +# if (!is.null(ref)) { dif2 <- array(dim = c(nsdate, nref, nobs)) names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') for (i in 1:nobs) { @@ -328,13 +358,13 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) rms_ref <- Reorder(rms_ref, c(2, 1)) } - } else { - rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) -# rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( -# rms_ref), na.rm = TRUE) / 1000 - rms_ref <- Reorder(rms_ref, c(2, 1)) - #rms_ref above: [nexp, nobs] - } +# } else { +# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) +## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( +## rms_ref), na.rm = TRUE) / 1000 +# rms_ref <- Reorder(rms_ref, c(2, 1)) +# #rms_ref above: [nexp, nobs] +# } rmsss <- 1 - rms_exp / rms_ref @@ -377,7 +407,6 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', } } else if (sig_method == "Random Walk") { - #TODO: What if ref is NULL? signif <- array(dim = c(nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index af892cb..bcf221c 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -32,11 +32,13 @@ dimension can be different. It can also be a vector with the same length as be 1.} \item{ref}{A named numerical array of the reference forecast data with at -least time dimension. The dimensions must be the same as 'exp' except -'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should -not have dataset dimension. If there is corresponding reference for each -experiement, the dataset dimension must have the same length as in 'exp'. If -'ref' is NULL, the climatological forecast is used as reference forecast. +least time dimension, or 0 (typical climatological forecast) or 1 +(normalized climatological forecast). If it is an array, the dimensions must +be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +reference dataset, it should not have dataset dimension. If there is +corresponding reference for each experiment, the dataset dimension must +have the same length as in 'exp'. If 'ref' is NULL, the typical +climatological forecast is used as reference forecast (equivelant to 0.) The default value is NULL.} \item{time_dim}{A character string indicating the name of dimension along diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index fa5ca33..fa019e6 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -206,6 +206,13 @@ test_that("2. Output checks: case 1", { as.vector(res1_5$sign), rep(F, 10) ) + suppressWarnings({ + res1_6 <- RMSSS(exp1, obs1, ref = NULL, dat_dim = 'dataset', sig_method = "Random Walk", pval = F, sign = T) + }) + expect_equal( + as.vector(res1_6$sign), + rep(F, 10) + ) }) -- GitLab