From ca924e329d9f5f13c9985741ce052eb5f910e9ef Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 16 Apr 2021 14:02:47 +0200 Subject: [PATCH 1/2] Transform RatioRMS and create unit test --- NAMESPACE | 1 + R/RatioRMS.R | 188 +++++++++++++++++++++++++++++++++ man/RatioRMS.Rd | 85 +++++++++++++++ tests/testthat/test-RatioRMS.R | 128 ++++++++++++++++++++++ 4 files changed, 402 insertions(+) create mode 100644 R/RatioRMS.R create mode 100644 man/RatioRMS.Rd create mode 100644 tests/testthat/test-RatioRMS.R diff --git a/NAMESPACE b/NAMESPACE index 87937e2..fd51fc8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(PlotStereoMap) export(RMS) export(RMSSS) export(RandomWalkTest) +export(RatioRMS) export(Regression) export(Reorder) export(SPOD) diff --git a/R/RatioRMS.R b/R/RatioRMS.R new file mode 100644 index 0000000..f93e486 --- /dev/null +++ b/R/RatioRMS.R @@ -0,0 +1,188 @@ +#'Compute the ratio between the RMSE of two experiments +#' +#'Calculate the ratio of the RMSE for two forecasts with the same observation, +#'that is, RMSE(ens, obs) / RMSE(ens.ref, obs). The p-value is provided by a +#'two-sided Fischer test. +#' +#'@param exp1 A numeric array with named dimensions of the first experimental +#' data. It must have at least 'time_dim' and have the same dimensions as +#' 'exp2' and 'obs'. +#'@param exp2 A numeric array with named dimensions of the second experimental +#' data. It must have at least 'time_dim' and have the same dimensions as +#' 'exp1' and 'obs'. +#'@param obs A numeric array with named dimensions of the observational data. +#' It must have at least 'time_dim' and have the same dimensions as 'exp1' and +#' 'exp2'. +#'@param time_dim A character string of the dimension name along which RMS is +#' computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute the p-value of Ho: +#' RMSE1/RMSE2 = 1 or not. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list containing the numeric arrays with dimensions identical with +#' 'exp1', 'exp2', and 'obs', expect 'time_dim': +#'\item{$ratiorms}{ +#' The ratio between the RMSE (i.e., RMSE1/RMSE2). +#'} +#'\item{$p.val}{ +#' The p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. Only +#' exists if 'pval' is TRUE. +#'} +#' +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'# Compute DJF seasonal means and anomalies. +#'initial_month <- 11 +#'mean_start_month <- 12 +#'mean_stop_month <- 2 +#'sampleData$mod <- Season(sampleData$mod, monini = initial_month, +#' moninf = mean_start_month, monsup = mean_stop_month) +#'sampleData$obs <- Season(sampleData$obs, monini = initial_month, +#' moninf = mean_start_month, monsup = mean_stop_month) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'# Generate two experiments with 2 and 1 members from the only experiment +#'# available in the sample data. Take only data values for a single forecast +#'# time step. +#'ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2)) +#'ano_exp_2 <- Subset(ano_exp, 'member', c(3)) +#'ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'# Compute ensemble mean and provide as inputs to RatioRMS. +#'rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), +#' MeanDims(ano_exp_2, 'member'), +#' MeanDims(ano_obs, 'member')) +#'# Plot the RatioRMS for the first forecast time step. +#'\donttest{ +#'PlotEquiMap(rrms$ratiorms, sampleData$lon, sampleData$lat, +#' toptitle = 'Ratio RMSE') +#'} +#' +#'@import multiApply +#'@export +RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = NULL) { + + # Check inputs + ## exp1, exp2, obs + if (is.null(exp1) | is.null(exp2) | is.null(obs)) { + stop("Parameter 'exp1', 'exp2', and 'obs' cannot be NULL.") + } + if (!is.numeric(exp1) | !is.numeric(exp2) | !is.numeric(obs)) { + stop("Parameter 'exp1', 'exp2', and 'obs' must be a numeric array.") + } + if (is.null(dim(exp1))) { #is vector + dim(exp1) <- c(length(exp1)) + names(dim(exp1)) <- time_dim + } + if (is.null(dim(exp2))) { #is vector + dim(exp2) <- c(length(exp2)) + names(dim(exp2)) <- time_dim + } + if (is.null(dim(obs))) { #is vector + dim(obs) <- c(length(obs)) + names(dim(obs)) <- time_dim + } + if(any(is.null(names(dim(exp1))))| any(nchar(names(dim(exp1))) == 0) | + any(is.null(names(dim(exp2))))| any(nchar(names(dim(exp2))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp1', 'exp2', and 'obs' must have dimension names.") + } + if(!all(names(dim(exp1)) %in% names(dim(exp2))) | + !all(names(dim(exp2)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp1)))) { + stop("Parameter 'exp1', 'exp2', and 'obs' must have same dimension names.") + } + name_1 <- sort(names(dim(exp1))) + name_2 <- sort(names(dim(exp2))) + name_3 <- sort(names(dim(obs))) + if (!all(dim(exp1)[name_1] == dim(exp2)[name_2]) | + !all(dim(exp1)[name_1] == dim(obs)[name_3])) { + stop(paste0("Parameter 'exp1', 'exp2', and 'obs' must have the same length of ", + "all the dimensions.")) + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp1))) { + stop("Parameter 'time_dim' is not found in 'exp1', 'exp2', and 'obs' dimensions.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate RatioRMS + if (is.null(ncores)) { + use_Apply <- FALSE + } else if (ncores == 1) { + use_Apply <- FALSE + } else { + use_Apply <- TRUE + } + + if (use_Apply) { + res <- Apply(list(exp1, exp2, obs), + target_dims = list(c(names(dim(exp1))), + c(names(dim(exp1))), + c(names(dim(exp1)))), + fun = .RatioRMS, + time_dim = time_dim, pval = pval, + ncores = ncores) + } else { + res <- .RatioRMS(exp1, exp2, obs, time_dim = time_dim, pval = pval) + } + + return(res) +} + +.RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE) { + + # exp1, exp2, obs: [all_dim] + dif1 <- exp1 - obs + dif2 <- exp2 - obs + rms1 <- MeanDims(dif1^2, time_dim, na.rm = TRUE)^0.5 + rms2 <- MeanDims(dif2^2, time_dim, na.rm = TRUE)^0.5 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs(rms2), na.rm = TRUE) / 1000 + ratiorms <- rms1 / rms2 + + if (pval) { + eno1 <- Eno(dif1, time_dim) + eno2 <- Eno(dif2, time_dim) + F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) + F[which(F < 1)] <- 1 / F[which(F < 1)] + + if (is.null(dim(ratiorms))) { + p.val <- c() + } else { + p.val <- array(dim = dim(ratiorms)) + } + avail_ind <- which(!is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2) + p.val[avail_ind] <- (1 - pf(F,eno1[avail_ind] - 1, eno2[avail_ind] - 1)) * 2 + ratiorms[-avail_ind] <- NA + } + + if (pval) { + return(invisible(list(ratiorms = ratiorms, p.val = p.val))) + } else { + return(invisible(list(ratiorms = ratiorms))) + } +} diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd new file mode 100644 index 0000000..bf68e8d --- /dev/null +++ b/man/RatioRMS.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RatioRMS.R +\name{RatioRMS} +\alias{RatioRMS} +\title{Compute the ratio between the RMSE of two experiments} +\usage{ +RatioRMS(exp1, exp2, obs, time_dim = "sdate", pval = TRUE, ncores = NULL) +} +\arguments{ +\item{exp1}{A numeric array with named dimensions of the first experimental +data. It must have at least 'time_dim' and have the same dimensions as +'exp2' and 'obs'.} + +\item{exp2}{A numeric array with named dimensions of the second experimental +data. It must have at least 'time_dim' and have the same dimensions as +'exp1' and 'obs'.} + +\item{obs}{A numeric array with named dimensions of the observational data. +It must have at least 'time_dim' and have the same dimensions as 'exp1' and +'exp2'.} + +\item{time_dim}{A character string of the dimension name along which RMS is +computed. The default value is 'sdate'.} + +\item{pval}{A logical value indicating whether to compute the p-value of Ho: +RMSE1/RMSE2 = 1 or not. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays with dimensions identical with + 'exp1', 'exp2', and 'obs', expect 'time_dim': +\item{$ratiorms}{ + The ratio between the RMSE (i.e., RMSE1/RMSE2). +} +\item{$p.val}{ + The p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. Only + exists if 'pval' is TRUE. +} +} +\description{ +Calculate the ratio of the RMSE for two forecasts with the same observation, +that is, RMSE(ens, obs) / RMSE(ens.ref, obs). The p-value is provided by a +two-sided Fischer test. +} +\examples{ +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +# Compute DJF seasonal means and anomalies. +initial_month <- 11 +mean_start_month <- 12 +mean_stop_month <- 2 +sampleData$mod <- Season(sampleData$mod, monini = initial_month, + moninf = mean_start_month, monsup = mean_stop_month) +sampleData$obs <- Season(sampleData$obs, monini = initial_month, + moninf = mean_start_month, monsup = mean_stop_month) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +# Generate two experiments with 2 and 1 members from the only experiment +# available in the sample data. Take only data values for a single forecast +# time step. +ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2)) +ano_exp_2 <- Subset(ano_exp, 'member', c(3)) +ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +# Compute ensemble mean and provide as inputs to RatioRMS. +rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), + MeanDims(ano_exp_2, 'member'), + MeanDims(ano_obs, 'member')) +# Plot the RatioRMS for the first forecast time step. +\donttest{ +PlotEquiMap(rrms$ratiorms, sampleData$lon, sampleData$lat, + toptitle = 'Ratio RMSE') +} + +} diff --git a/tests/testthat/test-RatioRMS.R b/tests/testthat/test-RatioRMS.R new file mode 100644 index 0000000..b70d6fb --- /dev/null +++ b/tests/testthat/test-RatioRMS.R @@ -0,0 +1,128 @@ +context("s2dv::RatioRMS tests") + +############################################## + # dat1 + set.seed(1) + exp1_1 <- array(rnorm(120), dim = c(dataset = 1, sdate = 5, ftime = 3)) + set.seed(2) + exp1_2 <- array(rnorm(120), dim = c(dataset = 1, sdate = 5, ftime = 3)) + set.seed(3) + obs1 <- array(rnorm(80), dim = c(dataset = 1, sdate = 5, ftime = 3)) + + # dat 2: vector + set.seed(4) + exp2_1 <- rnorm(10) + set.seed(5) + exp2_2 <- rnorm(10) + set.seed(6) + obs2 <- rnorm(10) + + +############################################## +test_that("1. Input checks", { + + # exp1, exp2, obs + expect_error( + RatioRMS(c(), exp1_2, c()), + "Parameter 'exp1', 'exp2', and 'obs' cannot be NULL." + ) + expect_error( + RatioRMS(c('b'), c('a'), obs1), + "Parameter 'exp1', 'exp2', and 'obs' must be a numeric array." + ) + expect_error( + RatioRMS(exp1_1, array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp1', 'exp2', and 'obs' must have dimension names." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, array(1:15, dim = c(data = 1, ftime = 3, sdates = 5))), + "Parameter 'exp1', 'exp2', and 'obs' must have same dimension names." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, array(1:12, dim = c(dataset = 1, ftime = 3, sdate = 4))), + "Parameter 'exp1', 'exp2', and 'obs' must have the same length of all the dimensions." + ) + # time_dim + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, time_dim = c('sdate', 'ftime')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp1', 'exp2', and 'obs' dimensions." + ) + # pval + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, pval = 1), + "Parameter 'pval' must be one logical value." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +names(RatioRMS(exp1_1, exp1_2, obs1)), +c('ratiorms', 'p.val') +) +expect_equal( +dim(RatioRMS(exp1_1, exp1_2, obs1)$ratiorms), +c(dataset = 1, ftime = 3) +) +expect_equal( +dim(RatioRMS(exp1_1, exp1_2, obs1)$p.val), +c(dataset = 1, ftime = 3) +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1)$p.val), +c(0.1811868, 0.4758232, 0.7473213), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1)$ratiorms), +c(2.0944471, 0.6814573, 1.1873955), +tolerance = 0.0001 +) +expect_equal( +names(RatioRMS(exp1_1, exp1_2, obs1, pval = FALSE)), +c('ratiorms') +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1, pval = FALSE, time_dim = 'ftime')$ratiorms), +c(2.0832571, 0.7292987, 0.6031437, 1.1885930, 0.8542696), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1, time_dim = 'ftime')$p.val), +c(0.3745346, 0.6944118, 0.5334904, 0.8289285, 0.8437813), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( +names(RatioRMS(exp2_1, exp2_2, obs2)), +c('ratiorms', 'p.val') +) +expect_equal( +RatioRMS(exp2_1, exp2_2, obs2)$p.val, +0.7418331, +tolerance = 0.0001 +) +expect_equal( +RatioRMS(exp2_1, exp2_2, obs2)$ratiorms, +0.8931399, +tolerance = 0.0001 +) + +}) + -- GitLab From 448bbb0b9a6b45eefdf20193feddc25e49839e11 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 16 Apr 2021 14:14:27 +0200 Subject: [PATCH 2/2] Fix example --- R/RatioRMS.R | 10 +++++----- man/RatioRMS.Rd | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/RatioRMS.R b/R/RatioRMS.R index f93e486..f7e34b4 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -53,11 +53,11 @@ #'# Generate two experiments with 2 and 1 members from the only experiment #'# available in the sample data. Take only data values for a single forecast #'# time step. -#'ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2)) -#'ano_exp_2 <- Subset(ano_exp, 'member', c(3)) -#'ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') -#'ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') -#'ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2)) +#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3)) +#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') #'# Compute ensemble mean and provide as inputs to RatioRMS. #'rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), #' MeanDims(ano_exp_2, 'member'), diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index bf68e8d..194c6b9 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -67,11 +67,11 @@ ano_obs <- Ano(sampleData$obs, clim$clim_obs) # Generate two experiments with 2 and 1 members from the only experiment # available in the sample data. Take only data values for a single forecast # time step. -ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2)) -ano_exp_2 <- Subset(ano_exp, 'member', c(3)) -ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') -ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') -ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2)) +ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3)) +ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') # Compute ensemble mean and provide as inputs to RatioRMS. rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), MeanDims(ano_exp_2, 'member'), -- GitLab