From ca924e329d9f5f13c9985741ce052eb5f910e9ef Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Fri, 16 Apr 2021 14:02:47 +0200
Subject: [PATCH 1/2] Transform RatioRMS and create unit test

---
 NAMESPACE                      |   1 +
 R/RatioRMS.R                   | 188 +++++++++++++++++++++++++++++++++
 man/RatioRMS.Rd                |  85 +++++++++++++++
 tests/testthat/test-RatioRMS.R | 128 ++++++++++++++++++++++
 4 files changed, 402 insertions(+)
 create mode 100644 R/RatioRMS.R
 create mode 100644 man/RatioRMS.Rd
 create mode 100644 tests/testthat/test-RatioRMS.R

diff --git a/NAMESPACE b/NAMESPACE
index 87937e2..fd51fc8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -42,6 +42,7 @@ export(PlotStereoMap)
 export(RMS)
 export(RMSSS)
 export(RandomWalkTest)
+export(RatioRMS)
 export(Regression)
 export(Reorder)
 export(SPOD)
diff --git a/R/RatioRMS.R b/R/RatioRMS.R
new file mode 100644
index 0000000..f93e486
--- /dev/null
+++ b/R/RatioRMS.R
@@ -0,0 +1,188 @@
+#'Compute the ratio between the RMSE of two experiments
+#'
+#'Calculate the ratio of the RMSE for two forecasts with the same observation,
+#'that is, RMSE(ens, obs) / RMSE(ens.ref, obs). The p-value is provided by a 
+#'two-sided Fischer test.
+#'
+#'@param exp1 A numeric array with named dimensions of the first experimental 
+#'  data. It must have at least 'time_dim' and have the same dimensions as
+#'  'exp2' and 'obs'.
+#'@param exp2 A numeric array with named dimensions of the second experimental 
+#'  data. It must have at least 'time_dim' and have the same dimensions as 
+#'  'exp1' and 'obs'.
+#'@param obs A numeric array with named dimensions of the observational data.
+#'  It must have at least 'time_dim' and have the same dimensions as 'exp1' and
+#'  'exp2'.
+#'@param time_dim A character string of the dimension name along which RMS is
+#'  computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value of Ho:
+#'  RMSE1/RMSE2 = 1 or not. The default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel 
+#'  computation. The default value is NULL.
+#'
+#'@return A list containing the numeric arrays with dimensions identical with
+#'  'exp1', 'exp2', and 'obs', expect 'time_dim':
+#'\item{$ratiorms}{
+#'  The ratio between the RMSE (i.e., RMSE1/RMSE2).
+#'}
+#'\item{$p.val}{
+#'  The p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. Only 
+#'  exists if 'pval' is TRUE.
+#'}
+#'
+#'@examples
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'# Compute DJF seasonal means and anomalies.
+#'initial_month <- 11
+#'mean_start_month <- 12
+#'mean_stop_month <- 2                                
+#'sampleData$mod <- Season(sampleData$mod, monini = initial_month, 
+#'                         moninf = mean_start_month, monsup = mean_stop_month)
+#'sampleData$obs <- Season(sampleData$obs, monini = initial_month, 
+#'                         moninf = mean_start_month, monsup = mean_stop_month)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'# Generate two experiments with 2 and 1 members from the only experiment 
+#'# available in the sample data. Take only data values for a single forecast
+#'# time step.
+#'ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2))
+#'ano_exp_2 <- Subset(ano_exp, 'member', c(3))
+#'ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'# Compute ensemble mean and provide as inputs to RatioRMS.
+#'rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), 
+#'                 MeanDims(ano_exp_2, 'member'), 
+#'                 MeanDims(ano_obs, 'member'))
+#'# Plot the RatioRMS for the first forecast time step.
+#'\donttest{
+#'PlotEquiMap(rrms$ratiorms, sampleData$lon, sampleData$lat, 
+#'            toptitle = 'Ratio RMSE')
+#'}
+#'
+#'@import multiApply
+#'@export
+RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = NULL) {
+
+  # Check inputs 
+  ## exp1, exp2, obs
+  if (is.null(exp1) | is.null(exp2) | is.null(obs)) {
+    stop("Parameter 'exp1', 'exp2', and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp1) | !is.numeric(exp2) | !is.numeric(obs)) {
+    stop("Parameter 'exp1', 'exp2', and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp1))) {  #is vector
+    dim(exp1) <- c(length(exp1))
+    names(dim(exp1)) <- time_dim
+  }
+  if (is.null(dim(exp2))) {  #is vector
+    dim(exp2) <- c(length(exp2))
+    names(dim(exp2)) <- time_dim
+  }
+  if (is.null(dim(obs))) {  #is vector
+    dim(obs) <- c(length(obs))
+    names(dim(obs)) <- time_dim
+  }
+  if(any(is.null(names(dim(exp1))))| any(nchar(names(dim(exp1))) == 0) |
+     any(is.null(names(dim(exp2))))| any(nchar(names(dim(exp2))) == 0) |
+     any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp1', 'exp2', and 'obs' must have dimension names.")
+  }
+  if(!all(names(dim(exp1)) %in% names(dim(exp2))) | 
+     !all(names(dim(exp2)) %in% names(dim(obs))) | 
+     !all(names(dim(obs)) %in% names(dim(exp1)))) {
+    stop("Parameter 'exp1', 'exp2', and 'obs' must have same dimension names.")
+  }
+  name_1 <- sort(names(dim(exp1)))
+  name_2 <- sort(names(dim(exp2)))
+  name_3 <- sort(names(dim(obs)))
+  if (!all(dim(exp1)[name_1] == dim(exp2)[name_2]) |
+      !all(dim(exp1)[name_1] == dim(obs)[name_3])) {
+    stop(paste0("Parameter 'exp1', 'exp2', and 'obs' must have the same length of ",
+                "all the dimensions."))
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp1))) {
+    stop("Parameter 'time_dim' is not found in 'exp1', 'exp2', and 'obs' dimensions.")
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  } 
+
+  ###############################
+  # Calculate RatioRMS
+  if (is.null(ncores)) {
+    use_Apply <- FALSE
+  } else if (ncores == 1) {
+    use_Apply <- FALSE
+  } else {
+    use_Apply <- TRUE
+  }
+
+  if (use_Apply) {
+    res <- Apply(list(exp1, exp2, obs), 
+                 target_dims = list(c(names(dim(exp1))), 
+                                    c(names(dim(exp1))),
+                                    c(names(dim(exp1)))),
+                 fun = .RatioRMS, 
+                 time_dim = time_dim, pval = pval,
+                 ncores = ncores)
+  } else {
+    res <- .RatioRMS(exp1, exp2, obs, time_dim = time_dim, pval = pval)
+  }
+
+  return(res)
+}
+
+.RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE) {
+
+  # exp1, exp2, obs: [all_dim]
+  dif1 <- exp1 - obs
+  dif2 <- exp2 - obs
+  rms1 <- MeanDims(dif1^2, time_dim, na.rm = TRUE)^0.5
+  rms2 <- MeanDims(dif2^2, time_dim, na.rm = TRUE)^0.5
+  rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs(rms2), na.rm = TRUE) / 1000
+  ratiorms <- rms1 / rms2
+
+  if (pval) {
+    eno1 <- Eno(dif1, time_dim)
+    eno2 <- Eno(dif2, time_dim)
+    F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1))
+    F[which(F < 1)] <- 1 / F[which(F < 1)]
+    
+    if (is.null(dim(ratiorms))) {
+      p.val <- c()
+    } else {
+      p.val <- array(dim = dim(ratiorms))
+    }
+    avail_ind <- which(!is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2)
+    p.val[avail_ind] <- (1 - pf(F,eno1[avail_ind] - 1, eno2[avail_ind] - 1)) * 2
+    ratiorms[-avail_ind] <- NA
+  }
+
+  if (pval) {
+    return(invisible(list(ratiorms = ratiorms, p.val = p.val)))
+  } else {
+    return(invisible(list(ratiorms = ratiorms)))
+  }
+}
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
new file mode 100644
index 0000000..bf68e8d
--- /dev/null
+++ b/man/RatioRMS.Rd
@@ -0,0 +1,85 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RatioRMS.R
+\name{RatioRMS}
+\alias{RatioRMS}
+\title{Compute the ratio between the RMSE of two experiments}
+\usage{
+RatioRMS(exp1, exp2, obs, time_dim = "sdate", pval = TRUE, ncores = NULL)
+}
+\arguments{
+\item{exp1}{A numeric array with named dimensions of the first experimental 
+data. It must have at least 'time_dim' and have the same dimensions as
+'exp2' and 'obs'.}
+
+\item{exp2}{A numeric array with named dimensions of the second experimental 
+data. It must have at least 'time_dim' and have the same dimensions as 
+'exp1' and 'obs'.}
+
+\item{obs}{A numeric array with named dimensions of the observational data.
+It must have at least 'time_dim' and have the same dimensions as 'exp1' and
+'exp2'.}
+
+\item{time_dim}{A character string of the dimension name along which RMS is
+computed. The default value is 'sdate'.}
+
+\item{pval}{A logical value indicating whether to compute the p-value of Ho:
+RMSE1/RMSE2 = 1 or not. The default value is TRUE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel 
+computation. The default value is NULL.}
+}
+\value{
+A list containing the numeric arrays with dimensions identical with
+ 'exp1', 'exp2', and 'obs', expect 'time_dim':
+\item{$ratiorms}{
+ The ratio between the RMSE (i.e., RMSE1/RMSE2).
+}
+\item{$p.val}{
+ The p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. Only 
+ exists if 'pval' is TRUE.
+}
+}
+\description{
+Calculate the ratio of the RMSE for two forecasts with the same observation,
+that is, RMSE(ens, obs) / RMSE(ens.ref, obs). The p-value is provided by a 
+two-sided Fischer test.
+}
+\examples{
+\dontshow{
+startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+                                    c('observation'), startDates,
+                                    output = 'lonlat',
+                                    latmin = 27, latmax = 48,
+                                    lonmin = -12, lonmax = 40)
+}
+# Compute DJF seasonal means and anomalies.
+initial_month <- 11
+mean_start_month <- 12
+mean_stop_month <- 2                                
+sampleData$mod <- Season(sampleData$mod, monini = initial_month, 
+                        moninf = mean_start_month, monsup = mean_stop_month)
+sampleData$obs <- Season(sampleData$obs, monini = initial_month, 
+                        moninf = mean_start_month, monsup = mean_stop_month)
+clim <- Clim(sampleData$mod, sampleData$obs)
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+# Generate two experiments with 2 and 1 members from the only experiment 
+# available in the sample data. Take only data values for a single forecast
+# time step.
+ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2))
+ano_exp_2 <- Subset(ano_exp, 'member', c(3))
+ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+# Compute ensemble mean and provide as inputs to RatioRMS.
+rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), 
+                MeanDims(ano_exp_2, 'member'), 
+                MeanDims(ano_obs, 'member'))
+# Plot the RatioRMS for the first forecast time step.
+\donttest{
+PlotEquiMap(rrms$ratiorms, sampleData$lon, sampleData$lat, 
+           toptitle = 'Ratio RMSE')
+}
+
+}
diff --git a/tests/testthat/test-RatioRMS.R b/tests/testthat/test-RatioRMS.R
new file mode 100644
index 0000000..b70d6fb
--- /dev/null
+++ b/tests/testthat/test-RatioRMS.R
@@ -0,0 +1,128 @@
+context("s2dv::RatioRMS tests")
+
+##############################################
+  # dat1
+  set.seed(1)
+  exp1_1 <- array(rnorm(120), dim = c(dataset = 1, sdate = 5, ftime = 3))
+  set.seed(2)
+  exp1_2 <- array(rnorm(120), dim = c(dataset = 1, sdate = 5, ftime = 3))
+  set.seed(3)
+  obs1 <- array(rnorm(80),  dim = c(dataset = 1, sdate = 5, ftime = 3))
+
+  # dat 2: vector
+  set.seed(4)
+  exp2_1 <- rnorm(10)
+  set.seed(5)
+  exp2_2 <- rnorm(10)
+  set.seed(6)
+  obs2 <- rnorm(10)
+
+
+##############################################
+test_that("1. Input checks", {
+
+  # exp1, exp2, obs
+  expect_error(
+  RatioRMS(c(), exp1_2, c()),
+  "Parameter 'exp1', 'exp2', and 'obs' cannot be NULL."
+  )
+  expect_error(
+  RatioRMS(c('b'), c('a'), obs1),
+  "Parameter 'exp1', 'exp2', and 'obs' must be a numeric array."
+  )
+  expect_error(
+  RatioRMS(exp1_1, array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))),
+  "Parameter 'exp1', 'exp2', and 'obs' must have dimension names."
+  )
+  expect_error(
+  RatioRMS(exp1_1, exp1_2, array(1:15, dim = c(data = 1, ftime = 3, sdates = 5))),
+  "Parameter 'exp1', 'exp2', and 'obs' must have same dimension names."
+  )
+  expect_error(
+  RatioRMS(exp1_1, exp1_2, array(1:12, dim = c(dataset = 1, ftime = 3, sdate = 4))),
+  "Parameter 'exp1', 'exp2', and 'obs' must have the same length of all the dimensions."
+  )
+  # time_dim
+  expect_error(
+  RatioRMS(exp1_1, exp1_2, obs1, time_dim = c('sdate', 'ftime')),
+  "Parameter 'time_dim' must be a character string."
+  )
+  expect_error(
+  RatioRMS(exp1_1, exp1_2, obs1, time_dim = 'a'),
+  "Parameter 'time_dim' is not found in 'exp1', 'exp2', and 'obs' dimensions."
+  )
+  # pval
+  expect_error(
+  RatioRMS(exp1_1, exp1_2, obs1, pval = 1),
+  "Parameter 'pval' must be one logical value."
+  )
+  expect_error(
+  RatioRMS(exp1_1, exp1_2, obs1, ncores = 1.5),
+  "Parameter 'ncores' must be a positive integer."
+  )
+
+
+})
+
+##############################################
+test_that("2. Output checks: dat1", {
+
+expect_equal(
+names(RatioRMS(exp1_1, exp1_2, obs1)),
+c('ratiorms', 'p.val')
+)
+expect_equal(
+dim(RatioRMS(exp1_1, exp1_2, obs1)$ratiorms),
+c(dataset = 1, ftime = 3)
+)
+expect_equal(
+dim(RatioRMS(exp1_1, exp1_2, obs1)$p.val),
+c(dataset = 1, ftime = 3)
+)
+expect_equal(
+as.vector(RatioRMS(exp1_1, exp1_2, obs1)$p.val),
+c(0.1811868, 0.4758232, 0.7473213),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RatioRMS(exp1_1, exp1_2, obs1)$ratiorms),
+c(2.0944471, 0.6814573, 1.1873955),
+tolerance = 0.0001
+)
+expect_equal(
+names(RatioRMS(exp1_1, exp1_2, obs1, pval = FALSE)),
+c('ratiorms')
+)
+expect_equal(
+as.vector(RatioRMS(exp1_1, exp1_2, obs1, pval = FALSE, time_dim = 'ftime')$ratiorms),
+c(2.0832571, 0.7292987, 0.6031437, 1.1885930, 0.8542696),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RatioRMS(exp1_1, exp1_2, obs1, time_dim = 'ftime')$p.val),
+c(0.3745346, 0.6944118, 0.5334904, 0.8289285, 0.8437813),
+tolerance = 0.0001
+)
+
+})
+
+##############################################
+test_that("3. Output checks: dat2", {
+
+expect_equal(
+names(RatioRMS(exp2_1, exp2_2, obs2)),
+c('ratiorms', 'p.val')
+)
+expect_equal(
+RatioRMS(exp2_1, exp2_2, obs2)$p.val,
+0.7418331,
+tolerance = 0.0001
+)
+expect_equal(
+RatioRMS(exp2_1, exp2_2, obs2)$ratiorms,
+0.8931399,
+tolerance = 0.0001
+)
+
+})
+
-- 
GitLab


From 448bbb0b9a6b45eefdf20193feddc25e49839e11 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Fri, 16 Apr 2021 14:14:27 +0200
Subject: [PATCH 2/2] Fix example

---
 R/RatioRMS.R    | 10 +++++-----
 man/RatioRMS.Rd | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/RatioRMS.R b/R/RatioRMS.R
index f93e486..f7e34b4 100644
--- a/R/RatioRMS.R
+++ b/R/RatioRMS.R
@@ -53,11 +53,11 @@
 #'# Generate two experiments with 2 and 1 members from the only experiment 
 #'# available in the sample data. Take only data values for a single forecast
 #'# time step.
-#'ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2))
-#'ano_exp_2 <- Subset(ano_exp, 'member', c(3))
-#'ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
-#'ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
-#'ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2))
+#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3))
+#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
 #'# Compute ensemble mean and provide as inputs to RatioRMS.
 #'rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), 
 #'                 MeanDims(ano_exp_2, 'member'), 
diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd
index bf68e8d..194c6b9 100644
--- a/man/RatioRMS.Rd
+++ b/man/RatioRMS.Rd
@@ -67,11 +67,11 @@ ano_obs <- Ano(sampleData$obs, clim$clim_obs)
 # Generate two experiments with 2 and 1 members from the only experiment 
 # available in the sample data. Take only data values for a single forecast
 # time step.
-ano_exp_1 <- Subset(ano_exp, 'member', c(1, 2))
-ano_exp_2 <- Subset(ano_exp, 'member', c(3))
-ano_exp_1 <- Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
-ano_exp_2 <- Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
-ano_obs <- Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2))
+ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3))
+ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
 # Compute ensemble mean and provide as inputs to RatioRMS.
 rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), 
                 MeanDims(ano_exp_2, 'member'), 
-- 
GitLab