Commit 9edfc681 authored by aho's avatar aho
Browse files

Merge branch 'develop-Corr_member' into 'master'

Add parameter 'memb_dim' and 'memb' in Corr(). Create unit tests and modify the examples.

See merge request !39
parents 6a1c1d59 8d40622f
Pipeline #5072 passed with stage
in 2 minutes and 55 seconds
# s2dv 1.0.0 (Release date: 2021-)
- Add parameter 'memb_dim' and 'memb' in Corr(). They allow the existence of the member dimension
which can have different length between exp and obs, and users can choose to do the ensemble mean
first before correlation or calculate the correlation for individual member.
# s2dv 0.1.1 (Release date: 2020-11-16)
- Change the lincense to Apache License 2.0.
......
......@@ -19,7 +19,7 @@
#'@param exp A named numeric array of experimental data, with at least two
#' dimensions 'time_dim' and 'dat_dim'.
#'@param obs A named numeric array of observational data, same dimensions as
#' parameter 'exp' except along dat_dim.
#' parameter 'exp' except along 'dat_dim' and 'memb_dim'.
#'@param time_dim A character string indicating the name of dimension along
#' which the correlations are computed. The default value is 'sdate'.
#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
......@@ -31,6 +31,12 @@
#' be completed. The default is c(1, length(comp_dim dimension)).
#'@param method A character string indicating the type of correlation:
#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.
#'@param memb_dim A character string indicating the name of the member
#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no
#' member dimension, set NULL. The default value is NULL.
#'@param memb A logical value indicating whether to remain 'memb_dim' dimension
#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when
#' 'memb_dim' is not NULL. The default value is TRUE.
#'@param pval A logical value indicating whether to compute or not the p-value
#' of the test Ho: Corr = 0. The default value is TRUE.
#'@param conf A logical value indicating whether to retrieve the confidence
......@@ -42,9 +48,12 @@
#'
#'@return
#'A list containing the numeric arrays with dimension:\cr
#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr
#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the
#'number of observation (i.e., dat_dim in obs).\cr
#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
#' time_dim and memb_dim).\cr
#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the
#'number of observation (i.e., 'dat_dim' in obs). exp_memb is the number of
#'member in experiment (i.e., 'memb_dim' in exp) and obs_memb is the number of
#'member in observation (i.e., 'memb_dim' in obs).\cr\cr
#'\item{$corr}{
#' The correlation coefficient.
#'}
......@@ -59,20 +68,37 @@
#'}
#'
#'@examples
#'# Load sample data as in Load() example:
#'# Case 1: Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'corr <- Corr(clim$clim_exp, clim$clim_obs, time_dim = 'ftime', dat_dim = 'member')
#'# Renew the example when Ano and Smoothing is ready
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'runmean_months <- 12
#'
#'# Smooth along lead-times
#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times
#'leadtimes_per_startdate <- 60
#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
#' MeanDims(smooth_ano_obs, 'member'),
#' comp_dim = 'ftime',
#' limits = c(ceiling((runmean_months + 1) / 2),
#' leadtimes_per_startdate - floor(runmean_months / 2)))
#'
#'# Case 2: Keep member dimension
#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member')
#'# ensemble mean
#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE)
#'
#'@rdname Corr
#'@import multiApply
#'@importFrom ClimProjDiags Subset
#'@importFrom stats cor pt qnorm
#'@export
Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
comp_dim = NULL, limits = NULL,
method = 'pearson', pval = TRUE, conf = TRUE,
comp_dim = NULL, limits = NULL, method = 'pearson',
memb_dim = NULL, memb = TRUE,
pval = TRUE, conf = TRUE,
conf.lev = 0.95, ncores = NULL) {
# Check inputs
......@@ -133,6 +159,19 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
if (!(method %in% c("kendall", "spearman", "pearson"))) {
stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
}
## memb_dim
if (!is.null(memb_dim)) {
if (!is.character(memb_dim) | length(memb_dim) > 1) {
stop("Parameter 'memb_dim' must be a character string.")
}
if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
}
}
## memb
if (!is.logical(memb) | length(memb) > 1) {
stop("Parameter 'memb' must be one logical value.")
}
## pval
if (!is.logical(pval) | length(pval) > 1) {
stop("Parameter 'pval' must be one logical value.")
......@@ -157,9 +196,13 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
name_obs <- sort(names(dim(obs)))
name_exp <- name_exp[-which(name_exp == dat_dim)]
name_obs <- name_obs[-which(name_obs == dat_dim)]
if (!is.null(memb_dim)) {
name_exp <- name_exp[-which(name_exp == memb_dim)]
name_obs <- name_obs[-which(name_obs == memb_dim)]
}
if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
"all dimension expect 'dat_dim'."))
"all dimension expect 'dat_dim' and 'memb_dim'."))
}
if (dim(exp)[time_dim] < 3) {
stop("The length of time_dim must be at least 3 to compute correlation.")
......@@ -189,43 +232,156 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
obs[which(outrows)] <- NA
}
res <- Apply(list(exp, obs),
target_dims = list(c(time_dim, dat_dim),
c(time_dim, dat_dim)),
fun = .Corr,
time_dim = time_dim, method = method,
pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores,
ncores = ncores)
if (is.null(memb_dim)) {
# Define output_dims
if (conf & pval) {
output_dims <- list(corr = c('nexp', 'nobs'),
p.val = c('nexp', 'nobs'),
conf.lower = c('nexp', 'nobs'),
conf.upper = c('nexp', 'nobs'))
} else if (conf & !pval) {
output_dims <- list(corr = c('nexp', 'nobs'),
conf.lower = c('nexp', 'nobs'),
conf.upper = c('nexp', 'nobs'))
} else if (!conf & pval) {
output_dims <- list(corr = c('nexp', 'nobs'),
p.val = c('nexp', 'nobs'))
} else {
output_dims <- list(corr = c('nexp', 'nobs'))
}
res <- Apply(list(exp, obs),
target_dims = list(c(time_dim, dat_dim),
c(time_dim, dat_dim)),
output_dims = output_dims,
fun = .Corr,
time_dim = time_dim, method = method,
pval = pval, conf = conf, conf.lev = conf.lev,
ncores = ncores)
} else {
if (!memb) { #ensemble mean
name_exp <- names(dim(exp))
margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)]
exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here
obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE)
# Define output_dims
if (conf & pval) {
output_dims <- list(corr = c('nexp', 'nobs'),
p.val = c('nexp', 'nobs'),
conf.lower = c('nexp', 'nobs'),
conf.upper = c('nexp', 'nobs'))
} else if (conf & !pval) {
output_dims <- list(corr = c('nexp', 'nobs'),
conf.lower = c('nexp', 'nobs'),
conf.upper = c('nexp', 'nobs'))
} else if (!conf & pval) {
output_dims <- list(corr = c('nexp', 'nobs'),
p.val = c('nexp', 'nobs'))
} else {
output_dims <- list(corr = c('nexp', 'nobs'))
}
res <- Apply(list(exp, obs),
target_dims = list(c(time_dim, dat_dim),
c(time_dim, dat_dim)),
output_dims = output_dims,
fun = .Corr,
time_dim = time_dim, method = method,
pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores,
ncores = ncores)
} else { # correlation for each member
# Define output_dims
if (conf & pval) {
output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'),
p.val = c('nexp', 'nobs', 'exp_memb', 'obs_memb'),
conf.lower = c('nexp', 'nobs', 'exp_memb', 'obs_memb'),
conf.upper = c('nexp', 'nobs', 'exp_memb', 'obs_memb'))
} else if (conf & !pval) {
output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'),
conf.lower = c('nexp', 'nobs', 'exp_memb', 'obs_memb'),
conf.upper = c('nexp', 'nobs', 'exp_memb', 'obs_memb'))
} else if (!conf & pval) {
output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'),
p.val = c('nexp', 'nobs', 'exp_memb', 'obs_memb'))
} else {
output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'))
}
res <- Apply(list(exp, obs),
target_dims = list(c(time_dim, dat_dim, memb_dim),
c(time_dim, dat_dim, memb_dim)),
output_dims = output_dims,
fun = .Corr,
time_dim = time_dim, method = method,
pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores,
ncores = ncores)
}
}
return(res)
}
.Corr <- function(exp, obs, time_dim = 'sdate', method = 'pearson',
conf = TRUE, pval = TRUE, conf.lev = 0.95, ncores_input = NULL) {
if (length(dim(exp)) == 2) {
# exp: [sdate, dat_exp]
# obs: [sdate, dat_obs]
nexp <- as.numeric(dim(exp)[2])
nobs <- as.numeric(dim(obs)[2])
CORR <- array(dim = c(nexp = nexp, nobs = nobs))
eno_expand <- array(dim = c(nexp = nexp, nobs = nobs))
p.val <- array(dim = c(nexp = nexp, nobs = nobs))
# ens_mean
for (i in 1:nobs) {
CORR[, i] <- sapply(1:nexp,
function(x) {
if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) {
cor(exp[, x], obs[, i],
use = "pairwise.complete.obs",
method = method)
} else {
CORR[, i] <- NA
}
})
# NOTE: Use sapply to replace the for loop
CORR <- sapply(1:nobs, function(i) {
sapply(1:nexp, function (x) {
if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { #NOTE: Is this necessary?
cor(exp[, x], obs[, i],
use = "pairwise.complete.obs",
method = method)
} else {
NA #CORR[, i] <- NA
}
})
})
if (is.null(dim(CORR))) {
CORR <- array(CORR, dim = c(1, 1))
}
} else { # member
# exp: [sdate, dat_exp, memb_exp]
# obs: [sdate, dat_obs, memb_obs]
nexp <- as.numeric(dim(exp)[2])
nobs <- as.numeric(dim(obs)[2])
exp_memb <- as.numeric(dim(exp)[3])
obs_memb <- as.numeric(dim(obs)[3])
CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb))
for (j in 1:obs_memb) {
for (y in 1:exp_memb) {
CORR[, , y, j] <- sapply(1:nobs, function(i) {
sapply(1:nexp, function (x) {
if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) {
cor(exp[, x, y], obs[, i, j],
use = "pairwise.complete.obs",
method = method)
} else {
NA #CORR[, i] <- NA
}
})
})
}
}
}
# if (pval) {
# for (i in 1:nobs) {
# p.val[, i] <- try(sapply(1:nexp,
......@@ -240,16 +396,29 @@ cor(exp[, x], obs[, i],
if (pval | conf) {
if (method == "kendall" | method == "spearman") {
tmp <- apply(obs, 2, rank)
tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3)
names(dim(tmp))[1] <- time_dim
eno <- Eno(tmp, time_dim, ncores = ncores_input)
} else if (method == "pearson") {
eno <- Eno(obs, time_dim, ncores = ncores_input)
}
for (i in 1:nexp) {
eno_expand[i, ] <- eno
if (length(dim(exp)) == 2) {
eno_expand <- array(dim = c(nexp = nexp, nobs = nobs))
for (i in 1:nexp) {
eno_expand[i, ] <- eno
}
} else { #member
eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb))
for (i in 1:nexp) {
for (j in 1:exp_memb) {
eno_expand[i, , j, ] <- eno
}
}
}
}
#############old#################
#This doesn't return error but it's diff from cor.test() when method is spearman and kendall
if (pval) {
......
......@@ -12,6 +12,8 @@ Corr(
comp_dim = NULL,
limits = NULL,
method = "pearson",
memb_dim = NULL,
memb = TRUE,
pval = TRUE,
conf = TRUE,
conf.lev = 0.95,
......@@ -23,7 +25,7 @@ Corr(
dimensions 'time_dim' and 'dat_dim'.}
\item{obs}{A named numeric array of observational data, same dimensions as
parameter 'exp' except along dat_dim.}
parameter 'exp' except along 'dat_dim' and 'memb_dim'.}
\item{time_dim}{A character string indicating the name of dimension along
which the correlations are computed. The default value is 'sdate'.}
......@@ -41,6 +43,14 @@ be completed. The default is c(1, length(comp_dim dimension)).}
\item{method}{A character string indicating the type of correlation:
'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.}
\item{memb_dim}{A character string indicating the name of the member
dimension. It must be one dimension in 'exp' and 'obs'. If there is no
member dimension, set NULL. The default value is NULL.}
\item{memb}{A logical value indicating whether to remain 'memb_dim' dimension
(TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when
'memb_dim' is not NULL. The default value is TRUE.}
\item{pval}{A logical value indicating whether to compute or not the p-value
of the test Ho: Corr = 0. The default value is TRUE.}
......@@ -55,9 +65,12 @@ computation. The default value is NULL.}
}
\value{
A list containing the numeric arrays with dimension:\cr
c(nexp, nobs, all other dimensions of exp except time_dim).\cr
nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the
number of observation (i.e., dat_dim in obs).\cr
c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
time_dim and memb_dim).\cr
nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the
number of observation (i.e., 'dat_dim' in obs). exp_memb is the number of
member in experiment (i.e., 'memb_dim' in exp) and obs_memb is the number of
member in observation (i.e., 'memb_dim' in obs).\cr\cr
\item{$corr}{
The correlation coefficient.
}
......@@ -89,10 +102,27 @@ have inconsistent length between 'exp' and 'obs'. If all the dimensions of
compute the correlation.
}
\examples{
# Load sample data as in Load() example:
# Case 1: Load sample data as in Load() example:
example(Load)
clim <- Clim(sampleData$mod, sampleData$obs)
corr <- Corr(clim$clim_exp, clim$clim_obs, time_dim = 'ftime', dat_dim = 'member')
# Renew the example when Ano and Smoothing is ready
ano_exp <- Ano(sampleData$mod, clim$clim_exp)
ano_obs <- Ano(sampleData$obs, clim$clim_obs)
runmean_months <- 12
# Smooth along lead-times
smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
required_complete_row <- 3 # Discard start dates which contain any NA lead-times
leadtimes_per_startdate <- 60
corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
MeanDims(smooth_ano_obs, 'member'),
comp_dim = 'ftime',
limits = c(ceiling((runmean_months + 1) / 2),
leadtimes_per_startdate - floor(runmean_months / 2)))
# Case 2: Keep member dimension
corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member')
# ensemble mean
corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE)
}
context("s2dv::Corr tests")
##############################################
# dat1
# dat1: memb_dim is NULL
set.seed(1)
exp1 <- array(rnorm(240), dim = c(member = 1, dataset = 2, sdate = 5,
ftime = 3, lat = 2, lon = 4))
......@@ -13,6 +13,33 @@ context("s2dv::Corr tests")
na <- floor(runif(10, min = 1, max = 120))
obs1[na] <- NA
# dat2: memb_dim = member
set.seed(1)
exp2 <- array(rnorm(180), dim = c(member = 3, dataset = 2, sdate = 5,
lat = 2, lon = 3))
set.seed(2)
obs2 <- array(rnorm(30), dim = c(member = 1, dataset = 1, sdate = 5,
lat = 2, lon = 3))
# dat3: memb_dim = member, obs has multiple memb
set.seed(1)
exp3 <- array(rnorm(180), dim = c(member = 3, dataset = 2, sdate = 5,
lat = 2, lon = 3))
set.seed(2)
obs3 <- array(rnorm(120), dim = c(member = 2, dataset = 2, sdate = 5,
lat = 2, lon = 3))
# dat4: exp and obs have dataset = 1 (to check the return array by small func)
set.seed(1)
exp4 <- array(rnorm(10), dim = c(member = 1, dataset = 1, sdate = 5,
lat = 2))
set.seed(2)
obs4 <- array(rnorm(10), dim = c(member = 1, dataset = 1, sdate = 5,
lat = 2))
##############################################
test_that("1. Input checks", {
......@@ -79,6 +106,18 @@ test_that("1. Input checks", {
"Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'."
)
expect_error(
Corr(exp1, obs1, memb_dim = 1),
"Parameter 'memb_dim' must be a character string."
)
expect_error(
Corr(exp1, obs1, memb_dim = 'memb'),
"Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension."
)
expect_error(
Corr(exp2, obs2, memb_dim = 'member', memb = 1),
"Parameter 'memb' must be one logical value."
)
expect_error(
Corr(exp1, obs1, conf = 1),
"Parameter 'conf' must be one logical value."
)
......@@ -105,61 +144,252 @@ test_that("1. Input checks", {
##############################################
test_that("2. Output checks: dat1", {
suppressWarnings(
expect_equal(
dim(Corr(exp1, obs1)$corr),
c(nexp = 2, nobs = 1, member = 1, ftime = 3, lat = 2, lon = 4)
)
)
suppressWarnings(
expect_equal(
Corr(exp1, obs1)$corr[1:6],
c(0.11503859, -0.46959987, -0.64113021, 0.09776572, -0.32393603, 0.27565829),
tolerance = 0.001
)
)
suppressWarnings(
expect_equal(
length(which(is.na(Corr(exp1, obs1)$p.val))),
2
)
)
suppressWarnings(
expect_equal(
max(Corr(exp1, obs1)$conf.lower, na.rm = T),
0.6332941,
tolerance = 0.001
)
)
suppressWarnings(
expect_equal(
length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime')$corr))),
6
)
)
suppressWarnings(
expect_equal(
length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime', limits = c(2, 3))$corr))),
2
)
)
suppressWarnings(
expect_equal(
min(Corr(exp1, obs1, conf.lev = 0.99)$conf.upper, na.rm = TRUE),
0.2747904,
tolerance = 0.0001
)
)
suppressWarnings(
expect_equal(
length(Corr(exp1, obs1, conf = FALSE, pval = FALSE)),
1
)
)
suppressWarnings(
expect_equal(
length(Corr(exp1, obs1, conf = FALSE)),
2
)
)
suppressWarnings(
expect_equal(
length(Corr(exp1, obs1, pval = FALSE)),
3
)
)
suppressWarnings(
expect_equal(
Corr(exp1, obs1, method = 'spearman')$corr[1:6],
c(-0.3, -0.4, -0.6, 0.3, -0.3, 0.2)
)
)
suppressWarnings(
expect_equal(
range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p.val, na.rm = T),
c(0.0, 0.5),
tolerance = 0.001
)
)
})
##############################################
test_that("3. Output checks: dat2", {
# individual member
expect_equal(
dim(Corr(exp2, obs2, memb_dim = 'member')$corr),
c(nexp = 2, nobs = 1, exp_memb = 3, obs_memb = 1, lat = 2, lon = 3)
)
expect_equal(
names(Corr(exp2, obs2, memb_dim = 'member')),
c("corr", "p.val", "conf.lower", "conf.upper")
)
expect_equal(
names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)),
c("corr")
)
expect_equal(
names(Corr(exp2, obs2, memb_dim = 'member', conf = FALSE)),
c("corr", "p.val")
)
expect_equal(
names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)),
c("corr", "conf.lower", "conf.upper")
)
expect_equal(
mean(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr),
0.01645575,
tolerance = 0.0001
)
expect_equal(
median(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr),
0.03024513,