Newer
Older
#'Compute the spatial anomaly correlation coefficient between the forecast and corresponding observation
#'Calculate the spatial anomaly correlation coefficient (ACC) for the ensemble
#'mean of each model and the corresponding references over a spatial domain. It
#'can return a forecast time series if the data contain forest time dimension,
#'and also the ACC mean over one dimension, e.g., start date dimension.
#'The domain of interest can be specified by providing the list of longitudes/
#'latitudes of the data together with the corners of the domain: lonlatbox =
#'c(lonmin, lonmax, latmin, latmax). The data will be adjusted to have a spatial
#'mean of zero, then area weighting is applied. The formula is referenced from
#'Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7).
#'
#'@param exp A numeric array of experimental anomalies with named dimensions.
#' The dimension must have at least 'lat_dim' and 'lon_dim'.
#'@param obs A numeric array of observational anomalies with named dimensions.
#' The dimension should be the same as 'exp' except the length of 'dat_dim'
#' and 'memb_dim'.
#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
#' dimension. The default value is NULL (no dataset).
#'@param lat_dim A character string indicating the name of the latitude
#' dimension of 'exp' and 'obs' along which ACC is computed. The default value
#' is 'lat'.
#'@param lon_dim A character string indicating the name of the longitude
#' dimension of 'exp' and 'obs' along which ACC is computed. The default value
#' is 'lon'.
#'@param avg_dim A character string indicating the name of the dimension to be
#' averaged, which is usually the time dimension. If no need to calculate mean
#' ACC, set as NULL. The default value is 'sdate'.
#'@param memb_dim A character string indicating the name of the member
#' dimension. If the data are not ensemble ones, set as NULL. The default
#' value is 'member'.
#'@param lat A vector of the latitudes of the exp/obs grids. It is used for
#' area weighting and when the domain of interested 'lonlatbox' is specified.
#'@param lon A vector of the longitudes of the exp/obs grids. Only required when
#' the domain of interested 'lonlatbox' is specified. The default value is
#' NULL.
#'@param lonlatbox A numeric vector of 4 indicating the corners of the domain of
#' interested: c(lonmin, lonmax, latmin, latmax). The default value is NULL
#' and the whole data will be used.
#'@param alpha A numeric indicating the significance level for the statistical
#' significance test. The default value is 0.05.
#'@param pval A logical value indicating whether to compute the p-value or not.
#' The default value is TRUE.
#'@param sign A logical value indicating whether to retrieve the statistical
#' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is
#' FALSE.
#'@param conf A logical value indicating whether to retrieve the confidence
#' intervals or not. The default value is TRUE.
#'@param conftype A charater string of "parametric" or "bootstrap".
#' "parametric" provides a confidence interval for the ACC computed by a
#' Fisher transformation and a significance level for the ACC from a one-sided
#' student-T distribution. "bootstrap" provides a confidence interval for the
#' ACC and MACC computed from bootstrapping on the members with 100 drawings
#' with replacement. To guarantee the statistical robustness of the result,
#' make sure that your experiment and observation always have the same number
#' of members. "bootstrap" requires 'memb_dim' has value. The default value is
#' 'parametric'.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return
#'A list containing the numeric arrays:\cr
#' The ACC with the dimensions c(nexp, nobs, the rest of the dimension except
#' lat_dim, lon_dim and memb_dim). nexp is the number of experiment (i.e., dat_dim in
#' exp), and nobs is the number of observation (i.e., dat_dim in obs). If
#' dat_dim is NULL, nexp and nobs are omitted.
#'\item{macc}{
#' The mean anomaly correlation coefficient with dimensions
#' c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and
#' avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp
#' and nobs are omitted.
#'}
#'\item{conf.lower (if conftype = "parametric") or acc_conf.lower (if
#' conftype = "bootstrap")}{
#' The lower confidence interval of ACC with the same dimensions as ACC. Only
#' present if \code{conf = TRUE}.
#'\item{conf.upper (if conftype = "parametric") or acc_conf.upper (if
#' conftype = "bootstrap")}{
#' The upper confidence interval of ACC with the same dimensions as ACC. Only
#' present if \code{conf = TRUE}.
#'}
#'\item{p.val}{
#' The p-value with the same dimensions as ACC. Only present if
#' \code{pval = TRUE} and code{conftype = "parametric"}.
#'\item{$sign}{
#' The statistical significance. Only present if \code{sign = TRUE}.
#'}
#'\item{macc_conf.lower}{
#' The lower confidence interval of MACC with the same dimensions as MACC.
#' Only present if \code{conftype = "bootstrap"}.
#'}
#'\item{macc_conf.upper}{
#' The upper confidence interval of MACC with the same dimensions as MACC.
#' Only present if \code{conftype = "bootstrap"}.
#'}
#'
#'@examples
#' \dontshow{
#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
#' c('observation'), startDates,
#' leadtimemin = 1,
#' leadtimemax = 4,
#' output = 'lonlat',
#' latmin = 27, latmax = 48,
#' lonmin = -12, lonmax = 40)
#' }
#'sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2)
#'sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat, dat_dim = 'dataset')
#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap',
#' lat = sampleData$lat, dat_dim = 'dataset')
#'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val),
#' dim = c(dim(acc$acc), 4))
#'res_bootstrap <- array(c(acc$acc_conf.lower, acc$acc, acc$acc_conf.upper, acc$p.val),
#' dim = c(dim(acc$acc), 4))
#' }
#'@references Joliffe and Stephenson (2012). Forecast Verification: A
#' Practitioner's Guide in Atmospheric Science. Wiley-Blackwell.;
#' Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7).
#'@import multiApply
#'@importFrom abind abind
#'@importFrom stats qt qnorm quantile
#'@importFrom ClimProjDiags Subset
#'@export
ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon',
lat = NULL, lon = NULL, lonlatbox = NULL, alpha = 0.05,
pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric",
ncores = NULL) {
# Check inputs
## exp and obs (1)
if (is.null(exp) | is.null(obs)) {
stop("Parameter 'exp' and 'obs' cannot be NULL.")
}
if (!is.numeric(exp) | !is.numeric(obs)) {
stop("Parameter 'exp' and 'obs' must be a numeric array.")
}
if (is.null(dim(exp)) | is.null(dim(obs))) {
stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ",
}
if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
stop("Parameter 'exp' and 'obs' must have dimension names.")
}
## dat_dim
if (!is.null(dat_dim)) {
if (!is.character(dat_dim) | length(dat_dim) > 1) {
stop("Parameter 'dat_dim' must be a character string or NULL.")
}
if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension. ",
"Set it as NULL if there is no dataset dimension.")
}
## lat_dim
if (!is.character(lat_dim) | length(lat_dim) != 1) {
stop("Parameter 'lat_dim' must be a character string.")
}
if (!lat_dim %in% names(dim(exp)) | !lat_dim %in% names(dim(obs))) {
stop("Parameter 'lat_dim' is not found in 'exp' or 'obs' dimension.")
}
## lon_dim
if (!is.character(lon_dim) | length(lon_dim) != 1) {
stop("Parameter 'lon_dim' must be a character string.")
}
if (!lon_dim %in% names(dim(exp)) | !lon_dim %in% names(dim(obs))) {
stop("Parameter 'lon_dim' is not found in 'exp' or 'obs' dimension.")
}
## avg_dim
if (!is.null(avg_dim)) {
if (!is.character(avg_dim) | length(avg_dim) > 1) {
stop("Parameter 'avg_dim' must be a character string.")
}
if (!avg_dim %in% names(dim(exp)) | !avg_dim %in% names(dim(obs))) {
stop("Parameter 'avg_dim' is not found in 'exp' or 'obs' dimension.")
}
}
## memb_dim
if (!is.null(memb_dim)) {
if (!is.character(memb_dim) | length(memb_dim) > 1) {
stop("Parameter 'memb_dim' must be a character string.")
}
if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ",
# Add [member = 1]
if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
dim(obs) <- c(dim(obs), 1)
names(dim(obs))[length(dim(obs))] <- memb_dim
}
if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) {
dim(exp) <- c(dim(exp), 1)
names(dim(exp))[length(dim(exp))] <- memb_dim
}
if (is.null(lat)) {
stop("Parameter 'lat' cannot be NULL. It is required for area weighting.")
}
if (!is.numeric(lat) | length(lat) != dim(exp)[lat_dim]) {
stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
"length as the latitude dimension of 'exp' and 'obs'."))
if (!is.numeric(lon) | length(lon) != dim(exp)[lon_dim]) {
stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
"length as the longitude dimension of 'exp' and 'obs'."))
}
}
## lonlatbox
if (!is.null(lonlatbox)) {
if (!is.numeric(lonlatbox) | length(lonlatbox) != 4) {
stop("Parameter 'lonlatbox' must be a numeric vector of 4.")
}
if (is.null(lon)) {
stop("Parameter 'lat' and 'lon' are required if 'lonlatbox' is specified.")
}
## alpha
if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) {
stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
}
## pval
if (!is.logical(pval) | length(pval) > 1) {
stop("Parameter 'pval' must be one logical value.")
}
## sign
if (!is.logical(sign) | length(sign) > 1) {
stop("Parameter 'sign' must be one logical value.")
}
## conf
if (!is.logical(conf) | length(conf) > 1) {
stop("Parameter 'conf' must be one logical value.")
}
if (conf) {
## conftype
if (!conftype %in% c('parametric', 'bootstrap')) {
stop("Parameter 'conftype' must be either 'parametric' or 'bootstrap'.")
}
if (conftype == 'bootstrap' & is.null(memb_dim)) {
stop("Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'.")
}
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) |
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
## exp and obs (2)
name_exp <- sort(names(dim(exp)))
name_obs <- sort(names(dim(obs)))
if(!all(name_exp %in% name_obs) | !all(name_obs %in% name_exp)) {
stop("Parameter 'exp' and 'obs' must have same dimension names.")
}
if (!is.null(dat_dim)) {
name_exp <- name_exp[-which(name_exp == dat_dim)]
name_obs <- name_obs[-which(name_obs == dat_dim)]
}
if (!is.null(memb_dim)) {
name_exp <- name_exp[-which(name_exp == memb_dim)]
name_obs <- name_obs[-which(name_obs == memb_dim)]
}
if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
"all the dimensions except 'dat_dim' and 'memb_dim'."))
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
}
#-----------------------------------------------------------------
###############################
# Sort dimension
name_exp <- names(dim(exp))
name_obs <- names(dim(obs))
order_obs <- match(name_exp, name_obs)
obs <- Reorder(obs, order_obs)
###############################
# Select the domain
if (select_lonlat) {
for (jind in 1:2) {
while (lonlatbox[jind] < 0) {
lonlatbox[jind] <- lonlatbox[jind] + 360
}
while (lonlatbox[jind] > 360) {
lonlatbox[jind] <- lonlatbox[jind] - 360
}
}
indlon <- which((lon >= lonlatbox[1] & lon <= lonlatbox[2]) |
(lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | lon < lonlatbox[2])))
indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4])
exp <- ClimProjDiags::Subset(exp, c(lat_dim, lon_dim), list(indlat, indlon), drop = FALSE)
obs <- ClimProjDiags::Subset(obs, c(lat_dim, lon_dim), list(indlat, indlon), drop = FALSE)
}
# Ensemble mean
if (!is.null(memb_dim)) {
if (conftype == 'bootstrap') {
exp_ori <- exp
obs_ori <- obs
}
exp <- MeanDims(exp, memb_dim, na.rm = TRUE)
obs <- MeanDims(obs, memb_dim, na.rm = TRUE)
target_dims <- list(c(lat_dim, lon_dim, dat_dim), c(lat_dim, lon_dim, dat_dim))
target_dims <- list(c(lat_dim, lon_dim, avg_dim, dat_dim), c(lat_dim, lon_dim, avg_dim, dat_dim))
}
res <- Apply(list(exp, obs),
target_dims = target_dims,
dat_dim = dat_dim, avg_dim = avg_dim,
conftype = conftype, pval = pval, conf = conf, alpha = alpha,
sign = sign, ncores = ncores)
# If bootstrap, calculate confidence level
if (conftype == 'bootstrap') {
if (is.null(avg_dim)) {
target_dims_bs <- list(c(memb_dim, dat_dim, lat_dim, lon_dim),
c(memb_dim, dat_dim, lat_dim, lon_dim))
target_dims_bs <- list(c(memb_dim, dat_dim, avg_dim, lat_dim, lon_dim),
c(memb_dim, dat_dim, avg_dim, lat_dim, lon_dim))
res_conf <- Apply(list(exp_ori, obs_ori),
target_dims = target_dims_bs,
fun = .ACC_bootstrap,
dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim,
lat = lat,
conftype = conftype, pval = pval, conf = conf, alpha = alpha,
sign = sign, ncores = ncores)
#NOTE: pval?
res <- list(acc = res$acc,
acc_conf.lower = res_conf$acc_conf.lower,
acc_conf.upper = res_conf$acc_conf.upper,
macc = res$macc,
macc_conf.lower = res_conf$macc_conf.lower,
macc_conf.upper = res_conf$macc_conf.upper)
.ACC <- function(exp, obs, dat_dim = NULL, avg_dim = 'sdate', lat, alpha = 0.05,
pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") {
# .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs].
# If dat_dim = NULL, it returns a number.
# if (is.null(avg_dim))
## exp: [lat, lon, (dat_exp)]
## obs: [lat, lon, (dat_obs)]
# if (!is.null(avg_dim))
## exp: [lat, lon, avg_dim, (dat_exp)]
## obs: [lat, lon, avg_dim, (dat_obs)]
# Add dat_dim temporarily if dat_dim = NULL
if (is.null(dat_dim)) {
nexp <- 1
nobs <- 1
} else {
nexp <- as.numeric(dim(exp)[dat_dim])
nobs <- as.numeric(dim(obs)[dat_dim])
if (is.null(avg_dim)) {
acc <- array(dim = c(nexp = nexp, nobs = nobs))
if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs))
if (conf) {
conf.upper <- array(dim = c(nexp = nexp, nobs = nobs))
conf.lower <- array(dim = c(nexp = nexp, nobs = nobs))
}
acc <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
names(dim(acc))[3] <- avg_dim
macc <- array(dim = c(nexp = nexp, nobs = nobs))
if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
conf.upper <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
conf.lower <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
# centralize & area weighted
## spatial centralization for each [avg_dim, dat]
dim_exp <- dim(exp)
dim_obs <- dim(obs)
wt <- cos(lat * pi/180)
wt <- rep(wt, times = prod(dim_exp[2:length(dim_exp)]))
if (is.null(avg_dim) & is.null(dat_dim)) { #[lat, lon]
# turn exp and obs into vector, first latitudes and then longitudes
exp <- as.vector(exp)
obs <- as.vector(obs)
exp <- array(sqrt(wt) * (exp - mean(exp, na.rm = TRUE)), dim = dim_exp)
obs <- array(sqrt(wt) * (obs - mean(obs, na.rm = TRUE)), dim = dim_obs)
} else { # [lat, lon, dat], [lat, lon, avg_dim], or [lat, lon, avg_dim, dat]
# exp
exp <- array(exp, dim = c(prod(dim_exp[1:2]), dim_exp[3:length(dim_exp)]))
mean_exp <- colMeans(exp, na.rm = TRUE) # [avg_dim, (dat)]
mean_exp <- rep(as.vector(mean_exp), each = prod(dim_exp[1:2]))
exp <- array(sqrt(wt) * (as.vector(exp) - mean_exp), dim = dim_exp)
# obs
obs <- array(obs, dim = c(prod(dim_obs[1:2]), dim_obs[3:length(dim_obs)]))
mean_obs <- colMeans(obs, na.rm = TRUE) # [avg_dim, (dat)]
mean_obs <- rep(as.vector(mean_obs), each = prod(dim_obs[1:2]))
obs <- array(sqrt(wt) * (as.vector(obs) - mean_obs), dim = dim_obs)
}
# Per-paired exp and obs. NAs should be in the same position in both exp and obs
for (iobs in 1:nobs) {
for (iexp in 1:nexp) {
if (!is.null(dat_dim)) {
exp_sub <- ClimProjDiags::Subset(exp, dat_dim, iexp, drop = 'selected')
obs_sub <- ClimProjDiags::Subset(obs, dat_dim, iobs, drop = 'selected')
} else {
exp_sub <- exp
obs_sub <- obs
}
# dim: [lat, lon, (avg_dim)]
# Variance(iexp) should not take into account any point
# that is not available in iobs and therefore not accounted for
# in covariance(iexp, iobs) and vice-versa
exp_sub[is.na(obs_sub)] <- NA
obs_sub[is.na(exp_sub)] <- NA
if (is.null(avg_dim)) {
# ACC
top <- sum(exp_sub * obs_sub, na.rm = TRUE) #a number
bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE))
acc[iexp, iobs] <- top/bottom #a number
# handle bottom = 0
if (is.infinite(acc[iexp, iobs])) acc[iexp, iobs] <- NA
# pval, sign, and conf
if (pval | conf | sign) {
# calculate effective sample size
eno <- .Eno(as.vector(obs_sub), na.action = na.pass)
if (pval | sign) {
t <- qt(1 - alpha, eno - 2) # a number
p.value <- sqrt(t^2 / (t^2 + eno - 2))
if (pval) p.val[iexp, iobs] <- p.value
if (sign) signif[iexp, iobs] <- !is.na(p.value) & p.value <= alpha
conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(1 - alpha / 2) / sqrt(eno - 3))
conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(alpha / 2) / sqrt(eno - 3))
}
}
}
} else { #avg_dim is not NULL
# exp_sub and obs_sub: [lat, lon, avg_dim]
top <- sum(exp_sub * obs_sub, na.rm = TRUE) #a number
bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE))
macc[iexp, iobs] <- top/bottom #a number
# handle bottom = 0
if (is.infinite(macc[iexp, iobs])) macc[iexp, iobs] <- NA
for (i in 1:dim(acc)[3]) {
exp_sub_i <- exp_sub[, , i]
obs_sub_i <- obs_sub[, , i]
top <- sum(exp_sub_i * obs_sub_i, na.rm = TRUE) #a number
bottom <- sqrt(sum(exp_sub_i^2, na.rm = TRUE) * sum(obs_sub_i^2, na.rm = TRUE))
acc[iexp, iobs, i] <- top/bottom #a number
# handle bottom = 0
if (is.infinite(acc[iexp, iobs, i])) acc[iexp, iobs, i] <- NA
}
# pval, sign, and conf
if (pval | sign | conf) {
# calculate effective sample size along lat_dim and lon_dim
# combine lat_dim and lon_dim into one dim first
obs_tmp <- array(obs_sub,
dim = c(space = prod(dim(obs_sub)[1:2]),
dim(obs_sub)[3]))
eno <- apply(obs_tmp, 2, .Eno, na.action = na.pass) # a vector of avg_dim
if (pval | sign) {
t <- qt(1 - alpha, eno - 2) # a vector of avg_dim
p.value <- sqrt(t^2 / (t^2 + eno - 2))
if (pval) p.val[iexp, iobs, ] <- p.value
if (sign) signif[iexp, iobs, ] <- !is.na(p.value) & p.value <= alpha
conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) +
conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) +
}
}
}
} # if avg_dim is not NULL
}
}
#------------------------------------------------
# Remove nexp and nobs if dat_dim = NULL
if (is.null(dat_dim)) {
if (is.null(avg_dim)) {
acc <- as.vector(acc)
if (conf) {
conf.lower <- as.vector(conf.lower)
conf.upper <- as.vector(conf.upper)
}
if (pval) p.val <- as.vector(p.val)
if (sign) signif <- as.vector(signif)
} else {
dim(acc) <- dim(acc)[3:length(dim(acc))]
macc <- as.vector(macc)
if (conf) {
dim(conf.lower) <- dim(conf.lower)[3:length(dim(conf.lower))]
dim(conf.upper) <- dim(conf.upper)[3:length(dim(conf.upper))]
}
if (pval) dim(p.val) <- dim(p.val)[3:length(dim(p.val))]
if (sign) dim(signif) <- dim(signif)[3:length(dim(signif))]
if (conf) output <- c(output, list(conf.lower = conf.lower, conf.upper = conf.upper))
if (pval) output <- c(output, list(p.val = p.val))
if (sign) output <- c(output, list(sign = signif))
.ACC_bootstrap <- function(exp, obs, dat_dim = NULL,
avg_dim = 'sdate', memb_dim = NULL, lat, alpha = 0.05,
pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") {
# exp: [memb_exp, (dat_exp), lat, lon]
# obs: [memb_obs, (dat_obs), lat, lon]
# exp: [memb_exp, (dat_exp), avg_dim, lat, lon]
# obs: [memb_obs, (dat_obs), avg_dim, lat, lon]
if (is.null(dat_dim)) {
nexp <- 1
nobs <- 1
dim(exp) <- c(dim(exp)[1], dat = 1, dim(exp)[-1])
dim(obs) <- c(dim(obs)[1], dat = 1, dim(obs)[-1])
dat_dim <- 'dat'
remove_dat_dim <- TRUE
} else {
nexp <- as.numeric(dim(exp)[dat_dim])
nobs <- as.numeric(dim(obs)[dat_dim])
remove_dat_dim <- FALSE
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
nmembexp <- as.numeric(dim(exp)[1])
nmembobs <- as.numeric(dim(obs)[1])
ndraw <- 100
if (is.null(avg_dim)) {
acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw))
} else {
acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[3], ndraw))
macc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw))
}
for (jdraw in 1:ndraw) {
#choose a randomly member index for each point of the matrix
indexp <- array(sample(nmembexp, size = prod(dim(exp)[-c(length(dim(exp)) - 1, length(dim(exp)))]),
replace = TRUE),
dim = dim(exp))
indobs <- array(sample(nmembobs, size = prod(dim(obs)[-c(length(dim(obs)) - 1, length(dim(obs)))]),
replace = TRUE),
dim = dim(obs))
#combine maxtrix of data and random index
varindexp <- abind::abind(exp, indexp, along = length(dim(exp)) + 1)
varindobs <- abind::abind(obs, indobs, along = length(dim(obs)) + 1)
#select randomly the members for each point of the matrix
# if (is.null(avg_dim)) {
drawexp <- array(
apply(varindexp, c(2:length(dim(exp))), function(x) x[,1][x[,2]] ),
dim = dim(exp))
drawobs <- array(
apply(varindobs, c(2:length(dim(obs))), function(x) x[,1][x[,2]] ),
dim = dim(obs))
# ensemble mean before .ACC
drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE)
drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE)
# Reorder
if (is.null(avg_dim)) {
drawexp <- Reorder(drawexp, c(2, 3, 1))
drawobs <- Reorder(drawobs, c(2, 3, 1))
} else {
drawexp <- Reorder(drawexp, c(3, 4, 2, 1))
drawobs <- Reorder(drawobs, c(3, 4, 2, 1))
}
#calculate the ACC of the randomized field
tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, sign = FALSE,
avg_dim = avg_dim, lat = lat, dat_dim = dat_dim)
if (is.null(avg_dim)) {
acc_draw[, , jdraw] <- tmpACC$acc
} else {
acc_draw[, , , jdraw] <- tmpACC$acc
macc_draw[, , jdraw] <- tmpACC$macc
}
}
#calculate the confidence interval
if (is.null(avg_dim)) {
acc_conf.upper <- apply(acc_draw, c(1, 2),
function (x) {
quantile(x, 1 - alpha / 2, na.rm = TRUE)})
acc_conf.lower <- apply(acc_draw, c(1, 2),
function (x) {
quantile(x, alpha / 2, na.rm = TRUE)})
acc_conf.upper <- apply(acc_draw, c(1, 2, 3),
function (x) {
quantile(x, 1 - alpha / 2, na.rm = TRUE)})
acc_conf.lower <- apply(acc_draw, c(1, 2, 3),
function (x) {
quantile(x, alpha / 2, na.rm = TRUE)})
macc_conf.upper <- apply(macc_draw, c(1, 2),
function (x) {
quantile(x, 1 - alpha / 2, na.rm = TRUE)})
macc_conf.lower <- apply(macc_draw, c(1, 2),
function (x) {
quantile(x, alpha / 2, na.rm = TRUE)})
}
if (remove_dat_dim) {
if (is.null(avg_dim)) {
dim(acc_conf.lower) <- NULL
dim(acc_conf.upper) <- NULL
} else {
dim(acc_conf.lower) <- dim(acc_conf.lower)[-c(1, 2)]
dim(acc_conf.upper) <- dim(acc_conf.upper)[-c(1, 2)]
dim(macc_conf.lower) <- NULL
dim(macc_conf.upper) <- NULL
}
}
# Return output
if (is.null(avg_dim)) {
return(list(acc_conf.lower = acc_conf.lower,
acc_conf.upper = acc_conf.upper))
} else {
return(list(acc_conf.lower = acc_conf.lower,
acc_conf.upper = acc_conf.upper,
macc_conf.lower = macc_conf.lower,
macc_conf.upper = macc_conf.upper))
}
}