ACC.R 26.9 KB
Newer Older
#'Compute the spatial anomaly correlation coefficient between the forecast and corresponding observation
aho's avatar
aho committed
#'
#'Calculate the spatial anomaly correlation coefficient (ACC) for the ensemble
#'mean of each model and the corresponding references over a spatial domain. It
#'can return a forecast time series if the data contain forest time dimension, 
#'and also the ACC mean over one dimension, e.g., start date dimension.
#'The domain of interest can be specified by providing the list of longitudes/
#'latitudes of the data together with the corners of the domain: lonlatbox = 
#'c(lonmin, lonmax, latmin, latmax). The data will be adjusted to have a spatial
#'mean of zero, then area weighting is applied. The formula is referenced from 
#'Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7).
aho's avatar
aho committed
#'
#'@param exp A numeric array of experimental anomalies with named dimensions.
#'  The dimension must have at least 'lat_dim' and 'lon_dim'.
aho's avatar
aho committed
#'@param obs A numeric array of observational anomalies with named dimensions.
#'  The dimension should be the same as 'exp' except the length of 'dat_dim' 
aho's avatar
aho committed
#'  and 'memb_dim'.
#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) 
#'  dimension. The default value is 'dataset'. If there is no dataset 
#'  dimension, set NULL.
#'@param lat_dim A character string indicating the name of the latitude
#'  dimension of 'exp' and 'obs' along which ACC is computed. The default value
#'  is 'lat'. 
#'@param lon_dim A character string indicating the name of the longitude
#'  dimension of 'exp' and 'obs' along which ACC is computed. The default value
#'  is 'lon'. 
aho's avatar
aho committed
#'@param avg_dim A character string indicating the name of the dimension to be
#'  averaged, which is usually the time dimension. If no need to calculate mean
#'  ACC, set as NULL. The default value is 'sdate'.
aho's avatar
aho committed
#'@param memb_dim A character string indicating the name of the member 
#'  dimension. If the data are not ensemble ones, set as NULL. The default 
#'  value is 'member'.
#'@param lat A vector of the latitudes of the exp/obs grids. It is used for
#'  area weighting and when the domain of interested 'lonlatbox' is specified.
#'@param lon A vector of the longitudes of the exp/obs grids. Only required when
#'  the domain of interested 'lonlatbox' is specified. The default value is 
#'  NULL.
aho's avatar
aho committed
#'@param lonlatbox A numeric vector of 4 indicating the corners of the domain of
#'  interested: c(lonmin, lonmax, latmin, latmax). The default value is NULL 
#'  and the whole data will be used.
aho's avatar
aho committed
#'@param conf A logical value indicating whether to retrieve the confidence 
#'  intervals or not. The default value is TRUE.
#'@param conftype A charater string of "parametric" or "bootstrap". 
#'  "parametric" provides a confidence interval for the ACC computed by a 
#'  Fisher transformation and a significance level for the ACC from a one-sided
#'  student-T distribution. "bootstrap" provides a confidence interval for the
#'  ACC and MACC computed from bootstrapping on the members with 100 drawings 
#'  with replacement. To guarantee the statistical robustness of the result, 
#'  make sure that your experiment and observation always have the same number
#'  of members. "bootstrap" requires 'memb_dim' has value. The default value is
#'  'parametric'.
#'@param conf.lev A numeric indicating the confidence level for the 
#'  regression computation. The default value is 0.95.
#'@param pval A logical value indicating whether to compute the p-value or not.
#'  The default value is TRUE.
#'@param ncores An integer indicating the number of cores to use for parallel 
#'  computation. The default value is NULL.
#'
#'@return 
#'A list containing the numeric arrays:\cr 
aho's avatar
aho committed
#'\item{acc}{
aho's avatar
aho committed
#'  The ACC with the dimensions c(nexp, nobs, the rest of the dimension except 
#'  lat_dim, lon_dim and memb_dim). nexp is the number of experiment (i.e., dat_dim in
#'  exp), and nobs is the number of observation (i.e., dat_dim in obs). If 
#'  dat_dim is NULL, nexp and nobs are omitted.
aho's avatar
aho committed
#'}
aho's avatar
aho committed
#'\item{conf.lower (if conftype = "parametric") or acc_conf.lower (if 
#'      conftype = "bootstrap")}{
#'  The lower confidence interval of ACC with the same dimensions as ACC. Only
#'  present if \code{conf = TRUE}.
aho's avatar
aho committed
#'}
aho's avatar
aho committed
#'\item{conf.upper (if conftype = "parametric") or acc_conf.upper (if 
#'      conftype = "bootstrap")}{
#'  The upper confidence interval of ACC with the same dimensions as ACC. Only 
#'  present if \code{conf = TRUE}.
aho's avatar
aho committed
#'}
#'\item{p.val}{
#'  The p-value with the same dimensions as ACC. Only present if 
aho's avatar
aho committed
#'  \code{pval = TRUE} and code{conftype = "parametric"}.  
aho's avatar
aho committed
#'}
aho's avatar
aho committed
#'\item{macc}{
aho's avatar
aho committed
#'  The mean anomaly correlation coefficient with dimensions
#'  c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and 
aho's avatar
aho committed
#'  avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp 
#'  and nobs are omitted.
aho's avatar
aho committed
#'}
#'\item{macc_conf.lower}{
#'  The lower confidence interval of MACC with the same dimensions as MACC. 
#'  Only present if \code{conftype = "bootstrap"}.
#'}
#'\item{macc_conf.upper}{
#'  The upper confidence interval of MACC with the same dimensions as MACC. 
#'  Only present if \code{conftype = "bootstrap"}.
aho's avatar
aho committed
#'}
#'
#'@examples
#'  \dontshow{
#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
#'                                                c('observation'), startDates,
#'                                                leadtimemin = 1,
#'                                                leadtimemax = 4,
#'                                                output = 'lonlat',
#'                                                latmin = 27, latmax = 48,
#'                                                lonmin = -12, lonmax = 40)
#'  }
aho's avatar
aho committed
#'sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2)
#'sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) 
aho's avatar
aho committed
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat)
#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', lat = sampleData$lat)
aho's avatar
aho committed
#'# Combine acc results for PlotACC
aho's avatar
aho committed
#'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), 
#'             dim = c(dim(acc$acc), 4))
#'res_bootstrap <- array(c(acc$acc_conf.lower, acc$acc, acc$acc_conf.upper, acc$p.val),
#'                       dim = c(dim(acc$acc), 4))
aho's avatar
aho committed
#'  \donttest{
aho's avatar
aho committed
#'PlotACC(res, startDates)
aho's avatar
aho committed
#'PlotACC(res_bootstrap, startDates)
aho's avatar
aho committed
#'  }
#'@references Joliffe and Stephenson (2012). Forecast Verification: A 
#'  Practitioner's Guide in Atmospheric Science. Wiley-Blackwell.; 
#'  Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7).
aho's avatar
aho committed
#'@import multiApply
#'@importFrom abind abind
#'@importFrom stats qt qnorm quantile
#'@importFrom ClimProjDiags Subset
#'@export
ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon',
                space_dim = c('lat', 'lon'), avg_dim = 'sdate', memb_dim = 'member', 
aho's avatar
aho committed
                lat = NULL, lon = NULL, lonlatbox = NULL, 
                conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE,
                ncores = NULL) {

  # Check inputs 
  ## exp and obs (1)
  if (is.null(exp) | is.null(obs)) {
    stop("Parameter 'exp' and 'obs' cannot be NULL.")
  }
  if (!is.numeric(exp) | !is.numeric(obs)) {
    stop("Parameter 'exp' and 'obs' must be a numeric array.")
  }
  if (is.null(dim(exp)) | is.null(dim(obs))) {
    stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ",
                "lat_dim and lon_dim."))
aho's avatar
aho committed
  }
  if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
     any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
    stop("Parameter 'exp' and 'obs' must have dimension names.")
  }
  if(!all(names(dim(exp)) %in% names(dim(obs))) |
     !all(names(dim(obs)) %in% names(dim(exp)))) {
    stop("Parameter 'exp' and 'obs' must have same dimension names.")
  }
  ## dat_dim
  if (!is.null(dat_dim)) {
    if (!is.character(dat_dim) | length(dat_dim) > 1) {
      stop("Parameter 'dat_dim' must be a character string or NULL.")
    }
    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
           "Set it as NULL if there is no dataset dimension.")
    }
aho's avatar
aho committed
  }
  ## space_dim (deprecated)
  if (!missing("space_dim")) {
    warning("Parameter 'space_dim' is deprecated. Use 'lat_dim' and 'lon_dim' instead.")
    lat_dim <- space_dim[1]
    lon_dim <- space_dim[2]
aho's avatar
aho committed
  }
  ## lat_dim
  if (!is.character(lat_dim) | length(lat_dim) != 1) {
    stop("Parameter 'lat_dim' must be a character string.")
  }
  if (!lat_dim %in% names(dim(exp)) | !lat_dim %in% names(dim(obs))) {
    stop("Parameter 'lat_dim' is not found in 'exp' or 'obs' dimension.")
  }
  ## lon_dim
  if (!is.character(lon_dim) | length(lon_dim) != 1) {
    stop("Parameter 'lon_dim' must be a character string.")
  }
  if (!lon_dim %in% names(dim(exp)) | !lon_dim %in% names(dim(obs))) {
    stop("Parameter 'lon_dim' is not found in 'exp' or 'obs' dimension.")
aho's avatar
aho committed
  }
  ## avg_dim
  if (!is.null(avg_dim)) {
    if (!is.character(avg_dim) | length(avg_dim) > 1) {
      stop("Parameter 'avg_dim' must be a character string.")
    } 
    if (!avg_dim %in% names(dim(exp)) | !avg_dim %in% names(dim(obs))) {
      stop("Parameter 'avg_dim' is not found in 'exp' or 'obs' dimension.")
    }
  }
  ## memb_dim
  if (!is.null(memb_dim)) {
    if (!is.character(memb_dim) | length(memb_dim) > 1) {
      stop("Parameter 'memb_dim' must be a character string.")
    }
    if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
aho's avatar
aho committed
      stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.",
           "Set it as NULL if there is no member dimension.")
aho's avatar
aho committed
    }
  }
  ## lat
  if (is.null(lat)) {
    stop("Parameter 'lat' cannot be NULL. It is required for area weighting.")
  }
  if (!is.numeric(lat) | length(lat) != dim(exp)[lat_dim]) {
    stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
                "length as the latitude dimension of 'exp' and 'obs'."))
aho's avatar
aho committed
  }
  ## lon
  if (!is.null(lon)) {
    if (!is.numeric(lon) | length(lon) != dim(exp)[lon_dim]) {
aho's avatar
aho committed
      stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
                  "length as the longitude dimension of 'exp' and 'obs'."))
    }
  }
  ## lonlatbox
  if (!is.null(lonlatbox)) {
    if (!is.numeric(lonlatbox) | length(lonlatbox) != 4) {
      stop("Parameter 'lonlatbox' must be a numeric vector of 4.")
    }
    if (is.null(lon)) {
      stop("Parameter 'lat' and 'lon' are required if 'lonlatbox' is specified.")
    }
aho's avatar
aho committed
    select_lonlat <- TRUE
  } else {
    select_lonlat <- FALSE
aho's avatar
aho committed
  }
  ## conf
  if (!is.logical(conf) | length(conf) > 1) {
    stop("Parameter 'conf' must be one logical value.")
  }
  if (conf) {
    ## conftype 
    if (!conftype %in% c('parametric', 'bootstrap')) {
      stop("Parameter 'conftype' must be either 'parametric' or 'bootstrap'.")
    }
    if (conftype == 'bootstrap' & is.null(memb_dim)) {
      stop("Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'.")
    }
    ## conf.lev
    if (!is.numeric(conf.lev) | any(conf.lev < 0) | any(conf.lev > 1) | 
        length(conf.lev) > 1) {
aho's avatar
aho committed
      stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
    }
  }
  ## pval
  if (!is.logical(pval) | length(pval) > 1) {
    stop("Parameter 'pval' must be one logical value.")
  }
  ## ncores
  if (!is.null(ncores)) {
    if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) |
        length(ncores) > 1) {
aho's avatar
aho committed
      stop("Parameter 'ncores' must be a positive integer.")
    }
  }
  ## exp and obs (2)
  name_exp <- sort(names(dim(exp)))
  name_obs <- sort(names(dim(obs)))
  if (!is.null(dat_dim)) {
    name_exp <- name_exp[-which(name_exp == dat_dim)]
    name_obs <- name_obs[-which(name_obs == dat_dim)]
  }
aho's avatar
aho committed
  if (!is.null(memb_dim)) {
    name_exp <- name_exp[-which(name_exp == memb_dim)]
    name_obs <- name_obs[-which(name_obs == memb_dim)]
  }
  if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
aho's avatar
aho committed
    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
                "all the dimensions expect 'dat_dim' and 'memb_dim'."))
  }

#-----------------------------------------------------------------


  ###############################
  # Sort dimension
  name_exp <- names(dim(exp))
  name_obs <- names(dim(obs))
  order_obs <- match(name_exp, name_obs)
  obs <- Reorder(obs, order_obs)
  ###############################

  # Select the domain 
  if (select_lonlat) {
    for (jind in 1:2) {
      while (lonlatbox[jind] < 0) {
        lonlatbox[jind] <- lonlatbox[jind] + 360
      }
      while (lonlatbox[jind] > 360) {
        lonlatbox[jind] <- lonlatbox[jind] - 360
      }
    }
    indlon <- which((lon >= lonlatbox[1] & lon <= lonlatbox[2]) | 
                    (lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | lon < lonlatbox[2])))
    indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4])

    exp <- ClimProjDiags::Subset(exp, c(lat_dim, lon_dim), list(indlat, indlon), drop = FALSE)
    obs <- ClimProjDiags::Subset(obs, c(lat_dim, lon_dim), list(indlat, indlon), drop = FALSE)
aho's avatar
aho committed
  }

   # Ensemble mean
  if (!is.null(memb_dim)) {
aho's avatar
aho committed
    if (conftype == 'bootstrap') {
      exp_ori <- exp
      obs_ori <- obs
    }
aho's avatar
aho committed
    exp <- MeanDims(exp, memb_dim, na.rm = TRUE)
    obs <- MeanDims(obs, memb_dim, na.rm = TRUE)
aho's avatar
aho committed

  if (is.null(avg_dim)) {
    target_dims <- list(c(lat_dim, lon_dim, dat_dim), c(lat_dim, lon_dim, dat_dim))
aho's avatar
aho committed
  } else {
    target_dims <- list(c(lat_dim, lon_dim, avg_dim, dat_dim), c(lat_dim, lon_dim, avg_dim, dat_dim))
  }
  res <- Apply(list(exp, obs),
                 target_dims = target_dims,
aho's avatar
aho committed
                 fun = .ACC,
                 dat_dim = dat_dim, avg_dim = avg_dim,
aho's avatar
aho committed
                 lat = lat,
                 conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev,
aho's avatar
aho committed
                 ncores = ncores)
aho's avatar
aho committed

  # If bootstrap, calculate confidence level
  if (conftype == 'bootstrap') {
    if (is.null(avg_dim)) {
      target_dims_bs <- list(c(memb_dim, dat_dim, lat_dim, lon_dim),
                             c(memb_dim, dat_dim, lat_dim, lon_dim))
    } else {
      target_dims_bs <- list(c(memb_dim, dat_dim, avg_dim, lat_dim, lon_dim),
                             c(memb_dim, dat_dim, avg_dim, lat_dim, lon_dim))
aho's avatar
aho committed
    }
    res_conf <- Apply(list(exp_ori, obs_ori),
                      target_dims = target_dims_bs, 
                      fun = .ACC_bootstrap, 
                      dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim,
                      lat = lat,
                      conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev,
                      ncores = ncores)
    #NOTE: pval?
    res <- list(acc = res$acc,
                acc_conf.lower = res_conf$acc_conf.lower,
                acc_conf.upper = res_conf$acc_conf.upper,
                macc = res$macc,
                macc_conf.lower = res_conf$macc_conf.lower,
                macc_conf.upper = res_conf$macc_conf.upper)
  return(res)
.ACC <- function(exp, obs, dat_dim = 'dataset', avg_dim = 'sdate', lat,
                 conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE) {
aho's avatar
aho committed
  # .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs].
  # If dat_dim = NULL, it returns a number.

  # if (is.null(avg_dim)) 
  ## exp: [lat, lon, (dat_exp)]
  ## obs: [lat, lon, (dat_obs)]
  # if (!is.null(avg_dim)) 
  ## exp: [lat, lon, avg_dim, (dat_exp)]
  ## obs: [lat, lon, avg_dim, (dat_obs)]

  # Add dat_dim temporarily if dat_dim = NULL
  if (is.null(dat_dim)) {
    nexp <- 1
    nobs <- 1
  } else {
    nexp <- as.numeric(dim(exp)[length(dim(exp))])
    nobs <- as.numeric(dim(obs)[length(dim(obs))])
  }
aho's avatar
aho committed

  if (is.null(avg_dim)) {
    acc <- array(dim = c(nexp = nexp, nobs = nobs))
aho's avatar
aho committed
    if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs))
    if (conf) {
      conf.upper <- array(dim = c(nexp = nexp, nobs = nobs))
      conf.lower <- array(dim = c(nexp = nexp, nobs = nobs))
    }
    
aho's avatar
aho committed
  } else {
    acc <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
    names(dim(acc))[3] <- avg_dim
    macc <- array(dim = c(nexp = nexp, nobs = nobs))
    if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
aho's avatar
aho committed
    if (conf) {
        conf.upper <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
        conf.lower <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim]))
aho's avatar
aho committed
    }
  # centralize & area weighted
  ## spatial centralization for each [avg_dim, dat]
  dim_exp <- dim(exp)
  dim_obs <- dim(obs)
  wt <- cos(lat * pi/180)
  wt <- rep(wt, times = prod(dim_exp[2:length(dim_exp)]))

  if (is.null(avg_dim) & is.null(dat_dim)) {  #[lat, lon]
    # turn exp and obs into vector, first latitudes and then longitudes
    exp <- as.vector(exp)
    obs <- as.vector(obs)
    exp <- array(sqrt(wt) * (exp - mean(exp, na.rm = TRUE)), dim = dim_exp)
    obs <- array(sqrt(wt) * (obs - mean(obs, na.rm = TRUE)), dim = dim_obs)
  } else { # [lat, lon, dat], [lat, lon, avg_dim], or [lat, lon, avg_dim, dat]
    # exp
    exp <- array(exp, dim = c(prod(dim_exp[1:2]), dim_exp[3:length(dim_exp)]))
    mean_exp <- apply(exp, 2:length(dim(exp)), mean, na.rm = TRUE)  # [avg_dim, (dat)]
    mean_exp <- rep(as.vector(mean_exp), each = prod(dim_exp[1:2]))
    exp <- array(sqrt(wt) * (as.vector(exp) - mean_exp), dim = dim_exp)
    # obs
    obs <- array(obs, dim = c(prod(dim_obs[1:2]), dim_obs[3:length(dim_obs)]))
    mean_obs <- apply(obs, 2:length(dim(obs)), mean, na.rm = TRUE)  # [avg_dim, (dat)]
    mean_obs <- rep(as.vector(mean_obs), each = prod(dim_obs[1:2]))
    obs <- array(sqrt(wt) * (as.vector(obs) - mean_obs), dim = dim_obs)
  }

aho's avatar
aho committed
  # Per-paired exp and obs. NAs should be in the same position in both exp and obs
  for (iobs in 1:nobs) {
    for (iexp in 1:nexp) {
      if (!is.null(dat_dim)) {
        exp_sub <- ClimProjDiags::Subset(exp, dat_dim, iexp, drop = 'selected')
        obs_sub <- ClimProjDiags::Subset(obs, dat_dim, iobs, drop = 'selected')
      } else {
        exp_sub <- exp
        obs_sub <- obs
      }
      # dim: [lat, lon, (avg_dim)]
aho's avatar
aho committed

      # Variance(iexp) should not take into account any point 
      # that is not available in iobs and therefore not accounted for 
      # in covariance(iexp, iobs) and vice-versa 
      exp_sub[is.na(obs_sub)] <- NA
      obs_sub[is.na(exp_sub)] <- NA

      if (is.null(avg_dim)) {
        # ACC
        top <- sum(exp_sub * obs_sub, na.rm = TRUE)  #a number
aho's avatar
aho committed
        bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE))
        acc[iexp, iobs] <- top/bottom #a number
        # handle bottom = 0
        if (is.infinite(acc[iexp, iobs])) acc[iexp, iobs] <- NA
aho's avatar
aho committed
        # pval and conf
        if (pval | conf) {
          if (conftype == "parametric") {
            # calculate effective sample size
            eno <- .Eno(as.vector(obs_sub), na.action = na.pass)

aho's avatar
aho committed
            if (pval) {
              t <- qt(conf.lev, eno - 2)  # a number
              p.val[iexp, iobs] <- sqrt(t^2 / (t^2 + eno - 2))
            }
            if (conf) {
              conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3))
              conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm((1 - conf.lev) / 2) / sqrt(eno - 3))
            }
          }
        }

      } else {  #avg_dim is not NULL
        # exp_sub and obs_sub: [lat, lon, avg_dim]

aho's avatar
aho committed
        # MACC
        top <- sum(exp_sub * obs_sub, na.rm = TRUE)  #a number
aho's avatar
aho committed
        bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE))
        macc[iexp, iobs] <- top/bottom #a number
aho's avatar
aho committed
        # handle bottom = 0
        if (is.infinite(macc[iexp, iobs])) macc[iexp, iobs] <- NA
aho's avatar
aho committed
        # ACC
        for (i in 1:dim(acc)[3]) {
          exp_sub_i <- exp_sub[, , i]
          obs_sub_i <- obs_sub[, , i]
          top <- sum(exp_sub_i * obs_sub_i, na.rm = TRUE)  #a number
aho's avatar
aho committed
          bottom <- sqrt(sum(exp_sub_i^2, na.rm = TRUE) * sum(obs_sub_i^2, na.rm = TRUE))
          acc[iexp, iobs, i] <- top/bottom #a number
          # handle bottom = 0
          if (is.infinite(acc[iexp, iobs, i])) acc[iexp, iobs, i] <- NA
        }

        # pval and conf
        if (pval | conf) {
          if (conftype == "parametric") {
            # calculate effective sample size along lat_dim and lon_dim 
            # combine lat_dim and lon_dim into one dim first
            obs_tmp <- array(obs_sub, 
                             dim = c(space = prod(dim(obs_sub)[1:2]), 
                                     dim(obs_sub)[3]))
            eno <- apply(obs_tmp, 2, .Eno, na.action = na.pass)  # a vector of avg_dim
aho's avatar
aho committed
            if (pval) {
              t <- qt(conf.lev, eno - 2)  # a vector of avg_dim
              p.val[iexp, iobs, ] <- sqrt(t^2 / (t^2 + eno - 2))
            }
            if (conf) {
              conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + 
                                          qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3))
              conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + 
                                          qnorm((1 - conf.lev) / 2) / sqrt(eno - 3))
aho's avatar
aho committed
            }
          }
        }

      }  # if avg_dim is not NULL

    }
  }

#------------------------------------------------
  # Remove nexp and nobs if dat_dim = NULL
  if (is.null(dat_dim)) {
    if (is.null(avg_dim)) {
      acc <- as.vector(acc)
      if (conf) {
        conf.lower <- as.vector(conf.lower)
        conf.upper <- as.vector(conf.upper)
      }   
      if (pval) {
        p.val <- as.vector(p.val)
      }
    } else {
      dim(acc) <- dim(acc)[3:length(dim(acc))]
      macc <- as.vector(macc)
      if (conf) {
        dim(conf.lower) <- dim(conf.lower)[3:length(dim(conf.lower))]
        dim(conf.upper) <- dim(conf.upper)[3:length(dim(conf.upper))]
      }
      if (pval) {
          dim(p.val) <- dim(p.val)[3:length(dim(p.val))]
      }
    }
  }
aho's avatar
aho committed
  # Return output
aho's avatar
aho committed
  if (is.null(avg_dim)) {
    if (conf & pval) {
      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper,
                  p.val = p.val))
    } else if (conf & !pval) {
      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper,
                  macc = macc))
    } else if (!conf & pval) {
      return(list(acc = acc, p.val = p.val))
    } else {
      return(list(acc = acc))
    }
  } else {
    if (conf & pval) {
      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper, 
                  p.val = p.val, macc = macc))
    } else if (conf & !pval) {
      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper,
                  macc = macc))
    } else if (!conf & pval) {
      return(list(acc = acc, p.val = p.val, macc = macc))
    } else {
      return(list(acc = acc, macc = macc))
    }
  }

}
aho's avatar
aho committed


.ACC_bootstrap <- function(exp, obs,  dat_dim = 'dataset', 
                           avg_dim = 'sdate', memb_dim = NULL, lat,
                           conf = TRUE, conftype = "parametric", conf.lev = 0.95, 
                           pval = TRUE) {
aho's avatar
aho committed
# if (is.null(avg_dim)) 
  # exp: [memb_exp, dat_exp, lat, lon]
  # obs: [memb_obs, dat_obs, lat, lon]
aho's avatar
aho committed
# if (!is.null(avg_dim)) 
  # exp: [memb_exp, dat_exp, avg_dim, lat, lon]
  # obs: [memb_obs, dat_obs, avg_dim, lat, lon]
aho's avatar
aho committed

  nexp <- as.numeric(dim(exp)[2])
  nobs <- as.numeric(dim(obs)[2])
  nmembexp <- as.numeric(dim(exp)[1])
  nmembobs <- as.numeric(dim(obs)[1])

  ndraw <- 100
  if (is.null(avg_dim)) {
    acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw))
  } else {
    acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[3], ndraw))
    macc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw))
  }

  for (jdraw in 1:ndraw) {
    #choose a randomly member index for each point of the matrix 
    indexp <- array(sample(nmembexp, size = prod(dim(exp)[-c(length(dim(exp)) - 1, length(dim(exp)))]),
                           replace = TRUE), 
                    dim = dim(exp))
    indobs <- array(sample(nmembobs, size = prod(dim(obs)[-c(length(dim(obs)) - 1, length(dim(obs)))]),               
                           replace = TRUE),
                     dim = dim(obs))

      #combine maxtrix of data and random index
      varindexp <- abind::abind(exp, indexp, along = length(dim(exp)) + 1)
      varindobs <- abind::abind(obs, indobs, along = length(dim(obs)) + 1)

    #select randomly the members for each point of the matrix
#    if (is.null(avg_dim)) {

    drawexp <- array( 
                    apply(varindexp, c(2:length(dim(exp))), function(x) x[,1][x[,2]] ),
                          dim = dim(exp)) 
    drawobs <- array(
                    apply(varindobs, c(2:length(dim(obs))), function(x) x[,1][x[,2]] ),
                          dim = dim(obs))

    # ensemble mean before .ACC
aho's avatar
aho committed
    drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE)
    drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE)
aho's avatar
aho committed
    # Reorder
    if (is.null(avg_dim)) {
      drawexp <- Reorder(drawexp, c(2, 3, 1))
      drawobs <- Reorder(drawobs, c(2, 3, 1))
    } else {
      drawexp <- Reorder(drawexp, c(3, 4, 2, 1))
      drawobs <- Reorder(drawobs, c(3, 4, 2, 1))
    } 
   
    #calculate the ACC of the randomized field
    tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, avg_dim = avg_dim,
                   lat = lat)
aho's avatar
aho committed
    if (is.null(avg_dim)) {
      acc_draw[, , jdraw] <- tmpACC$acc
    } else {
      acc_draw[, , , jdraw] <- tmpACC$acc
      macc_draw[, , jdraw] <- tmpACC$macc
    }
  }

  #calculate the confidence interval
  if (is.null(avg_dim)) {
  acc_conf.upper <- apply(acc_draw, c(1, 2),
aho's avatar
aho committed
                          function (x) {
                            quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)})
  acc_conf.lower <- apply(acc_draw, c(1, 2),
aho's avatar
aho committed
                          function (x) {
                            quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)})

  } else {
  acc_conf.upper <- apply(acc_draw, c(1, 2, 3), 
aho's avatar
aho committed
                          function (x) {
                            quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)})
  acc_conf.lower <- apply(acc_draw, c(1, 2, 3),
aho's avatar
aho committed
                          function (x) {
                            quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)})
  macc_conf.upper <- apply(macc_draw, c(1, 2),
aho's avatar
aho committed
                          function (x) {
                            quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)})
  macc_conf.lower <- apply(macc_draw, c(1, 2),
aho's avatar
aho committed
                          function (x) {
                            quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)})
  }

  # Return output
  if (is.null(avg_dim)) {
      return(list(acc_conf.lower = acc_conf.lower,
                  acc_conf.upper = acc_conf.upper))
  } else {
      return(list(acc_conf.lower = acc_conf.lower,
                  acc_conf.upper = acc_conf.upper,
                  macc_conf.lower = macc_conf.lower,
                  macc_conf.upper = macc_conf.upper))
  }
                  
}