ACC.R 10.5 KB
Newer Older
ACC <- function(var_exp, var_obs, lon = NULL, lat = NULL,
                lonlatbox = NULL, conf = TRUE, conftype = "parametric",
                center = FALSE) {
  # Matrix var_exp & var_obs should have dimensions (nexp/nobs, nsdates, 
  # nltimes, nlat, nlon) or (nexp/nobs, nsdates, nmember, nltimes, nlat, nlon)
  # ACC computes the Anomaly Correlation Coefficient for the ensemble mean of
  # each jexp in 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACC 
  # for each startdate and each leadtime.
  # A domain can be selected by providing the list of longitudes/latitudes
  # (lon/lat) of the grid together with the corner of the domain:
  #   lonlatbox = c(lonmin, lonmax, latmin, latmax)
  # 
  # Args:
  #   var_exp: Matrix of experimental data.
  #   var_obs: Matrix of observational data, same dimensions as var_exp except 
  #            along the first dimension and the second if it corresponds to
  #                                          the member dimension
  #   lon: Array of longitudes of the var_exp/var_obs grids, optional.
  #   lat: Array of latitudes of the var_exp/var_obs grids, optional.
  #   lonlatbox: Domain to select c(lonmin, lonmax, latmin, latmax), optional.
  #   conf: TRUE/FALSE 
  #         confidence intervals or significance level provided or not
  #   conftype: "parametric" provides a confidence interval for the ACC computed 
  #                           by a Fisher transformation and a significance level 
  #                           for the ACC from a one-sided student-T distribution
  #              "bootmemb" provides a confidence interval for the ACC and MACC
  #                           computed from bootstrapping on the members with
  #                           100 drawings with replacement. 
  #                           To guarantee the statistical robustness of the result,
  #                           make sure that your experiments/oservations/startdates/
  #                           leadtimes always have the same number of members.
  #   center: TRUE/FALSE 
  #           if TRUE compute the center anomaly correlation coefficient
  #           if FALSE, the mean 2d anomalies are not substracted from both
  #           oberved and and experiement fields.
  #           The pearson correlation is used in both cases.
  #   $ACC: Anomaly Correlation Coefficient 
  #         if conf equal TRUE:
  #           Matrix with c(nexp, nobs, nsdates, nleadtimes, 4) dimensions.
  #           The fifth dimension of length 4 corresponds to the lower limit of
  #           the 95% confidence interval, the computed ACC, the upper limit of 
  #           the 95% confidence interval and the 95% significance level given by 
  #           a one-sided T-test.
  #         if conf equal FALSE:
  #           Matrix with c(nexp, nobs, nleadtimes) dimensions.
  #   $MACC: Mean Anomaly Correlation Coefficient 
  #          if conf equal TRUE:
  #            Matrix with c(nexp, nobs, nleadtimes, 3) dimensions.
  #            The fourth dimension of length 3 corresponds to the lower limit
  #            of the 95% confidence interval, the computed MACC and the upper 
  #            limit of the 95% confidence interval
  #          if conf equal FALSE:
  #            Matrix with c(nexp, nobs, nleadtimes) dimensions.
  # 
  # History:
  #   1.0  #  2013-08  (V. Guemas, vguemas@ic3.cat)  #  Original code
  #   1.1  #  2014-05  (C. Prodhomme, chloe.prodhomme@ic3.cat)  # optimisation
  #   1.2  #  2014-08  (V. Guemas, virginie.guemas@ic3.cat) # Bug-fixes : handling of
  #                              NA & selection of domain + Simplification of code
  #   1.3  #  2014-08  (V. Guemas, virginie.guemas@ic3.cat) # Boostrapping over members
Virginie Guemas's avatar
Virginie Guemas committed

  library(abind)
  if (length(dimsvar) == 5) {
    checkfirst <- 2
  }else if (length(dimsvar) == 6) {
    checkfirst <- 3
    nmembexp <- dimsvar[2]
    nmembobs <- dim(var_obs)[2]
  }else{
    stop("var_exp & var_obs should have dimensions (nexp/nsobs, nsdates, nltimes, nlat, nlon) 
                          or  dimensions (nexp/nsobs, nmembers, nsdates, nltimes, nlat, nlon) ")
  for (iind in checkfirst:length(dimsvar)) {
    if (dim(var_obs)[iind] != dimsvar[iind]) {
      stop("var_exp & var_obs must have same dimensions except the first one (number of experiments or number of observational datasets) ")
  nsdates <- dimsvar[checkfirst]
  nltimes <- dimsvar[checkfirst+1]
  nlat <- dimsvar[checkfirst+2]
  nlon <- dimsvar[checkfirst+3]
  if (is.null(lon) == FALSE & is.null(lat) == FALSE & 
      is.null(lonlatbox) == FALSE) {
    for (jind in 1:2) {
      while (lonlatbox[jind] < 0) {
        lonlatbox[jind] <- lonlatbox[jind] + 360
      }
      while (lonlatbox[jind] > 360) {
        lonlatbox[jind] <- lonlatbox[jind] - 360
      }
    }
    indlon <- which((lon >= lonlatbox[1] & lon <= lonlatbox[2]) | 
                    (lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | 
    indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4])
  } else {
    indlon <- 1:nlon
    indlat <- 1:nlat
  }
  if(conf == TRUE) {
    ACC <- array(NA, dim = c(nexp, nobs, nsdates, nltimes, 4))
  } else {
    ACC <- array(NA, dim = c(nexp, nobs, nsdates, nltimes))
  }         
  MACCaux <- array(0, dim = c(nexp, nobs, nsdates, nltimes, 3))
  if (length(dimsvar) == 6) {
    tmp01 <- Mean1Dim(var_exp,2)
    tmp02 <- Mean1Dim(var_obs,2)
  }else{
    tmp01 <- var_exp
    tmp02 <- var_obs
  } 
  for( iobs in 1:nobs) {
    for( iexp in 1:nexp) {

      # tmp1 and tmp2 are splitted to handle NA before building
      # tmp - Virginie
      # We need indlat, indlon below - Virginie
      tmp1 <- array(tmp01[iexp, , ,indlat, indlon],
                     dim = c(1, nsdates, nltimes,
      tmp2 <- array(tmp02[iobs, , ,indlat, indlon],
                     dim = c(1, nsdates, nltimes,
                           length(indlon) * length(indlat)))

      # We should not take into account in variance(tmp1) any point 
      # that is not available in tmp2 and therefore not accounted for 
      # in covariance(tmp1,tmp2) and vice-versa hence the need for 
      # the lines below - Virginie
      tmp1[ is.na(tmp2) ] <- NA
      tmp2[ is.na(tmp1) ] <- NA

      tmp <- abind(tmp1, tmp2, along = 1)

      # Below I have corrected the way of obtaining tmpallNA 
      # which was not properly working on a few cases I tested
      # and simultaneously I have simplified the code - Virginie
      if (center) {
        # I coded it as such for backward compatibility but a true
        # centering would involve a spatial averaging accounting
        # for the area of each grid cell with cos(lat) - Virginie
        tmp <- tmp - InsertDim( Mean1Dim(tmp, 4), 4, dim(tmp1)[4] )

      top <- apply(tmp, c(2, 3), function(x)
                      sum(x[1, ]*x[2, ], na.rm = TRUE) )

      bottom1 <- apply(tmp, c(2, 3), function(x)
                      sum(x[1, ]*x[1, ], na.rm = TRUE) )

      bottom2 <- apply(tmp, c(2, 3), function(x)
                      sum(x[2, ]*x[2, ], na.rm = TRUE) )
    
      bottom <- sqrt(bottom1 * bottom2 )

Virginie Guemas's avatar
Virginie Guemas committed
      tmpallNA <- which(is.na(bottom) | bottom == 0)
      
      ACCaux <- top / bottom
      ACCaux[tmpallNA] <- NA

      if (conf == TRUE) {
        ACC[iexp, iobs, , , 2] <- ACCaux
        if (conftype == "parametric") {
          eno <- Mean1Dim( Eno(tmp2, 4), 1)
          t <- apply(eno, c(1, 2), 
                   function(x) qt(0.95, x - 2))
          enot <- abind(eno, t, along = 3)
          ACC[iexp, iobs, , , 4] <- apply(enot, c(1, 2), function(x)
                 sqrt((x[2] * x[2]) / ((x[2] * x[2]) + x[1] - 2)) )
          correno <- abind(ACCaux, eno, along = 3)
          ACC[iexp, iobs, , , 1] <- apply(correno, c(1, 2), function(x)
                  tanh(atanh(x[1]) + qnorm(0.975) / sqrt(x[2] - 3)) )
          ACC[iexp, iobs, , , 3] <- apply(correno, c(1, 2), function(x)
                  tanh(atanh(x[1]) + qnorm(0.025) / sqrt(x[2] - 3)) )
        ACC[iexp, iobs, , ] <- ACCaux

      top[tmpallNA] = NA
      bottom1[tmpallNA] = NA
      bottom2[tmpallNA] = NA
      MACCaux[iexp, iobs, , , 1] <- top
      MACCaux[iexp, iobs, , , 2] <- bottom1
      MACCaux[iexp, iobs, , , 3] <- bottom2

  # The lines I have introduced below are only to avoid that some NA are
  # called NaN or Inf. It is easier to handle a uniform NA naming outside
  # the functions - Virginie
Virginie Guemas's avatar
Virginie Guemas committed
  # Furthermore, the na.rm should be TRUE to obtain a MACC even if a few
  # start dates are missing - Virginie 
  topfinal <- apply(MACCaux, c(1, 2, 4), function(x) 
Virginie Guemas's avatar
Virginie Guemas committed
                                 sum(x[, 1], na.rm = TRUE) )
               
  bottomfinal <- apply(MACCaux, c(1, 2, 4), function(x) 
Virginie Guemas's avatar
Virginie Guemas committed
               sqrt(sum(x[, 2], na.rm = TRUE) * sum(x[, 3], na.rm = TRUE)))
Virginie Guemas's avatar
Virginie Guemas committed
  tmpNA <- which(is.na(bottomfinal) | bottomfinal == 0)

  MACC <- topfinal / bottomfinal 
  MACC[tmpNA] <- NA  
  if (conf == TRUE & conftype == "bootmemb") {
    
    ACC_draw = array(dim=c(nexp,nobs,nsdates,nltimes,ndraw))
  
    for (jdraw in 1:ndraw) {

      indexp <- array(sample(nmembexp, size = (nexp*nmembexp*nsdates*nltimes), 
                             replace = TRUE), dim = c(nexp, nmembexp, nsdates, nltimes) )
      indobs <- array(sample(nmembobs, size = (nobs*nmembobs*nsdates*nltimes), 
                             replace = TRUE), dim = c(nobs, nmembobs, nsdates, nltimes) )
      varexpdraw <- array(NA, dim = c(dim(var_exp)[1:4], length(indlat), length(indlon)))   
      varobsdraw <- array(NA, dim = c(dim(var_obs)[1:4], length(indlat), length(indlon)))   
 
      for (js in 1:nsdates) {
        for (jt in 1:nltimes) {
          for (iexp in 1:nexp) {
            for (jm in 1:nmembexp) {
              varexpdraw[iexp, jm, js, jt,,] <- var_exp[iexp, indexp[iexp, jm, js, jt],js, jt, indlat, indlon]
            }
            for (jm in nmembobs) {
              varobsdraw[iobs, jm, js, jt,,] <- var_obs[iobs, indobs[iobs, jm, js, jt],js, jt, indlat, indlon]
            }
          }
        }
      }
      ACC_draw[,,,,jdraw] <- ACC(varexpdraw, varobsdraw, conf = FALSE)$ACC
    }

    ACC[ , , , , 1] <- apply(ACC_draw, c(1, 2, 3, 4), function(x)
                                       quantile(x, 0.975, na.rm = TRUE))  
    ACC[ , , , , 3] <- apply(ACC_draw, c(1, 2, 3, 4), function(x)
                                       quantile(x, 0.025, na.rm = TRUE))  
  }