ACC.R 7.26 KB
Newer Older
ACC <- function(var_exp, var_obs, lon = NULL, lat = NULL,
                lonlatbox = NULL, conf = TRUE, center = FALSE) {
  # Matrix var_exp & var_obs should have dimensions (nexp/nobs, nsdates, 
  # nltimes, nlat, nlon).
  # ACC computes the Anomaly Correlation Coefficient for each jexp in 
  # 1:nexp and each jobs in 1:nobs which gives nexp x nobs ACC for each 
  # startdate and each leadtime.
  # A domain can be selected by providing the list of longitudes/latitudes
  # (lon/lat) of the grid together with the corner of the domain:
  #   lonlatbox = c(lonmin, lonmax, latmin, latmax)
  # 
  # Args:
  #   var_exp: Matrix of experimental data.
  #   var_obs: Matrix of observational data, same dimensions as var_exp except 
  #            along the first dimension.
  #   lon: Array of longitudes of the var_exp/var_obs grids, optional.
  #   lat: Array of latitudes of the var_exp/var_obs grids, optional.
  #   lonlatbox: Domain to select c(lonmin, lonmax, latmin, latmax), optional.
  #   conf: TRUE/FALSE if conf equal TRUE:
  #         The confidence interval is computed by a Fisher transformation.
  #         The significance level relies on a one-sided student-T distribution.
  #         If conf equal FALSE: no confidence interval is calculated
  #   center: TRUE/FALSE if TRUE compute the center anomaly correlation
  #           coefficient (using pearson correlation).
  #           if FALSE, the mean anomalies are not substracted from both
  #           oberved and and experiement fields.
  #
  # Returns:
  #   $ACC: if conf equal TRUE:
  #        Matrix with c(nexp, nobs, nsdates, nleadtimes, 4) dimensions.
  #        The fourth dimension of length 4 corresponds to the lower limit of
  #        the 95% confidence interval, the computed ACC, the upper limit of 
  #        the 95% confidence interval and the 95% significance level given by 
  #        a one-sided T-test.
  #        if conf equal FALSE:
  #        ACC with c(nexp, nobs, nleadtimes) dimensions.
  #   $MACC: Mean Anomaly Correlation Coefficient with c(nexp, nobs, nleadtimes) 
  #         dimensions.
  # 
  # History:
  #   1.0  #  2013-08  (V. Guemas, vguemas@ic3.cat)  #  Original code
  #   1.1  #  2014-05  (C. Prodhomme, chloe.prodhomme@ic3.cat)  # optimisation
  #   1.2  #  2014-08  (V. Guemas, virginie.guemas@ic3.cat) # Bug-fixes : handling of
  #                              NA & selection of domain + Simplification of code
Virginie Guemas's avatar
Virginie Guemas committed

  library(abind)
    stop("var_exp & var_obs should have dimensions (nexp/nsobs, nsdates, nltimes, nlat, nlon) ")
    if (dim(var_obs)[iind] != dimsvar[iind]) {
      stop("var_exp & var_obs must have same dimensions except the first one (number of experiments or number of observational datasets) ")
    }
  }
  nexp <- dimsvar[1]
  nobs <- dim(var_obs)[1]
  nsdates <- dimsvar[2]
  nltimes <- dimsvar[3]
  nlat <- dimsvar[4]
  nlon <- dimsvar[5]
  if (is.null(lon) == FALSE & is.null(lat) == FALSE & 
      is.null(lonlatbox) == FALSE) {
    for (jind in 1:2) {
      while (lonlatbox[jind] < 0) {
        lonlatbox[jind] <- lonlatbox[jind] + 360
      }
      while (lonlatbox[jind] > 360) {
        lonlatbox[jind] <- lonlatbox[jind] - 360
      }
    }
    indlon <- which((lon >= lonlatbox[1] & lon <= lonlatbox[2]) | 
                    (lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | 
    indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4])
  } else {
    indlon <- 1:nlon
    indlat <- 1:nlat
  }
  if(conf == TRUE) {
    ACC <- array(NA, dim = c(nexp, nobs, nsdates, nltimes, 4))
  } else {
    ACC <- array(NA, dim = c(nexp, nobs, nsdates, nltimes))
  }         
  MACCaux <- array(0, dim = c(nexp, nobs, nsdates, nltimes, 3))

  for( iobs in 1:nobs) {
    for( iexp in 1:nexp) {

      # tmp1 and tmp2 are splitted to handle NA before building
      # tmp - Virginie
      # We need indlat, indlon below - Virginie
      tmp1 <- array(var_exp[iexp, , ,indlat, indlon],
                     dim = c(1, nsdates, nltimes,
                           length(indlon) * length(indlat))) 
      tmp2 <- array(var_obs[iobs, , ,indlat, indlon],
                     dim = c(1, nsdates, nltimes,
                           length(indlon) * length(indlat)))

      # We should not take into account in variance(tmp1) any point 
      # that is not available in tmp2 and therefore not accounted for 
      # in covariance(tmp1,tmp2) and vice-versa hence the need for 
      # the lines below - Virginie
      tmp1[ is.na(tmp2) ] <- NA
      tmp2[ is.na(tmp1) ] <- NA

      tmp <- abind(tmp1, tmp2, along = 1)

      # Below I have corrected the way of obtaining tmpallNA 
      # which was not properly working on a few cases I tested
      # and simultaneously I have simplified the code - Virginie
      if (center) {
        # I coded it as such for backward compatibility but a true
        # centering would involve a spatial averaging accounting
        # for the area of each grid cell with cos(lat) - Virginie
        tmp <- tmp - InsertDim( Mean1Dim(tmp, 4), 4, dim(tmp1)[4] )

      top <- apply(tmp, c(2, 3), function(x)
                      sum(x[1, ]*x[2, ], na.rm = TRUE) )

      bottom1 <- apply(tmp, c(2, 3), function(x)
                      sum(x[1, ]*x[1, ], na.rm = TRUE) )

      bottom2 <- apply(tmp, c(2, 3), function(x)
                      sum(x[2, ]*x[2, ], na.rm = TRUE) )
    
      bottom <- sqrt(bottom1 * bottom2 )

Virginie Guemas's avatar
Virginie Guemas committed
      tmpallNA <- which(is.na(bottom) | bottom == 0)
      
      ACCaux <- top / bottom
      ACCaux[tmpallNA] <- NA

      if (conf == TRUE) {
        ACC[iexp, iobs, , , 2] <- ACCaux
        eno <- Mean1Dim( Eno(tmp2, 4), 1)
        
        t <- apply(eno, c(1, 2), 
                   function(x) qt(0.95, x - 2))
        ACC[iexp, iobs, , , 4] <- apply(enot, c(1, 2), function(x)
                 sqrt((x[2] * x[2]) / ((x[2] * x[2]) + x[1] - 2)) )
        correno <- abind(ACCaux, eno, along = 3)
        ACC[iexp, iobs, , , 1] <- apply(correno, c(1, 2), function(x)
                  tanh(atanh(x[1]) + qnorm(0.975) / sqrt(x[2] - 3)) )
        ACC[iexp, iobs, , , 3] <- apply(correno, c(1, 2), function(x)
                  tanh(atanh(x[1]) + qnorm(0.025) / sqrt(x[2] - 3)) )
        ACC[iexp, iobs, , ] <- ACCaux

      top[tmpallNA] = NA
      bottom1[tmpallNA] = NA
      bottom2[tmpallNA] = NA
      MACCaux[iexp, iobs, , , 1] <- top
      MACCaux[iexp, iobs, , , 2] <- bottom1
      MACCaux[iexp, iobs, , , 3] <- bottom2

  # The lines I have introduced below are only to avoid that some NA are
  # called NaN or Inf. It is easier to handle a uniform NA naming outside
  # the functions - Virginie
Virginie Guemas's avatar
Virginie Guemas committed
  # Furthermore, the na.rm should be TRUE to obtain a MACC even if a few
  # start dates are missing - Virginie 
  topfinal <- apply(MACCaux, c(1, 2, 4), function(x) 
Virginie Guemas's avatar
Virginie Guemas committed
                                 sum(x[, 1], na.rm = TRUE) )
               
  bottomfinal <- apply(MACCaux, c(1, 2, 4), function(x) 
Virginie Guemas's avatar
Virginie Guemas committed
               sqrt(sum(x[, 2], na.rm = TRUE) * sum(x[, 3], na.rm = TRUE)))
Virginie Guemas's avatar
Virginie Guemas committed
  tmpNA <- which(is.na(bottomfinal) | bottomfinal == 0)

  MACC <- topfinal / bottomfinal 
  MACC[tmpNA] <- NA