Clim.R

#'Compute Bias Corrected Climatologies
#'
#'This function computes per-pair climatologies for the experimental 
#'and observational data using one of the following methods:
#'\enumerate{
#'  \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 https://doi.org/10.1007/s00382-012-1413-1)}
#'  \item{Kharin method (Kharin et al, GRL, 2012 https://doi.org/10.1029/2012GL052647)}
#'  \item{Fuckar method (Fuckar et al, GRL, 2014 https://doi.org/10.1002/2014GL060815)}
#'}
#'Per-pair climatology means that only the startdates covered by the 
#'whole experiments/observational dataset will be used. In other words, the 
#'startdates which are not all available along 'dat_dim' dimension of both
#'the 'exp' and 'obs' are excluded when computing the climatologies.
#'Kharin method is the linear trend bias correction method, and Fuckar method 
#'is the initial condition bias correction method. The two methods both do the
#'per-pair correction beforehand.
#'
#'@param exp A named numeric array of experimental data with at least dimension
#'  'time_dim'.
#'@param obs A named numeric array of observational data that has the same 
#'  dimension as 'exp' except 'dat_dim'.
#'@param time_dim A character string indicating the name of dimension along  
#'  which the climatologies are computed. The default value is 'sdate'.
#'@param dat_dim A character vector indicating the name of the dataset and 
#'  member dimensions. If data at one startdate (i.e., 'time_dim') are not 
#'  complete along 'dat_dim', this startdate along 'dat_dim' will be discarded.
#'  If there is no dataset dimension, it can be NULL, however, it will be more 
#'  efficient to simply use mean() to do the calculation. The default value is 
#'  "c('dataset', 'member')".
#'@param method A character string indicating the method to be used. The 
#'  options include 'clim' (per-pair method), 'kharin' (Kharin method), and 
#'  'NDV' (Fuckar method). The default value is 'clim'.
#'@param ftime_dim A character string indicating the name of forecast time
#'  dimension. Only used when method = 'NDV'. The default value is 'ftime'.
#'@param memb A logical value indicating whether to remain 'memb_dim' dimension
#'  (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is 
#'  TRUE.
#'@param memb_dim A character string indicating the name of the member 
#'  dimension. Only used when parameter 'memb' is FALSE. It must be one element
#'  in 'dat_dim'. The default value is 'member'.
#'@param na.rm A logical value indicating whether to remove NA values along 
#'  'time_dim' when calculating climatology (TRUE) or return NA if there is NA 
#'  along 'time_dim' (FALSE). The default value is TRUE.
#'@param ncores An integer indicating the number of cores to use for parallel 
#'  computation. The default value is NULL.
#'
#'@return
#'A list of 2:
#'\item{$clim_exp}{
#'  A numeric array with the same dimensions as parameter 'exp' but 
#'  dimension 'time_dim' is moved to the first position. If parameter 'method'
#'  is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE,
#'  dimension 'memb_dim' is also removed.
#'}
#'\item{$clim_obs}{
#'  A numeric array with the same dimensions as  parameter 'obs' 
#'  except dimension 'time_dim' is removed. If parameter 'memb' is FALSE,
#'  dimension 'memb_dim' is also removed.
#'}
#'
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = FALSE)
#'\dontrun{
#'PlotClim(clim$clim_exp, clim$clim_obs, 
#'         toptitle = paste('sea surface temperature climatologies'), 
#'         ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'), 
#'         listobs = c('ERSST'), biglab = FALSE)
#'}
#'@importFrom abind adrop
#'@importFrom ClimProjDiags Subset
#'@import multiApply
#'@export
Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), 
                 method = 'clim', ftime_dim = 'ftime', memb = TRUE,
                  memb_dim = 'member', na.rm = TRUE, ncores = NULL) {

  # Check inputs 
  ## exp and obs (1)
  if (is.null(exp) | is.null(obs)) {
    stop("Parameter 'exp' and 'obs' cannot be NULL.")
  }
  if (!is.numeric(exp) | !is.numeric(obs)) {
    stop("Parameter 'exp' and 'obs' must be a numeric array.")
  }
  if (is.null(dim(exp)) | is.null(dim(obs))) {
    stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ",
                "containing time_dim and dat_dim."))
  }
  if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
      any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
    stop("Parameter 'exp' and 'obs' must have dimension names.")
  }
  ## time_dim
  if (!is.character(time_dim) | length(time_dim) > 1) {
    stop("Parameter 'time_dim' must be a character string.")
  }
  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
  }
  ## dat_dim
  if (!is.null(dat_dim)) {
    if (!is.character(dat_dim)) {
      stop("Parameter 'dat_dim' must be a character vector.")
    }
    # Check if dat_dim is in exp
    if (!all(dat_dim %in% names(dim(exp)))) {
      stop("Parameter 'dat_dim' is not found in 'exp' dimensions.")
    }
    # If dat_dim is not in obs, add it in
    if (any(!dat_dim %in% names(dim(obs)))) {
      reset_obs_dim <- TRUE
      ori_obs_dim <- dim(obs)
      dim(obs) <- c(dim(obs), rep(1, length(dat_dim[which(!dat_dim %in% names(dim(obs)))])))
      names(dim(obs)) <- c(names(ori_obs_dim), dat_dim[which(!dat_dim %in% names(dim(obs)))])
    } else {
      reset_obs_dim <- FALSE
    }
  } else {
    reset_obs_dim <- FALSE
  } 
  ## method
  if (!(method %in% c("clim", "kharin", "NDV"))) {
    stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.")
  }
  ## ftime_dim
  if (method == "NDV") {
    if (!is.character(ftime_dim) | length(ftime_dim) > 1) {
      stop("Parameter 'ftime_dim' must be a character string.")
    }
    if (!ftime_dim %in% names(dim(exp)) | !ftime_dim %in% names(dim(obs))) {
      stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.")
    }
  }
  ## memb
  if (!is.logical(memb) | length(memb) > 1) {
    stop("Parameter 'memb' must be one logical value.")
  }
  ## memb_dim
  if (!is.null(memb_dim) & !memb) {
    if (!is.character(memb_dim) | length(memb_dim) > 1) {
      stop("Parameter 'memb_dim' must be a character string.")
    }
    if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
    }
    if (!memb_dim %in% dat_dim)
      stop("Parameter 'memb_dim' must be one element in parameter 'dat_dim'.")
  } else if (is.null(memb_dim) & !memb) {
    memb <- TRUE
  }
  ## na.rm
  if (!is.logical(na.rm) | length(na.rm) > 1) {
    stop("Parameter 'na.rm' must be one logical value.")
  }
  ## ncores
  if (!is.null(ncores)) {
    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
      length(ncores) > 1) {
      stop("Parameter 'ncores' must be a positive integer.")
    }
  } 
  ## exp and obs (2)
  name_exp <- sort(names(dim(exp)))
  name_obs <- sort(names(dim(obs)))
  if (!is.null(dat_dim)) {
    for (i in 1:length(dat_dim)) {
      name_exp <- name_exp[-which(name_exp == dat_dim[i])]
      name_obs <- name_obs[-which(name_obs == dat_dim[i])]
    }
  }
  if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
    stop(paste0("Parameter 'exp' and 'obs' must have the same dimensions ",
                "except 'dat_dim'."))
  }

  ###############################
  # Sort dimension
  name_exp <- names(dim(exp))
  name_obs <- names(dim(obs))
  order_obs <- match(name_exp, name_obs)
  obs <- Reorder(obs, order_obs)
  

  ###############################
  # Calculate Clim

  #----------------------------------
  # Per-pair: Remove all sdate if not complete along dat_dim
  if (!is.null(dat_dim)) {
    pos <- rep(0, length(dat_dim))
    for (i in 1:length(dat_dim)) {  #[dat, sdate]
      ## dat_dim: [dataset, member]
      pos[i] <- which(names(dim(obs)) == dat_dim[i])
    }
    outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) + 
                   MeanDims(obs, pos, na.rm = FALSE)
    outrows_obs <- outrows_exp

    for (i in 1:length(pos)) {
      outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]])
      outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]])
    }
    exp[which(is.na(outrows_exp))] <- NA
    obs[which(is.na(outrows_obs))] <- NA
  }

  #-----------------------------------

  if (method == 'clim') {
    clim <- Apply(list(exp, obs), 
                  target_dims = c(time_dim, dat_dim),
                  fun = .Clim,
                  method = method, time_dim = time_dim, 
                  dat_dim = dat_dim, memb_dim = memb_dim,
                  memb = memb, na.rm = na.rm, ncores_input = ncores,
                  ncores = ncores)

  } else if (method == 'kharin') {
    clim <- Apply(list(exp, obs),
                  target_dims = c(time_dim, dat_dim),
                  fun = .Clim,
                  method = method, time_dim = time_dim,
                  dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
                  memb = memb, na.rm = na.rm, ncores_input = ncores,
                  ncores = ncores)

  } else if (method == 'NDV') {
    clim <- Apply(list(exp, obs),
                  target_dims = c(time_dim, dat_dim, ftime_dim),
                  fun = .Clim,
                  method = method, time_dim = time_dim,
                  dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
                  memb = memb, na.rm = na.rm, ncores_input = ncores,
                  ncores = ncores)
  }

  # Remove dat_dim in obs if obs doesn't have at first place
  if (reset_obs_dim) {
    clim_obs_dim <- ori_obs_dim[-which(names(ori_obs_dim) == time_dim)]
    if (!memb & memb_dim %in% names(clim_obs_dim)) {
      clim_obs_dim <- clim_obs_dim[-which(names(clim_obs_dim) == memb_dim)]
    }
    if (is.integer(clim_obs_dim) & length(clim_obs_dim) == 0) {
     clim$clim_obs <- as.vector(clim$clim_obs)
    } else {
      clim$clim_obs <- array(clim$clim_obs, dim = clim_obs_dim)
    }
  } 

  return(clim)
}


.Clim <- function(exp, obs, method = 'clim',
                  time_dim = 'sdate', dat_dim = c('dataset', 'member'),
                  ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE,
                  na.rm = TRUE, ncores_input = NULL) {

  if (method == 'clim') {
    if (!is.null(dat_dim)) {
    # exp: [sdate, dat_dim_exp]
    # obs: [sdate, dat_dim_obs]

      clim_exp <- apply(exp, which(names(dim(exp)) != time_dim),
                        mean, na.rm = na.rm) #average out time_dim
      clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
                        mean, na.rm = na.rm) #[dat_dim]
  
      if (is.null(dim(clim_exp))) {
        dim(clim_exp) <- length(clim_exp)
        names(dim(clim_exp)) <- dat_dim
        dim(clim_obs) <- length(clim_obs)
        names(dim(clim_obs)) <- dat_dim
      }
  
    ## ensemble mean
      if (!memb) {
        if (length(dim(clim_exp)) == 1) {   #dim: [member]
          clim_exp <- mean(clim_exp, na.rm = TRUE)    
          clim_obs <- mean(clim_obs, na.rm = TRUE)
        } else {
          dim_name <- names(dim(clim_exp))
          pos <- c(1:length(dim(clim_exp)))[-which(dim_name == memb_dim)]
          clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) 
          clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) 
          if (is.null(dim(clim_exp))) {
            dim(clim_exp) <- length(clim_exp)
            dim(clim_obs) <- length(clim_obs)
            names(dim(clim_exp)) <- dim_name[pos]
            names(dim(clim_obs)) <- dim_name[pos]
          }
        }
      }
    } else {  #dat_dim = NULL
      clim_exp <- mean(exp)
      clim_obs <- mean(obs)
    }

  } else if (method == 'kharin') {
  # exp: [sdate, dat_dim_exp]
  # obs: [sdate, dat_dim_obs]

  # obs clim
    if (!is.null(dat_dim)) {
      clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), 
                        mean, na.rm = na.rm) #[dat_dim]
      if (is.null(dim(clim_obs))) {
        dim(clim_obs) <- length(clim_obs)
        names(dim(clim_obs)) <- dat_dim
      }
    } else {
      clim_obs <- mean(obs)
    }

  # exp clim
  ##--- NEW trend ---##
    tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1,
                     polydeg = 1, conf = FALSE, ncores = ncores_input)$trend 
    tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, 
                     polydeg = 1, conf = FALSE, ncores = ncores_input)$trend
    # tmp_exp: [stats, dat_dim]
    tmp_obs_mean <- apply(tmp_obs, 1, mean)  #average out dat_dim (dat and member)
    #tmp_obs_mean: [stats = 2]
    if (!is.null(dat_dim)) {
      intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected')  #[dat_dim]
      slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected')  #[dat_dim]
      intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1])  #[dat_dim]
      slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1])  #[dat_dim]
    } else {
      intercept_exp <- tmp_exp[1]
      slope_exp <- tmp_exp[2]
      intercept_obs <- tmp_obs_mean[1]
      slope_obs <- tmp_obs_mean[2]
    }
    trend_exp <- list()
    trend_obs <- list()
    for (jdate in 1:dim(exp)[time_dim]) {
      trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp
      trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs
    }
    # turn list into array
    trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1]))
    trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1]))
    if (!is.null(dat_dim)) {
      len <- length(dim(exp))
      trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1)))
      trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1)))
    }

    # average out dat_dim, get a number
    if (is.null(dim(clim_obs))) {
      clim_obs_mean <- mean(clim_obs)
    } else {
      clim_obs_mean <- mean(apply(clim_obs, 1, mean))
    }
    clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line
    clim_exp <- trend_exp - trend_obs + clim_obs_mean

    ## member mean
    if (!memb) {
      pos_exp <- c(1:length(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)]
      pos_obs <- c(1:length(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)]
      tmp_dim_exp <- dim(clim_exp)
      tmp_dim_obs <- dim(clim_obs)
      clim_exp <- apply(clim_exp, pos_exp, mean, na.rm = TRUE)
      if (is.integer(pos_obs) & length(pos_obs) == 0) {
        clim_obs <- mean(clim_obs)
      } else {
        clim_obs <- apply(clim_obs, pos_obs, mean, na.rm = TRUE)
      }
      if (is.null(dim(clim_exp))) {
        clim_exp <- as.array(clim_exp)
        dim(clim_exp) <- tmp_dim_exp[pos_exp]
      }
      if (is.null(dim(clim_obs)) & !(is.integer(pos_obs) & length(pos_obs) == 0)) {
        clim_obs <- as.array(clim_obs)
        dim(clim_obs) <- tmp_dim_obs[pos_obs]
      }
    }


  } else if (method == 'NDV') {
  # exp: [sdate, dat_dim, ftime]
  # obs: [sdate, dat_dim, ftime]

    # obs clim
    clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
                      mean, na.rm = na.rm) #[(dat_dim), ftime]

    if (is.null(dim(clim_obs))) {
      dim(clim_obs) <- length(clim_obs)
      names(dim(clim_obs)) <- c(dat_dim, ftime_dim)
    }

    # exp clim
    pos_ftime <- length(dim(exp))  #a number
    dim_ftime <- dim(exp)[pos_ftime]  #c(ftime = 4)
    if (!is.null(dat_dim)) {
      pos_dat <- 2:(length(dim(exp)) - 1)  #1 is sdate, last is ftime
      dim_dat <- dim(exp)[pos_dat]  #c(dataset = 1, member = 3)
    } else {
      dim_dat <- NULL
    }

    # Create initial data set (i.e., only first ftime)
    tmp <- Subset(exp, ftime_dim, 1, drop = 'selected')
    ini_exp <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime
    tmp <- Subset(obs, ftime_dim, 1, drop = 'selected')
    ini_obs <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime
    #ini_: [sdate, dat_dim, ftime]
    tmp_exp <- Regression(datay = exp, datax = ini_exp, reg_dim = time_dim,
                          na.action = na.omit,
                          pval = FALSE, conf = FALSE, ncores = ncores_input)$regression
    tmp_obs <- Regression(datay = obs, datax = ini_obs, reg_dim = time_dim, 
                          na.action = na.omit, 
                          pval = FALSE, conf = FALSE, ncores = ncores_input)$regression
    #tmp_: [stats = 2, dat_dim, ftime]
    tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean)  #average out dat_dim (dat and member)
    #tmp_obs_mean: [stats = 2, ftime]
    ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean)  #average out dat_dim
    #ini_obs_mean: [sdate, ftime]

    # Find intercept and slope
    intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected')  #[dat_dim, ftime]
    slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected')  #[dat_dim, ftime]
    intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp
    if (!is.null(dat_dim)) {
      intercept_obs <- Reorder(intercept_obs, c(2:length(dim(intercept_obs)), 1)) #[dat_dim, ftime] exp
    } 
    slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp
    if (!is.null(dat_dim)) {
      slope_obs <- Reorder(slope_obs, c(2:length(dim(slope_obs)), 1)) #[dat_dim, ftime] exp
    }

    trend_exp <- list()
    trend_obs <- list()
    for (jdate in 1:dim(exp)[time_dim]) {
      tmp <- Subset(ini_exp, time_dim, jdate, drop = 'selected')  #[dat_dim, ftime]
      trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp  #[dat_dim, ftime]

      tmp <- array(ini_obs_mean[jdate, ], dim = c(dim_ftime, dim_dat))  #[ftime, dat_dim]
      if (!is.null(dat_dim)) {
        tmp <- Reorder(tmp, c(2:length(dim(tmp)), 1))  #[dat_dim, ftime]
      }
      trend_obs[[jdate]] <- intercept_obs + tmp * slope_obs
    }
    # turn list into array
    trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1]))
    trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1]))
    #trend_: [dat_dim, ftime, sdate]
    len <- length(dim(exp))
    trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1)))
    trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1)))
    #trend_: [sdate, dat_dim, ftime]
    if (is.null(dim(clim_obs))) {
      clim_obs_mean <- clim_obs #[ftime]
    } else {
    clim_obs_mean <- apply(clim_obs, length(dim(clim_obs)), mean)  #average out dat_dim, [ftime]
    }
    clim_obs_mean <- array(clim_obs_mean, dim = c(dim_ftime, dim(exp)[1], dim_dat))
    #[ftime, sdate, dat_dim]
    len <- length(dim(clim_obs_mean))
    clim_obs_mean <- Reorder(clim_obs_mean, c(2:len, 1))
    #[sdate, dat_dim, ftime]

    clim_exp <- trend_exp - trend_obs + clim_obs_mean

    ## member mean
    if (!memb) {
      pos_exp <- c(1:length(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)]
      pos_obs <- c(1:length(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)]

      tmp_dim_exp <- dim(clim_exp)
      tmp_dim_obs <- dim(clim_obs)
      clim_exp <- apply(clim_exp, pos_exp, mean, na.rm = TRUE)
      if (is.integer(pos_obs) & length(pos_obs) == 0) {
        clim_obs <- mean(clim_obs)
      } else {
        clim_obs <- apply(clim_obs, pos_obs, mean, na.rm = TRUE)
      }
      if (is.null(dim(clim_exp))) {
        clim_exp <- as.array(clim_exp)
        dim(clim_exp) <- tmp_dim_exp[pos_exp]
      }
      if (is.null(dim(clim_obs)) & !(is.integer(pos_obs) & length(pos_obs) == 0)) {
        clim_obs <- as.array(clim_obs)
        dim(clim_obs) <- tmp_dim_obs[pos_obs]
      }
    }

  }

  return(list(clim_exp = clim_exp, clim_obs = clim_obs))
}