Spread.R 8.18 KB
Newer Older
#'Compute interquartile range, maximum-minimum, standard deviation and median
#'absolute deviation
#'
#'Compute interquartile range, maximum-minimum, standard deviation and median
#'absolute deviation along the list of dimensions provided by the compute_dim 
#'argument (typically along the ensemble member and start date dimension).
#'The confidence interval is computed by bootstrapping by 100 times. The input
#'data can be the output of \code{Load()}, \code{Ano()}, or 
#'\code{Ano_CrossValid()}, for example.
#'
#'@param data A numeric vector or array with named dimensions to compute the 
#'  statistics. The dimensions should at least include 'compute_dim'.
#'@param compute_dim A vector of character strings of the dimension names along 
#'  which to compute the statistics. The default value is 'member'.
#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
#'  kept (FALSE) for computation. The default value is TRUE.
#'@param conf A logical value indicating whether to compute the confidence 
#'  intervals or not. The default value is TRUE.
#'@param conf.lev A numeric value of the confidence level for the computation. 
#'  The default value is 0.95.
#'@param ncores An integer indicating the number of cores to use for parallel 
#'  computation. The default value is NULL.
#'
#'@return 
#'A list of numeric arrays with the same dimensions as 'data' but without
#''compute_dim' and with the first dimension 'stats'. If 'conf' is TRUE, the 
#'length of 'stats' is 3 corresponding to the lower limit of the confidence 
#'interval, the spread, and the upper limit of the confidence interval. If 
#''conf' is FALSE, the length of 'stats' is 1 corresponding to the spread.
#'\item{$iqr}{
#'  InterQuartile Range.
#'}
#'\item{$maxmin}{
#'  Maximum - Minimum.
#'}
#'\item{$sd}{
#'  Standard Deviation.
#'}
#'\item{$mad}{
#'  Median Absolute Deviation.
#'} 
#'
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'runmean_months <- 12
#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
#'smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', 
#'                                                            na.rm = TRUE), 
aho's avatar
aho committed
#'                                                   posdim = 3, 
#'                                                   lendim = dim(smooth_ano_exp)['member'], 
#'                                                   name = 'member')
#'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate'))
#'
#'\donttest{
#'PlotVsLTime(Reorder(spread$iqr, c('dataset', 'stats', 'ftime')), 
#'            toptitle = "Inter-Quartile Range between ensemble members",
#'            ytitle = "K", monini = 11, limits = NULL, 
#'            listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, 
#'            hlines = c(0), fileout = 'tos_iqr.png')
#'PlotVsLTime(Reorder(spread$maxmin, c('dataset', 'stats', 'ftime')), 
#'            toptitle = "Maximum minus minimum of the members", 
#'            ytitle = "K", monini = 11, limits = NULL, 
#'            listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, 
#'            hlines = c(0), fileout = 'tos_maxmin.png')
#'PlotVsLTime(Reorder(spread$sd, c('dataset', 'stats', 'ftime')), 
#'            toptitle = "Standard deviation of the members", 
#'            ytitle = "K", monini = 11, limits = NULL, 
#'            listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, 
#'            hlines = c(0), fileout = 'tos_sd.png')
#'PlotVsLTime(Reorder(spread$mad, c('dataset', 'stats', 'ftime')), 
#'            toptitle = "Median Absolute Deviation of the members",
#'            ytitle = "K", monini = 11, limits = NULL, 
#'            listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, 
#'            hlines = c(0), fileout = 'tos_mad.png')
#'}
#'
#'@import multiApply
#'@importFrom stats IQR sd mad runif quantile
#'@export
Spread <- function(data, compute_dim = 'member', na.rm = TRUE, 
                   conf = TRUE, conf.lev = 0.95, ncores = NULL) {

  # Check inputs 
  ## data
  if (is.null(data)) {
    stop("Parameter 'data' cannot be NULL.")
  }
  if (!is.numeric(data)) {
    stop("Parameter 'data' must be a numeric array.")
  }
  if (is.null(dim(data))) {  #is vector
    dim(data) <- c(length(data))
    names(dim(data)) <- compute_dim[1]
  }
  if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) {
    stop("Parameter 'data' must have dimension names.")
  }
  ## compute_dim
  if (!is.character(compute_dim)) {
    stop("Parameter 'compute_dim' must be a character vector.")
  }
  if (any(!compute_dim %in% names(dim(data)))) {
    stop("Parameter 'compute_dim' has some element not in 'data' dimension names.")
  }
  ## na.rm
  if (!is.logical(na.rm) | length(na.rm) > 1) {
    stop("Parameter 'na.rm' must be one logical value.")
  }
  ## conf
  if (!is.logical(conf) | length(conf) > 1) {
    stop("Parameter 'conf' must be one logical value.")
  }
  ## conf.lev
  if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
    stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
  }
  ## ncores
  if (!is.null(ncores)) {
    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
      length(ncores) > 1) {
      stop("Parameter 'ncores' must be a positive integer.")
    }
  } 

  ###############################
  # Calculate Spread

  output <- Apply(list(data),
                  target_dims = compute_dim,
                  fun = .Spread,
                  output_dims = list(iqr = 'stats', maxmin = 'stats', 
                                     sd = 'stats', mad = 'stats'),
                  na.rm = na.rm,
                  conf = conf, conf.lev = conf.lev,
                  ncores = ncores)

  return(output)
}

.Spread <- function(data, compute_dim = 'member', na.rm = TRUE,
                    conf = TRUE, conf.lev = 0.95) {

  # data: compute_dim. [member] or [member, sdate] for example

  # Compute spread
  res_iqr <- IQR(data, na.rm = na.rm)
  res_maxmin <- max(data, na.rm = na.rm) - min(data, na.rm = na.rm)
  res_sd <- sd(data, na.rm = na.rm)
  res_mad <- mad(data, na.rm = na.rm)

  # Compute conf (bootstrapping)
  if (conf) {
    # The output length is 3, [conf.low, spread, conf.high]
    res_iqr <- rep(res_iqr, 3)
    res_maxmin <- rep(res_maxmin, 3)
    res_sd <- rep(res_sd, 3)
    res_mad <- rep(res_mad, 3)

    conf_low <- (1 - conf.lev) / 2
    conf_high <- 1 - conf_low
  
    # Create vector for saving bootstrap result
    iqr_bs <- c()
    maxmin_bs <- c()
    sd_bs <- c()
    mad_bs <- c()

    # bootstrapping for 100 times
    num <- length(data)
    for (jmix in 1:100) {
      drawings <- round(runif(num, 0.5, num + 0.5))
      iqr_bs <- c(iqr_bs, IQR(data[drawings], na.rm = na.rm))
      maxmin_bs <- c(maxmin_bs, max(data[drawings], na.rm = na.rm) - 
                                min(data[drawings], na.rm = na.rm))
      sd_bs <- c(sd_bs, sd(data[drawings], na.rm = na.rm))
      mad_bs <- c(mad_bs, mad(data[drawings], na.rm = na.rm))
    }
    
    # Calculate confidence interval with the bootstrapping results
    res_iqr[1] <- quantile(iqr_bs, conf_low, na.rm = na.rm)
    res_iqr[3] <- quantile(iqr_bs, conf_high, na.rm = na.rm)
    res_maxmin[1] <- res_maxmin[2] + (quantile(maxmin_bs, conf_low, na.rm = na.rm) - 
                                 quantile(maxmin_bs, conf_high, na.rm = na.rm)) / 2
    res_maxmin[3] <- res_maxmin[2] - (quantile(maxmin_bs, conf_low, na.rm = na.rm) - 
                                 quantile(maxmin_bs, conf_high, na.rm = na.rm)) / 2
    res_sd[1] <- quantile(sd_bs, conf_low, na.rm = na.rm)
    res_sd[3] <- quantile(sd_bs, conf_high, na.rm = na.rm)
    res_mad[1] <- res_mad[2] + (quantile(mad_bs, conf_low, na.rm = na.rm) - 
                                quantile(mad_bs, conf_high, na.rm = na.rm))
    res_mad[3] <- res_mad[2] - (quantile(mad_bs, conf_low, na.rm = na.rm) - 
                                quantile(mad_bs, conf_high, na.rm = na.rm))

  }

  # Turn infinite to NA
  res_maxmin[which(is.infinite(res_maxmin))] <- NA

  return(invisible(list(iqr = as.array(res_iqr), maxmin = as.array(res_maxmin), 
                        sd = as.array(res_sd), mad = as.array(res_mad))))