diff --git a/NAMESPACE b/NAMESPACE index 27ebcf01c84c871ebaefdd771b0cb53f9ddda995..83b3be15f431f25977506d0247015710a623e42a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ import(multiApply) import(ncdf4) import(parallel) import(plyr) +importFrom(abind,adrop) importFrom(grDevices,bmp) importFrom(grDevices,col2rgb) importFrom(grDevices,colorRampPalette) diff --git a/R/Clim.R b/R/Clim.R index 8445e46277bcba517dd1256d957dbe3347c1b0fb..2ed1ffe96a4e0dc7fc6e5e7f2b2a4eae72df4b3b 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -1,159 +1,408 @@ -#'Computes Bias Corrected Climatologies +#'Compute Bias Corrected Climatologies #' -#'This function computes only per-pair climatologies from the experimental -#'and observational matrices output from \code{Load()}. -#'To compute plain climatologies from only experimental or observational -#'data from \code{Load()}, the following code can be used:\cr -#'\code{clim <- array(apply(obs_data, c(1, 4, 5, 6), mean),}\cr -#'\code{ dim = dim(obs_datta)[-c(2, 3)])}\cr -#'The function \code{Clim()} computes per-pair climatologies using one of the -#'following methods: +#'This function computes per-pair climatologies for the experimental +#'and observational data using one of the following methods: #'\enumerate{ #' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)} #' \item{Kharin method (Karin et al, GRL, 2012)} #' \item{Fuckar method (Fuckar et al, GRL, 2014)} #'} -#'\code{Clim()} computes climatologies using the startdates covered by the -#'whole experiments/observational data sets. The startdates not available for -#'all the data (model and obs) are excluded when computing the climatologies. +#'Per-pair climatology means that only the startdates covered by the +#'whole experiments/observational dataset will be used. In other words, the +#'startdates which are not all available along 'dat_dim' dimension of both +#'the 'exp' and 'obs' are excluded when computing the climatologies. #' -#'@param var_exp Model data: c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to -#' c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon). -#'@param var_obs Observational data: c(nobs, nmemb, nsdates, nltime) up to -#' c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon). -#'@param memb TRUE/FALSE (1 climatology for each member). Default = TRUE. -#'@param kharin TRUE/FALSE (if Kharin method is applied or not). -#' Default = FALSE. -#'@param NDV TRUE/FALSE (if Fuckar method is applied or not). Default = FALSE. +#'@param exp A named numeric array of experimental data, with at least two +#' dimensions 'time_dim' and 'dat_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the climatologies are computed. The default value is 'sdate'. +#'@param dat_dim A character vector indicating the name of the dataset and +#' member dimensions. If data at one startdate (i.e., 'time_dim') are not +#' complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. +#' The default value is "c('dataset', 'member')". +#'@param method A character string indicating the method to be used. The +#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'. +#'@param ftime_dim A character string indicating the name of forecast time +#' dimension. Only used when method = 'NDV'. The default value is 'ftime'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. Only used when parameter 'memb' is FALSE. It must be one element +#' in 'dat_dim'. The default value is 'member'. +#'@param memb A logical value indicating whether to remain 'memb_dim' dimension +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE. +#'@param na.rm A logical value indicating whether to remove NA values along +#' 'time_dim' when calculating climatology (TRUE) or return NA if there is NA +#' along 'time_dim' (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' #'@return -#'\item{clim_exp}{Array with same dimensions as var_exp except the third -#' (starting dates) and, depending on the parameters, the second (members), -#' which disappear.} -#'\item{clim_obs}{Array with same dimensions as var_obs except the third -#' (starting dates) and, depending on the parameters, the second (members), -#' which disappear.} +#'A list of 2: +#'\item{$clim_exp}{ +#' A numeric array with the same dimensions as parameter 'exp' but +#' dimension 'time_dim' is moved to the first position. If parameter 'method' +#' is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE, +#' dimension 'memb_dim' is also removed. +#'} +#'\item{$clim_obs}{ +#' A numeric array with the same dimensions as parameter 'exp' +#' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, +#' dimension 'memb_dim' is also removed. +#'} +#' #'@keywords datagen #'@author History:\cr -#' 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@@ic3.cat}) - Original code\cr -#' 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Formatting to R CRAN +#' 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr +#' 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN +#' 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) #'clim <- Clim(sampleData$mod, sampleData$obs) -#' \donttest{ +#'clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = F) +#'\donttest{ #'PlotClim(clim$clim_exp, clim$clim_obs, #' toptitle = paste('sea surface temperature climatologies'), #' ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'), #' listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') -#' } +#'} +#'@importFrom abind adrop +#'@import multiApply #'@export -Clim <- function(var_exp, var_obs, memb = TRUE, kharin = FALSE, NDV = FALSE) { - # - # Enlarge the number of dimensions of var_exp and var_obs to 7 if necessary - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimexp <- dim(var_exp) - dimobs <- dim(var_obs) - if (length(dimexp) < 4 | length(dimobs) < 4) { - stop("At least 4 dim needed : c(nexp/nobs, nmemb, nsdates, nltime)") - } - for (jn in 3:max(length(dimexp), length(dimobs))) { - if (dimexp[jn] != dimobs[jn]) { - stop("Wrong input dimensions") +Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), + method = 'clim', ftime_dim = 'ftime', memb_dim = 'member', + memb = TRUE, na.rm = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character vector.") + } + if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + ## method + if (!(method %in% c("clim", "kharin", "NDV"))) { + stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.") + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!ftime_dim %in% names(dim(exp)) | !ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + for (i in 1:length(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim[i])] + name_obs <- name_obs[-which(name_obs == dat_dim[i])] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.")) } - var_exp <- Enlarge(var_exp, 7) - var_obs <- Enlarge(var_obs, 7) + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- s2dverification:::.aperm2(obs, order_obs) - # - # Find common points to compute climatologies - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - tmp <- MeanListDim(var_obs, dims = 5:7, narm = TRUE) - tmp2 <- MeanListDim(var_exp, dims = 5:7, narm = TRUE) - nan <- MeanListDim(tmp, dims = 1:2, narm = FALSE) + Mean1Dim(Mean1Dim(tmp2, 2, - narm = TRUE), - 1, narm = FALSE) - for (jdate in 1:dimexp[3]) { - for (jtime in 1:dimexp[4]) { - if (is.na(nan[jdate, jtime])) { - var_exp[, , jdate, jtime, , , ] <- NA - var_obs[, , jdate, jtime, , , ] <- NA + + ############################### + # Calculate Clim + + #---------------------------------- + # Remove all sdate if not complete along dat_dim + + pos <- rep(0, length(dat_dim)) + for (i in 1:length(dat_dim)) { #[dat, sdate] + ## dat_dim: [dataset, member] + pos[i] <- which(names(dim(obs)) == dat_dim[i]) + } + outrows_exp <- MeanListDim(exp, pos, narm = FALSE) + + MeanListDim(obs, pos, narm = FALSE) + outrows_obs <- outrows_exp + + for (i in 1:length(pos)) { + outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]]) + outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]]) + } + exp[which(is.na(outrows_exp))] <- NA + obs[which(is.na(outrows_obs))] <- NA + + #----------------------------------- + + if (method == 'clim') { + clim <- Apply(list(exp, obs), + target_dims = c(time_dim, dat_dim), + fun = .Clim, + method = method, time_dim = time_dim, + dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + memb = memb, na.rm = na.rm, + ncores = ncores) + # Add member dimension name back + if (memb) { + if(is.null(names(dim(clim$clim_exp))[1])) { + names(dim(clim$clim_exp))[1] <- memb_dim + names(dim(clim$clim_obs))[1] <- memb_dim } } - } - # - # Compute climatologies - # ~~~~~~~~~~~~~~~~~~~~~~~ - # - out_clim_obs <- Mean1Dim(var_obs, posdim = 3, narm = TRUE) - dim_clim_obs <- dimobs[-3] - - if (kharin == TRUE) { - tmp_obs <- Trend(var_obs, posTR = 3) - tmp_exp <- Trend(var_exp, posTR = 3) - trend_obs <- array(dim = dim(var_exp)) - trend_exp <- array(dim = dim(var_exp)) - for (jdate in 1:dimexp[3]) { - trend_exp[, , jdate, , , , ] <- tmp_exp$trend[, , 4, , , , ] + - jdate * tmp_exp$trend[, , 2, , , , ] - tmp_obs2 <- MeanListDim(tmp_obs$trend,c(2, 1)) - trend_obs[, , jdate, , , , ] <- InsertDim(InsertDim(tmp_obs2[4, , , , ] + - jdate * tmp_obs2[2, , , , ], 1, dimexp[1]), - 2, dimexp[2]) + } else if (method == 'kharin') { + clim <- Apply(list(exp, obs), + target_dims = c(time_dim, dat_dim), + fun = .Clim, + method = method, time_dim = time_dim, + dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + memb = memb, na.rm = na.rm, + ncores = ncores) + + } else if (method == 'NDV') { + clim <- Apply(list(exp, obs), + target_dims = c(time_dim, dat_dim, ftime_dim), + fun = .Clim, + method = method, time_dim = time_dim, + dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + memb = memb, na.rm = na.rm, + ncores = ncores) + } + + return(clim) +} + + +.Clim <- function(exp, obs, method = 'clim', + time_dim = 'sdate', dat_dim = c('dataset', 'member'), + ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE, + na.rm = TRUE) { + + if (method == 'clim') { + # exp: [sdate, dat_dim_exp] + # obs: [sdate, dat_dim_obs] + + clim_exp <- apply(exp, which(names(dim(exp)) != time_dim), + mean, na.rm = na.rm) #average out time_dim + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), + mean, na.rm = na.rm) #[dat_dim] + + ## member mean + if (!memb) { + if (length(dim(clim_exp)) == 1) { #dim: [member] + clim_exp <- mean(clim_exp, na.rm = TRUE) + clim_obs <- mean(clim_obs, na.rm = TRUE) + } else { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + } } - out_clim_exp <- trend_exp - trend_obs + InsertDim(InsertDim(InsertDim( - MeanListDim(out_clim_obs, c(2, 1)), 1, dimexp[1]), 2, - dimexp[2]), 3, dimexp[3]) - dim_clim_exp <- dimexp - } else if (NDV == TRUE) { - iniobs <- InsertDim(SelIndices(var_obs, 4, c(1, 1)), 4, dimobs[4]) - iniexp <- InsertDim(SelIndices(var_exp, 4, c(1, 1)), 4, dimexp[4]) - tmp_obs <- Regression(var_obs, iniobs, posREG = 3) - tmp_exp <- Regression(var_exp, iniexp, posREG = 3) - reg_obs <- array(dim = dim(var_exp)) - reg_exp <- array(dim = dim(var_exp)) - for (jdate in 1:dimexp[3]) { - reg_exp[, , jdate, , , , ] <- tmp_exp$regression[, , 4, , , , ] + - iniexp[, , jdate, , , , ] * - tmp_exp$regression[, , 2, , , , ] - tmp_obs2 <- MeanListDim(tmp_obs$regression,c(2, 1)) - reg_obs[, , jdate, , , , ] <- InsertDim(InsertDim(tmp_obs2[4, , , , ] + - MeanListDim(iniobs,c(2, 1))[jdate, , , , ] * - tmp_obs2[2, , , , ], 1, dimexp[1]), - 2, dimexp[2]) + + } else if (method == 'kharin') { + # exp: [sdate, dat_dim_exp] + # obs: [sdate, dat_dim_obs] + + # obs clim + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), + mean, na.rm = na.rm) #[dat_dim] + + # exp clim + ##--- NEW trend ---## + tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1, + polydeg = 1, conf = FALSE)$trend + tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, + polydeg = 1, conf = FALSE)$trend + # tmp_exp: [stats, dat_dim)] + + tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member) + #tmp_obs_mean: [stats = 2] + + intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim] + slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim] + intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) #[dat_dim] + slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) #[dat_dim] + trend_exp <- list() + trend_obs <- list() + + for (jdate in 1:dim(exp)[time_dim]) { + trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp + trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs } - out_clim_exp <- reg_exp - reg_obs + InsertDim(InsertDim(InsertDim( - MeanListDim(out_clim_obs, c(2, 1)), 1, dimexp[1]), 2, - dimexp[2]), 3, dimexp[3]) - dim_clim_exp <- dimexp - } else { - out_clim_exp <- Mean1Dim(var_exp, posdim = 3, narm = TRUE) - dim_clim_exp <- dimexp[-3] - } - - if (memb != TRUE) { - out_clim_obs <- Mean1Dim(out_clim_obs, posdim = 2, narm = TRUE) - out_clim_exp <- Mean1Dim(out_clim_exp, posdim = 2, narm = TRUE) - dim_clim_exp <- dim_clim_exp[-2] - dim_clim_obs <- dim_clim_obs[-2] - } - - # - # Reduce the number of dimensions to the original one - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - - clim_exp <- array(dim = dim_clim_exp) - clim_exp[] <- out_clim_exp - clim_obs <- array(dim = dim_clim_obs) - clim_obs[] <- out_clim_obs - - # - # Outputs - # ~~~~~~~~~ - # - invisible(list(clim_exp = clim_exp, clim_obs = clim_obs)) + # turn list into array + trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1])) + trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1])) + len <- length(dim(exp)) + trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1))) + trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1))) + + clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #average out dat_dim, get a number + clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line + clim_exp <- trend_exp - trend_obs + clim_obs_mean + + ## member mean + if (!memb) { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + } + + + } else if (method == 'NDV') { + # exp: [sdate, dat_dim, ftime] + # obs: [sdate, dat_dim, ftime] + + # obs clim + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), + mean, na.rm = na.rm) #[dat_dim, ftime] + + # exp clim + pos_ftime <- length(dim(exp)) #a number + dim_ftime <- dim(exp)[pos_ftime] #c(ftime = 4) + pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime + dim_dat <- dim(exp)[pos_dat] #c(dataset = 1, member = 3) + + # Create initial data set (i.e., only first ftime) + tmp <- Subset(exp, ftime_dim, 1, drop = 'selected') + ini_exp <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime + tmp <- Subset(obs, ftime_dim, 1, drop = 'selected') + ini_obs <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime + #ini_: [sdate, dat_dim, ftime] + tmp_exp <- Regression(datay = exp, datax = ini_exp, time_dim = time_dim, + na.action = na.omit, + pval = FALSE, conf = FALSE)$regression + tmp_obs <- Regression(datay = obs, datax = ini_obs, time_dim = time_dim, + na.action = na.omit, + pval = FALSE, conf = FALSE)$regression + #tmp_: [stats = 2, dat_dim, ftime] + tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean) #average out dat_dim (dat and member) + #tmp_obs_mean: [stats = 2, ftime] + ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean) #average out dat_dim + #ini_obs_mean: [sdate, ftime] + + # Find intercept and slope + intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim, ftime] + slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim, ftime] + intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat)) + #[ftime, dat_dim] exp + intercept_obs <- s2dverification:::.aperm2(intercept_obs, c(2:length(dim(intercept_obs)), 1)) + #[dat_dim, ftime] exp + slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat)) + #[ftime, dat_dim] exp + slope_obs <- s2dverification:::.aperm2(slope_obs, c(2:length(dim(slope_obs)), 1)) + #[dat_dim, ftime] exp + + trend_exp <- list() + trend_obs <- list() + for (jdate in 1:dim(exp)[time_dim]) { + tmp <- Subset(ini_exp, time_dim, jdate, drop = 'selected') #[dat_dim, ftime] + trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp #[dat_dim, ftime] + + tmp <- array(ini_obs_mean[jdate, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] + tmp <- s2dverification:::.aperm2(tmp, c(2:length(dim(tmp)), 1)) #[dat_dim, ftime] + trend_obs[[jdate]] <- intercept_obs + tmp * slope_obs + } + # turn list into array + trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1])) + trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1])) + #trend_: [dat_dim, ftime, sdate] + len <- length(dim(exp)) + trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1))) + trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1))) + #trend_: [sdate, dat_dim, ftime] + + clim_obs_mean <- apply(clim_obs, length(dim(clim_obs)), mean) #average out dat_dim, [ftime] + clim_obs_mean <- array(clim_obs_mean, dim = c(dim_ftime, dim(exp)[1], dim_dat)) + #[ftime, sdate, dat_dim] + len <- length(dim(clim_obs_mean)) + clim_obs_mean <- s2dverification:::.aperm2(clim_obs_mean, c(2:len, 1)) + #[sdate, dat_dim, ftime] + + clim_exp <- trend_exp - trend_obs + clim_obs_mean + + ## member mean + if (!memb) { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + } + + } + + return(list(clim_exp = clim_exp, clim_obs = clim_obs)) } diff --git a/R/Regression.R b/R/Regression.R index 2e736a051c8c011a5e60cd03f37e279f6756d176..1b6ae12a516a9ef8fa23537413f24b62e4a660a6 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -247,3 +247,4 @@ Regression <- function(datay, datax, time_dim = 'sdate', formula = y ~ x, } } + diff --git a/man/Clim.Rd b/man/Clim.Rd index 1ef5e3dabb76b6bf4fc7b5b3956351de2ef2f265..b17b2ee50131b168664fb80e7f6fc8c8271edd1b 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -2,65 +2,91 @@ % Please edit documentation in R/Clim.R \name{Clim} \alias{Clim} -\title{Computes Bias Corrected Climatologies} +\title{Compute Bias Corrected Climatologies} \usage{ -Clim(var_exp, var_obs, memb = TRUE, kharin = FALSE, NDV = FALSE) +Clim(exp, obs, time_dim = "sdate", dat_dim = c("dataset", "member"), + method = "clim", ftime_dim = "ftime", memb_dim = "member", + memb = TRUE, na.rm = TRUE, ncores = NULL) } \arguments{ -\item{var_exp}{Model data: c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to -c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon).} +\item{exp}{A named numeric array of experimental data, with at least two +dimensions 'time_dim' and 'dat_dim'.} -\item{var_obs}{Observational data: c(nobs, nmemb, nsdates, nltime) up to -c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon).} +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along 'dat_dim'.} -\item{memb}{TRUE/FALSE (1 climatology for each member). Default = TRUE.} +\item{time_dim}{A character string indicating the name of dimension along +which the climatologies are computed. The default value is 'sdate'.} -\item{kharin}{TRUE/FALSE (if Kharin method is applied or not). -Default = FALSE.} +\item{dat_dim}{A character vector indicating the name of the dataset and +member dimensions. If data at one startdate (i.e., 'time_dim') are not +complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. +The default value is "c('dataset', 'member')".} -\item{NDV}{TRUE/FALSE (if Fuckar method is applied or not). Default = FALSE.} +\item{method}{A character string indicating the method to be used. The +options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.} + +\item{ftime_dim}{A character string indicating the name of forecast time +dimension. Only used when method = 'NDV'. The default value is 'ftime'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension. Only used when parameter 'memb' is FALSE. It must be one element +in 'dat_dim'. The default value is 'member'.} + +\item{memb}{A logical value indicating whether to remain 'memb_dim' dimension +(TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE.} + +\item{na.rm}{A logical value indicating whether to remove NA values along +'time_dim' when calculating climatology (TRUE) or return NA if there is NA +along 'time_dim' (FALSE). The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -\item{clim_exp}{Array with same dimensions as var_exp except the third - (starting dates) and, depending on the parameters, the second (members), - which disappear.} -\item{clim_obs}{Array with same dimensions as var_obs except the third - (starting dates) and, depending on the parameters, the second (members), - which disappear.} +A list of 2: +\item{$clim_exp}{ + A numeric array with the same dimensions as parameter 'exp' but + dimension 'time_dim' is moved to the first position. If parameter 'method' + is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE, + dimension 'memb_dim' is also removed. +} +\item{$clim_obs}{ + A numeric array with the same dimensions as parameter 'exp' + except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, + dimension 'memb_dim' is also removed. +} } \description{ -This function computes only per-pair climatologies from the experimental -and observational matrices output from \code{Load()}. -To compute plain climatologies from only experimental or observational -data from \code{Load()}, the following code can be used:\cr -\code{clim <- array(apply(obs_data, c(1, 4, 5, 6), mean),}\cr -\code{ dim = dim(obs_datta)[-c(2, 3)])}\cr -The function \code{Clim()} computes per-pair climatologies using one of the -following methods: +This function computes per-pair climatologies for the experimental +and observational data using one of the following methods: \enumerate{ \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)} \item{Kharin method (Karin et al, GRL, 2012)} \item{Fuckar method (Fuckar et al, GRL, 2014)} } -\code{Clim()} computes climatologies using the startdates covered by the -whole experiments/observational data sets. The startdates not available for -all the data (model and obs) are excluded when computing the climatologies. +Per-pair climatology means that only the startdates covered by the +whole experiments/observational dataset will be used. In other words, the +startdates which are not all available along 'dat_dim' dimension of both +the 'exp' and 'obs' are excluded when computing the climatologies. } \examples{ # Load sample data as in Load() example: example(Load) clim <- Clim(sampleData$mod, sampleData$obs) - \donttest{ +clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = F) +\donttest{ PlotClim(clim$clim_exp, clim$clim_obs, toptitle = paste('sea surface temperature climatologies'), ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') - } +} } \author{ History:\cr 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN + 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-Ano.R b/tests/testthat/test-Ano.R deleted file mode 100644 index aa3d2796a73c723584af5da880f642a253ddf4a9..0000000000000000000000000000000000000000 --- a/tests/testthat/test-Ano.R +++ /dev/null @@ -1,27 +0,0 @@ -context("Generic tests") -test_that("Sanity checks", { - - var <- array(rnorm(16), c(2, 2, 2, 2)) - names(dim(var)) <- c("memb", "lon", "sdates", "lat") - clim <- apply(var, c(1, 2, 4), mean) - names(dim(clim)) <- NULL - expect_error( - Ano(var, clim), - "Provide dimension names on parameter \'var\' and \'clim\' to avoid ambiguity." - ) - - t <- array(rnorm(76), c(1, 3, 4, 3, 2, 2)) - names(dim(t)) <- c("mod", "memb", "sdates", "ltime", "lon", "lat") - c3 <- Clim(t, t, memb = TRUE)$clim_exp # Clim for each member - c1 <- InsertDim(c3[, 1, ,, ], 1, 1) # clim as if memb=FALSE but identical to member 1 - names(dim(c1)) <- c("mod", "ltime", "lon", "lat") - identical(c1[, , , ], c3[, 1, , , ]) # results in TRUE - a3 <- Ano(t, c3) # ano for each member individually - a1 <- Ano(t, c1) # ano for first member - - expect_equal( - a1[, 1, , , , ], - a3[, 1, , , , ] - ) - -}) diff --git a/tests/testthat/test-Clim.R b/tests/testthat/test-Clim.R new file mode 100644 index 0000000000000000000000000000000000000000..d9617a95272f3d151473c18409229f0b97feb668 --- /dev/null +++ b/tests/testthat/test-Clim.R @@ -0,0 +1,194 @@ +context("s2dverification::Clim tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(360), dim = c(dataset = 1, member = 3, sdate = 5, + ftime = 3, lon = 2, lat = 4)) + set.seed(2) + obs1 <- array(rnorm(120), dim = c(dataset = 1, member = 1, + ftime = 3, lon = 2, lat = 4, sdate = 5)) + # dat2 + exp2 <- exp1 + set.seed(1) + na <- floor(runif(5, min = 1, max = 360)) + exp2[na] <- NA + obs2 <- obs1 + set.seed(2) + na <- floor(runif(30, min = 1, max = 120)) + obs2[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + Clim(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Clim(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Clim(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.") + ) + expect_error( + Clim(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + Clim(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + expect_error( + Clim(exp1, obs1, method = TRUE), + "Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'." + ) + expect_error( + Clim(exp1, obs1, time_dim = c('sdate','ftime')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Clim(exp1, obs1, time_dim = 'asd'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, dat_dim = c(1,2)), + "Parameter 'dat_dim' must be a character vector." + ) + expect_error( + Clim(exp1, obs1, dat_dim = c('member', 'dat')), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, ftime_dim = 4), + "Parameter 'ftime_dim' must be a character string." + ) + expect_error( + Clim(exp1, obs1, ftime_dim = 'f'), + "Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, memb_dim = c('dataset', 'member')), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Clim(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, memb = 'member'), + "Parameter 'memb' must be one logical value." + ) + expect_error( + Clim(exp1, obs1, na.rm = na.omit), + "Parameter 'na.rm' must be one logical value." + ) + expect_error( + Clim(exp1, obs1, ncores = T), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + Clim(array(1:10, dim = c(dataset = 2, member = 5, sdate = 4, ftime = 3)), + array(1:4, dim = c(dataset = 2, member = 2, sdate = 5, ftime = 3))), + paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.") + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Clim(exp1, obs1)$clim_exp), + c(dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, memb = FALSE)$clim_exp), + c(dataset = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, time_dim = 'lon')$clim_exp), + c(dataset = 1, member = 3, sdate = 5, ftime = 3, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'kharin')$clim_exp), + c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'NDV')$clim_exp), + c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1)$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'kharin')$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'NDV')$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + (Clim(exp1, obs1)$clim_obs)[1:5], + c(0.14831161, -0.60462627, 0.06609153, -0.23378059, 0.50553522), + tolerance = 0.001 + ) + expect_equal( + (Clim(exp1, obs1, memb = FALSE)$clim_exp)[1:5], + c(0.10084284, 0.06407350, 0.09028584, 0.17526332, 0.18004387), + tolerance = 0.001 + ) + expect_equal( + max(Clim(exp1, obs1)$clim_exp, na.rm = T), + 1.026186, + tolerance = 0.001 + ) + expect_equal( + max(Clim(exp1, obs1, method = 'kharin')$clim_exp, na.rm = T), + 2.282634, + tolerance = 0.001 + ) + expect_equal( + min(Clim(exp1, obs1, method = 'NDV')$clim_exp, na.rm = T), + -4.025745, + tolerance = 0.001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + (Clim(exp2, obs2)$clim_obs)[1:5], + c(0.23142987, -0.60462627, -0.03669491, -0.14193572, 0.61163024), + tolerance = 0.001 + ) + expect_equal( + (Clim(exp2, obs2)$clim_exp)[1:5], + c(0.01054951, -0.04744191, -0.03533071, 0.14149945, 0.02359945), + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Clim(exp2, obs2, na.rm = FALSE)$clim_exp))), + 57 + ) + expect_equal( + length(which(is.na(Clim(exp2, obs2, na.rm = FALSE)$clim_obs))), + 19 + ) + expect_equal( + length(which(is.na(Clim(exp2, obs2)$clim_obs))), + 0 + ) + +}) + +############################################## +