From 4849b135e75d3a63927c8c313dde7c161e828251 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 4 Dec 2019 18:00:05 +0100 Subject: [PATCH 01/10] basic clim working with multiApply --- R/Clim.R | 220 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 118 insertions(+), 102 deletions(-) diff --git a/R/Clim.R b/R/Clim.R index 8445e462..0bf2d054 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -26,6 +26,7 @@ #' Default = FALSE. #'@param NDV TRUE/FALSE (if Fuckar method is applied or not). Default = FALSE. #' +#'@importFrom abind adrop #'@return #'\item{clim_exp}{Array with same dimensions as var_exp except the third #' (starting dates) and, depending on the parameters, the second (members), @@ -48,112 +49,127 @@ #' listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') #' } #'@export -Clim <- function(var_exp, var_obs, memb = TRUE, kharin = FALSE, NDV = FALSE) { - # - # Enlarge the number of dimensions of var_exp and var_obs to 7 if necessary - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimexp <- dim(var_exp) - dimobs <- dim(var_obs) - if (length(dimexp) < 4 | length(dimobs) < 4) { - stop("At least 4 dim needed : c(nexp/nobs, nmemb, nsdates, nltime)") - } - for (jn in 3:max(length(dimexp), length(dimobs))) { - if (dimexp[jn] != dimobs[jn]) { - stop("Wrong input dimensions") +Clim2 <- function(exp, obs, memb = TRUE, method = 'clim', + sdate_dim = 'sdates', member_dim = 'member') { + #check memb + if (!is.logical(memb)) { + stop("Parameter 'memb' must be logical.") + } + if (length(memb) > 1) { + warning("Parameter 'memb' has length greater than 1 and only the ", + "first element will be used.") + memb = memb[1] + } + #check method + if (!is.character(method)) { + stop("Parameter 'method' must be a character string indicating one", + " of 'kharin', 'NDV' or 'clim' methods.") + } + if (length(method) > 1) { + warning("Parameter 'method' has length greater than 1 and only the ", + "first element will be used.") + method = method[1] + } + #check exp + if (is.null(exp)) { + stop("Parameter 'exp' cannot be NULL.") } - } - var_exp <- Enlarge(var_exp, 7) - var_obs <- Enlarge(var_obs, 7) + + if (is.vector(exp)) { + dim(exp) <- c(length(exp)) + if (memb) { + names(dim(exp)) <- 'member' + } else { + if (is.vector(obs)) { + names(dim(exp)) <- 'Dim1' + } else if (length(dim(obs)) == 1) { + names(dim(exp)) <- names(dim(obs)) + } else { + stop("Parameter 'exp' and 'obs' must be arrays with named", + " dimensions.") + } + } + } + exp_dims <- dim(exp) - # - # Find common points to compute climatologies - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - tmp <- MeanListDim(var_obs, dims = 5:7, narm = TRUE) - tmp2 <- MeanListDim(var_exp, dims = 5:7, narm = TRUE) - nan <- MeanListDim(tmp, dims = 1:2, narm = FALSE) + Mean1Dim(Mean1Dim(tmp2, 2, - narm = TRUE), - 1, narm = FALSE) - for (jdate in 1:dimexp[3]) { - for (jtime in 1:dimexp[4]) { - if (is.na(nan[jdate, jtime])) { - var_exp[, , jdate, jtime, , , ] <- NA - var_obs[, , jdate, jtime, , , ] <- NA - } + #check obs + if (is.null(obs)) { + stop("Parameter 'obs' cannot be NULL.") } - } - - # - # Compute climatologies - # ~~~~~~~~~~~~~~~~~~~~~~~ - # - out_clim_obs <- Mean1Dim(var_obs, posdim = 3, narm = TRUE) - dim_clim_obs <- dimobs[-3] - - if (kharin == TRUE) { - tmp_obs <- Trend(var_obs, posTR = 3) - tmp_exp <- Trend(var_exp, posTR = 3) - trend_obs <- array(dim = dim(var_exp)) - trend_exp <- array(dim = dim(var_exp)) - for (jdate in 1:dimexp[3]) { - trend_exp[, , jdate, , , , ] <- tmp_exp$trend[, , 4, , , , ] + - jdate * tmp_exp$trend[, , 2, , , , ] - tmp_obs2 <- MeanListDim(tmp_obs$trend,c(2, 1)) - trend_obs[, , jdate, , , , ] <- InsertDim(InsertDim(tmp_obs2[4, , , , ] + - jdate * tmp_obs2[2, , , , ], 1, dimexp[1]), - 2, dimexp[2]) + if (is.vector(obs)) { + dim(obs) <- c(length(obs)) + if (memb) { + dim(obs) <- c(member = 1, Dim1 = length(obs)) + } else { + if (length(dim(exp)) == 1) { + names(dim(obs)) <- names(exp_dims) + } else { + stop("Parameter 'obs' must have dimension names.") + } + } } - out_clim_exp <- trend_exp - trend_obs + InsertDim(InsertDim(InsertDim( - MeanListDim(out_clim_obs, c(2, 1)), 1, dimexp[1]), 2, - dimexp[2]), 3, dimexp[3]) - dim_clim_exp <- dimexp - } else if (NDV == TRUE) { - iniobs <- InsertDim(SelIndices(var_obs, 4, c(1, 1)), 4, dimobs[4]) - iniexp <- InsertDim(SelIndices(var_exp, 4, c(1, 1)), 4, dimexp[4]) - tmp_obs <- Regression(var_obs, iniobs, posREG = 3) - tmp_exp <- Regression(var_exp, iniexp, posREG = 3) - reg_obs <- array(dim = dim(var_exp)) - reg_exp <- array(dim = dim(var_exp)) - for (jdate in 1:dimexp[3]) { - reg_exp[, , jdate, , , , ] <- tmp_exp$regression[, , 4, , , , ] + - iniexp[, , jdate, , , , ] * - tmp_exp$regression[, , 2, , , , ] - tmp_obs2 <- MeanListDim(tmp_obs$regression,c(2, 1)) - reg_obs[, , jdate, , , , ] <- InsertDim(InsertDim(tmp_obs2[4, , , , ] + - MeanListDim(iniobs,c(2, 1))[jdate, , , , ] * - tmp_obs2[2, , , , ], 1, dimexp[1]), - 2, dimexp[2]) + obs_dims <- dim(obs) + if (is.null(names(obs_dims))) { + stop("Parameter 'obs' must have dimension names.") } - out_clim_exp <- reg_exp - reg_obs + InsertDim(InsertDim(InsertDim( - MeanListDim(out_clim_obs, c(2, 1)), 1, dimexp[1]), 2, - dimexp[2]), 3, dimexp[3]) - dim_clim_exp <- dimexp - } else { - out_clim_exp <- Mean1Dim(var_exp, posdim = 3, narm = TRUE) - dim_clim_exp <- dimexp[-3] - } - - if (memb != TRUE) { - out_clim_obs <- Mean1Dim(out_clim_obs, posdim = 2, narm = TRUE) - out_clim_exp <- Mean1Dim(out_clim_exp, posdim = 2, narm = TRUE) - dim_clim_exp <- dim_clim_exp[-2] - dim_clim_obs <- dim_clim_obs[-2] - } - - # - # Reduce the number of dimensions to the original one - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - - clim_exp <- array(dim = dim_clim_exp) - clim_exp[] <- out_clim_exp - clim_obs <- array(dim = dim_clim_obs) - clim_obs[] <- out_clim_obs + if (is.null(names(exp_dims))) { + stop("Parameter 'exp' must have dimension names.") + } + obs_dimnames <- names(obs_dims) + exp_dimnames <- names(exp_dims) + if (is.null(sdate_dim)) { + sdate_dim = 'sdates' + } + if (is.null(member_dim)) { + member_dim = 'member' + } + if (!(any(obs_dimnames %in% sdate_dim) & any(exp_dimnames %in% sdate_dim))) { + stop("Parameter 'obs' and 'exp' needs one dimension for start", + " dates.") + } + if (memb) { + if (any(obs_dimnames %in% member_dim)) { + if (obs_dims[member_dim] == 1) { + var_obs <- adrop(var_obs, drop = member_dim) + } + } + clim <- Apply(list(exp, obs), + target_dims = sdate_dim, + fun = .Clim, method = method, na.rm = na.rm) + clim$clim_obs <- Subset(clim$clim_obs, along = member_dim, indices = 1) + dims_out_exp <- names(exp_dims[-which(exp_dimnames == sdate_dim)]) + dims_out_obs <- names(obs_dims[-which(obs_dimnames == sdate_dim)]) + reorder <- match(dims_out_exp, names(dim(clim$clim_exp))) + clim$clim_exp <- aperm(clim$clim_exp, reorder) + names(dim(clim$clim_exp)) <- dims_out_exp[reorder] + reorder <- match(dims_out_obs, names(dim(clim$clim_obs))) + clim$clim_obs <- aperm(clim$clim_obs, reorder) + names(dim(clim$clim_obs)) <- dims_out_obs[reorder] + } else { + clim <- Apply(list(exp, obs), target_dims = c(member_dim, sdate_dim), + fun = .Clim, method = method, na.rm = na.rm) + } + invisible(clim) +} +.Clim <- function(exp, obs, method = "clim", na.rm = TRUE, sdate_dim = 'sdates') { + if (method == "kharin") { + tmp_obs <- Trend(var = obs, sdate_dim = sdate_dim, interval = 1, pldegree = 1, + conf = FALSE, siglev = 0.95)$trend + tmp_exp <- Trend(var = exp, interval = 1, pldegree = 1, sdate_dim = sdate_dim, + conf = TRUE, siglev = 0.95)$trend + # exp2 <- tmp_exp[1] + tmp_exp[2] * exp + #trend_obs[, , jdate, , , , ] <- InsertDim(InsertDim(tmp_obs2[4, , , , ] + + # jdate * tmp_obs2[2, , , , ], 1, dimexp[1]), + # 2, dimexp[2]) - # - # Outputs - # ~~~~~~~~~ - # - invisible(list(clim_exp = clim_exp, clim_obs = clim_obs)) + } else if (method == "NDV") { + #nose aun + } else if (method == "clim") { + # does the mean along start dates: + clim_exp <- mean(exp, na.rm = na.rm) + clim_obs <- mean(obs, na.rm = na.rm) + } else { + stop("Parameter 'method' must be one of 'kharin', 'NDV' or 'clim'.") + } + return(list(clim_exp = clim_exp, clim_obs = clim_obs)) } -- GitLab From 9485c660745e320a0f4ef63d4ddf407b272801bf Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 16 Dec 2019 14:22:36 +0100 Subject: [PATCH 02/10] Parameter na.rm included --- R/Clim.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Clim.R b/R/Clim.R index 0bf2d054..59424aa4 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -50,7 +50,7 @@ #' } #'@export Clim2 <- function(exp, obs, memb = TRUE, method = 'clim', - sdate_dim = 'sdates', member_dim = 'member') { + sdate_dim = 'sdates', member_dim = 'member', na.rm = TRUE) { #check memb if (!is.logical(memb)) { stop("Parameter 'memb' must be logical.") -- GitLab From 79a4b1dd07bc333d48810b49fc23d074cbdbde67 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 16 Dec 2019 16:57:03 +0100 Subject: [PATCH 03/10] working with basic method 'clim' --- R/Clim.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Clim.R b/R/Clim.R index 59424aa4..97268895 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -130,7 +130,7 @@ Clim2 <- function(exp, obs, memb = TRUE, method = 'clim', if (memb) { if (any(obs_dimnames %in% member_dim)) { if (obs_dims[member_dim] == 1) { - var_obs <- adrop(var_obs, drop = member_dim) + obs <- adrop(obs, drop = member_dim) } } clim <- Apply(list(exp, obs), -- GitLab From cf6416de5a0456c583d7c46ad2b91ead370a8ce2 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 22 Jan 2020 13:34:55 +0100 Subject: [PATCH 04/10] Finish clim and kharin method. Problem with NA issue --- R/Clim.R | 260 +++++++++++++++++++++++++---------------------- R/Trend.R | 299 +++++++++++++++++++++++++++++------------------------- 2 files changed, 300 insertions(+), 259 deletions(-) diff --git a/R/Clim.R b/R/Clim.R index 97268895..7590258e 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -1,4 +1,4 @@ -#'Computes Bias Corrected Climatologies +#'Compute Bias Corrected Climatologies #' #'This function computes only per-pair climatologies from the experimental #'and observational matrices output from \code{Load()}. @@ -17,21 +17,20 @@ #'whole experiments/observational data sets. The startdates not available for #'all the data (model and obs) are excluded when computing the climatologies. #' -#'@param var_exp Model data: c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to -#' c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon). -#'@param var_obs Observational data: c(nobs, nmemb, nsdates, nltime) up to -#' c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon). +#'@param exp Model data: c(nmod/nexp, nmemb/nparam, nsdate, nltime) up to +#' c(nmod/nexp, nmemb/nparam, nsdate, nltime, nlevel, nlat, nlon). +#'@param obs Observational data: c(nobs, nmemb, nsdate, nltime) up to +#' c(nobs, nmemb, nsdate, nltime, nlevel, nlat, nlon). #'@param memb TRUE/FALSE (1 climatology for each member). Default = TRUE. #'@param kharin TRUE/FALSE (if Kharin method is applied or not). #' Default = FALSE. #'@param NDV TRUE/FALSE (if Fuckar method is applied or not). Default = FALSE. #' -#'@importFrom abind adrop #'@return -#'\item{clim_exp}{Array with same dimensions as var_exp except the third +#'\item{clim_exp}{Array with same dimensions as exp except the third #' (starting dates) and, depending on the parameters, the second (members), #' which disappear.} -#'\item{clim_obs}{Array with same dimensions as var_obs except the third +#'\item{clim_obs}{Array with same dimensions as obs except the third #' (starting dates) and, depending on the parameters, the second (members), #' which disappear.} #'@keywords datagen @@ -48,128 +47,145 @@ #' ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'), #' listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') #' } +#'@importFrom abind adrop +#'@import multiApply #'@export -Clim2 <- function(exp, obs, memb = TRUE, method = 'clim', - sdate_dim = 'sdates', member_dim = 'member', na.rm = TRUE) { - #check memb - if (!is.logical(memb)) { - stop("Parameter 'memb' must be logical.") - } - if (length(memb) > 1) { - warning("Parameter 'memb' has length greater than 1 and only the ", - "first element will be used.") - memb = memb[1] - } - #check method - if (!is.character(method)) { - stop("Parameter 'method' must be a character string indicating one", - " of 'kharin', 'NDV' or 'clim' methods.") - } - if (length(method) > 1) { - warning("Parameter 'method' has length greater than 1 and only the ", - "first element will be used.") - method = method[1] - } - #check exp - if (is.null(exp)) { - stop("Parameter 'exp' cannot be NULL.") - } +Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = NULL, comp_dim = NULL, limits = NULL, + sdate_dim = 'sdate', memb_dim = 'member', na.rm = TRUE, ncores = NULL) { - if (is.vector(exp)) { - dim(exp) <- c(length(exp)) - if (memb) { - names(dim(exp)) <- 'member' - } else { - if (is.vector(obs)) { - names(dim(exp)) <- 'Dim1' - } else if (length(dim(obs)) == 1) { - names(dim(exp)) <- names(dim(obs)) - } else { - stop("Parameter 'exp' and 'obs' must be arrays with named", - " dimensions.") - } - } - } - exp_dims <- dim(exp) - - #check obs - if (is.null(obs)) { - stop("Parameter 'obs' cannot be NULL.") - } - if (is.vector(obs)) { - dim(obs) <- c(length(obs)) - if (memb) { - dim(obs) <- c(member = 1, Dim1 = length(obs)) - } else { - if (length(dim(exp)) == 1) { - names(dim(obs)) <- names(exp_dims) - } else { - stop("Parameter 'obs' must have dimension names.") - } - } - } - obs_dims <- dim(obs) - if (is.null(names(obs_dims))) { - stop("Parameter 'obs' must have dimension names.") + + ############################### + # Calculate Clim + + #---------------------------------- + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + + if (is.null(limits)) { + limits <- list(NA, length(comp_dim)) + for (ii in 1:length(comp_dim)) { + limits[[ii]] <- c(1, dim(obs)[comp_dim[ii]]) + } } - if (is.null(names(exp_dims))) { - stop("Parameter 'exp' must have dimension names.") + + for (i in 1:length(comp_dim)) { #[dat, sdate] + + pos <- which(names(dim(obs)) == comp_dim[i]) + outrows <- is.na(Mean1Dim(obs, pos, narm = FALSE, limits[[i]])) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim[i]]) + obs[which(outrows)] <- NA } - obs_dimnames <- names(obs_dims) - exp_dimnames <- names(exp_dims) - if (is.null(sdate_dim)) { - sdate_dim = 'sdates' + } + #----------------------------------- + + + if (method == 'clim') { + clim <- Apply(list(exp, obs), + target_dims = c(sdate_dim, memb_dim, dat_dim), + fun = .Clim, + method = method, na.rm = na.rm, memb = memb, ncores = ncores) + # Add member dimension name back + if (memb) { + if(is.null(names(dim(clim$clim_exp))[1])) { + names(dim(clim$clim_exp))[1] <- memb_dim + names(dim(clim$clim_obs))[1] <- memb_dim + } } - if (is.null(member_dim)) { - member_dim = 'member' + + } else if (method == 'kharin') { + clim <- Apply(list(exp, obs), + target_dims = c(sdate_dim, memb_dim, dat_dim), + fun = .Clim, + method = method, na.rm = na.rm, memb = memb, ncores = ncores) + } + + + return(clim) +} + + +.Clim <- function(exp, obs, #sdate_dim = 'sdate', memb_dim = 'member', dat_dim2 = dat_dim, + method = method, na.rm = TRUE, memb = TRUE) { + + ## 1. sdate mean. member average is done after this + + # exp: [sdate, member_exp] + # obs: [sdate, member_obs] + + if (method == 'clim') { + clim_exp <- apply(exp, which(names(dim(exp)) != sdate_dim), + mean, na.rm = na.rm) #average out sdate_dim + clim_obs <- apply(obs, which(names(dim(obs)) != sdate_dim), + mean, na.rm = na.rm) #[memb] or [memb, dat] + + } else if (method == 'kharin') { + # obs clim + clim_obs <- apply(obs, which(names(dim(obs)) != sdate_dim), + mean, na.rm = na.rm) #[memb] or [memb, dat] + + # exp clim + ##--- NEW trend ---## +source('/home/Earth/aho/s2dverification/R/Trend.R') + tmp_obs <- Trend(data = obs, sdate_dim = sdate_dim, interval = 1, + polydeg = 1, conf = FALSE, siglev = 0.95)$trend + tmp_exp <- Trend(data = exp, sdate_dim = sdate_dim, interval = 1, + polydeg = 1, conf = FALSE, siglev = 0.95)$trend + # tmp_exp: [stats, member_exp, (dat_exp)] + + trend_obs <- array(dim = dim(exp)) #[sdate, member_exp, (dat_exp)] + trend_exp <- array(dim = dim(exp)) + +# trend_obs <- array(dim = c(dim(exp)[sdate_dim], +# dim(exp)[-which(names(dim(exp)) == sdate_dim)])) +# trend_exp <- array(dim = c(dim(exp)[sdate_dim], +# dim(exp)[-which(names(dim(exp)) == sdate_dim)])) + + tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out member (and dataset) + tmp_obs_mean <- InsertDim(tmp_obs_mean, 2, dim(exp)[memb_dim]) + if (length(dim(exp)) == 3) { + tmp_obs_mean <- InsertDim(tmp_obs_mean, 3, dim(exp)[dat_dim]) } - if (!(any(obs_dimnames %in% sdate_dim) & any(exp_dimnames %in% sdate_dim))) { - stop("Parameter 'obs' and 'exp' needs one dimension for start", - " dates.") + + for (jdate in 1:dim(exp)[sdate_dim]) { + if (length(dim(exp)) == 2) { + trend_exp[jdate, ] <- tmp_exp[1, ] + jdate * tmp_exp[2, ] + trend_obs[jdate, ] <- tmp_obs_mean[1, ] + jdate * tmp_obs_mean[2, ] + } else if (length(dim(exp)) == 3) { + trend_exp[jdate, , ] <- tmp_exp[1, , ] + jdate * tmp_exp[2, , ] + trend_obs[jdate, , ] <- tmp_obs_mean[1, , ] + jdate * tmp_obs_mean[2, , ] + } } - if (memb) { - if (any(obs_dimnames %in% member_dim)) { - if (obs_dims[member_dim] == 1) { - obs <- adrop(obs, drop = member_dim) - } - } - clim <- Apply(list(exp, obs), - target_dims = sdate_dim, - fun = .Clim, method = method, na.rm = na.rm) - clim$clim_obs <- Subset(clim$clim_obs, along = member_dim, indices = 1) - dims_out_exp <- names(exp_dims[-which(exp_dimnames == sdate_dim)]) - dims_out_obs <- names(obs_dims[-which(obs_dimnames == sdate_dim)]) - reorder <- match(dims_out_exp, names(dim(clim$clim_exp))) - clim$clim_exp <- aperm(clim$clim_exp, reorder) - names(dim(clim$clim_exp)) <- dims_out_exp[reorder] - reorder <- match(dims_out_obs, names(dim(clim$clim_obs))) - clim$clim_obs <- aperm(clim$clim_obs, reorder) - names(dim(clim$clim_obs)) <- dims_out_obs[reorder] - } else { - clim <- Apply(list(exp, obs), target_dims = c(member_dim, sdate_dim), - fun = .Clim, method = method, na.rm = na.rm) + + if (length(dim(exp)) == 3) { + clim_obs_mean <- apply(clim_obs, 1, mean) + clim_obs_mean <- mean(clim_obs_mean) + clim_obs_mean <- InsertDim( + InsertDim( + InsertDim(clim_obs_mean, 1, dim(exp)[1]), + 2, dim(exp)[2]), + 3, dim(exp)[3]) #[sdate, memb_exp, dat_exp, 1] + clim_obs_mean <- Subset(clim_obs_mean, 4, 1, drop = 'selected') #[sdate, memb_exp, dat_exp] + + } else if (length(dim(exp)) == 2) { + clim_obs_mean <- mean(clim_obs) #a number + clim_obs_mean <- InsertDim( + InsertDim(clim_obs_mean, 1, dim(exp)[1]), + 2, dim(exp)[2]) #[sdate, memb_exp, 1] + clim_obs_mean <- Subset(clim_obs_mean, 3, 1, drop = 'selected') #[sdate, memb_exp] } - invisible(clim) -} -.Clim <- function(exp, obs, method = "clim", na.rm = TRUE, sdate_dim = 'sdates') { - if (method == "kharin") { - tmp_obs <- Trend(var = obs, sdate_dim = sdate_dim, interval = 1, pldegree = 1, - conf = FALSE, siglev = 0.95)$trend - tmp_exp <- Trend(var = exp, interval = 1, pldegree = 1, sdate_dim = sdate_dim, - conf = TRUE, siglev = 0.95)$trend - # exp2 <- tmp_exp[1] + tmp_exp[2] * exp - #trend_obs[, , jdate, , , , ] <- InsertDim(InsertDim(tmp_obs2[4, , , , ] + - # jdate * tmp_obs2[2, , , , ], 1, dimexp[1]), - # 2, dimexp[2]) - - } else if (method == "NDV") { + + clim_exp <- trend_exp - trend_obs + clim_obs_mean + + + } else if (method == 'NDV') { #nose aun - } else if (method == "clim") { - # does the mean along start dates: - clim_exp <- mean(exp, na.rm = na.rm) - clim_obs <- mean(obs, na.rm = na.rm) - } else { - stop("Parameter 'method' must be one of 'kharin', 'NDV' or 'clim'.") - } + } + + ## 2. member mean + if (!memb) { + clim_exp <- mean(clim_exp, na.rm = TRUE) #remove NA member + clim_obs <- mean(clim_obs, na.rm = TRUE) #return a number + } + return(list(clim_exp = clim_exp, clim_obs = clim_obs)) } diff --git a/R/Trend.R b/R/Trend.R index ba35b4d7..3ed1acd7 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -1,167 +1,192 @@ -#'Computes the Trend of the Ensemble Mean +#'Compute the Trend of the Ensemble Mean #' -#'Computes the trend along the forecast time of the ensemble mean by least -#'square fitting, and the associated error interval.\cr -#'Trend() also provides the time series of the detrended ensemble mean -#'forecasts.\cr -#'The confidence interval relies on a student-T distribution.\cr\cr -#'.Trend provides the same functionality but taking a matrix ensemble members -#'as input (exp). +#'Compute the polynomial regression along the forecast time with any degree of +#'polynomial. It returns the regression coefficients (including the intercept) +#'and the confidence intervals if needed. The detrended array is also provided.\cr +#'The confidence interval relies on the student-T distribution.\cr\cr #' -#'@param var An array of any number of dimensions up to 10. -#'@param posTR An integer indicating the position along which to compute the -#' trend. -#'@param interval A number of months/years between 2 points along posTR -#' dimension. Set 1 as default. -#'@param siglev A numeric value indicating the confidence level for the -#' computation of confidence interval. Set 0.95 as default. -#'@param conf A logical value indicating whether to compute the confidence -#' levels or not. Set TRUE as default. -#'@param exp An M by N matrix representing M forecasts from N ensemble members. +#'@param data An numeric array including the dimension along which the trend +#' is computed. +#'@param sdate_dim A character string indicating the dimension along which to +#' compute the trend. +#'@param interval A positive numeric indicating the unit length between two +#' points along 'sdate_dim' dimension. Set 1 as default. +#'@param siglev A numeric indicating the confidence level for the +#' regression computation. Set 0.95 as default. +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. Set TRUE as default. +#'@param polydeg A positive integer indicating the degree of polynomial +#' regression. Set 1 as default. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. Default value is NULL. #' #'@return #'\item{$trend}{ -#' The intercept and slope coefficients for the least squares fitting of the -#' trend. -#' An array with same dimensions as parameter 'var' except along the posTR -#' dimension, which is replaced by a length 4 (or length 2 if conf = FALSE) -#' dimension, corresponding to the lower limit of the confidence interval -#' (only present if conf = TRUE), the slope, the upper limit of the confidence -#' interval (only present if conf = TRUE), and the intercept. +#' A numeric array with same dimensions as parameter 'data' except the +#' 'sdate_dim' dimension, which is replaced by a 'stats' dimension containing +#' the regression coefficients from the lowest order (i.e., intercept) to +#' the highest degree. The length of the 'stats' dimension should be +#' \code{polydeg + 1}. +#'} +#'\item{$conf.lower}{ +#' A numeric array with same dimensions as parameter 'data' except the +#' 'sdate_dim' dimension, which is replaced by a 'stats' dimension containing +#' the lower limit of the \code{siglev}\% confidence interval for all +#' the regression coefficients with the same order as $trend. The length of the +#' 'stats' dimension should be \code{polydeg + 1}. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' A numeric array with same dimensions as parameter 'data' except the +#' 'sdate_dim' dimension, which is replaced by a 'stats' dimension containing +#' the upper limit of the \code{siglev}\% confidence interval for all +#' the regression coefficients with the same order as $trend. The length of the +#' 'stats' dimension should be \code{polydeg + 1}. Only present if \code{conf = TRUE}. #'} #'\item{$detrended}{ -#' Same dimensions as var with linearly detrended var along the posTR -#' dimension. +#' A numeric array with the same dimensions as paramter 'data', the detrended +#' values along the 'sdate_dim' dimension. #'} -#'Only in .Trend: -#'\item{$conf.int}{ -#' Corresponding to the limits of the \code{siglev}\% confidence interval -#' (only present if \code{conf = TRUE}) for the slope coefficient. -#'} #' #'@keywords datagen #'@author History:\cr #'0.1 - 2011-05 (V. Guemas, \email{virginie.guemas@@ic3.cat}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Formatting to CRAN\cr #'2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter@@bsc.es}) - Adapt to veriApply() +#'3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adapt to multiApply() #'@examples #'# Load sample data as in Load() example: #'example(Load) #'months_between_startdates <- 60 -#'trend <- Trend(sampleData$obs, 3, months_between_startdates) -#' \donttest{ -#'PlotVsLTime(trend$trend, toptitle = "trend", ytitle = "K / (5 year)", -#' monini = 11, limits = c(-1,1), listexp = c('CMIP5 IC3'), -#' listobs = c('ERSST'), biglab = FALSE, hlines = 0, -#' fileout = 'tos_obs_trend.eps') -#'PlotAno(trend$detrended, NULL, startDates, -#' toptitle = 'detrended anomalies (along the startdates)', ytitle = 'K', -#' legends = 'ERSST', biglab = FALSE, fileout = 'tos_detrended_obs.eps') -#' } +#'trend <- Trend(sampleData$obs, polydeg = 2) #' #'@rdname Trend +#'@import multiApply #'@export -Trend <- function(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) { - # - # Enlarge the size of var to 10 and move posTR to first position - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(var) - if (is.null(dimsvar)) { - dimsvar <- length(var) +Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = TRUE, polydeg = 1, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") } - enlvar <- Enlarge(var, 10) - outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) - if (conf) { - nvals <- 4 - poscoef2 <- 2 - poscoef1 <- 4 - } else { - nvals <- 2 - poscoef2 <- 1 - poscoef1 <- 2 + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") } - outdim[posTR] <- nvals - posaperm <- 1:10 - posaperm[posTR] <- 1 - posaperm[1] <- posTR - enlvar <- aperm(enlvar, posaperm) - dimsaperm <- outdim[posaperm] - # - # Loop on all dimensions to compute trends - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - enltrend <- array(dim = dimsaperm) - enldetrend <- array(dim = dim(enlvar)) - for (j2 in 1:dimsaperm[2]) { - for (j3 in 1:dimsaperm[3]) { - for (j4 in 1:dimsaperm[4]) { - for (j5 in 1:dimsaperm[5]) { - for (j6 in 1:dimsaperm[6]) { - for (j7 in 1:dimsaperm[7]) { - for (j8 in 1:dimsaperm[8]) { - for (j9 in 1:dimsaperm[9]) { - for (j10 in 1:dimsaperm[10]) { - tmp <- enlvar[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - if (any(!is.na(tmp))) { - mon <- seq(tmp) * interval - lm.out <- lm(tmp ~ mon, na.action = na.omit) - enltrend[poscoef2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[2] - enltrend[poscoef1, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[1] - if (conf) { - enltrend[c(1, 3), j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out, level = siglev)[2, 1:2] - } - enldetrend[is.na(tmp) == FALSE, j2, j3, j4, j5, j6, j7, j8, - j9, j10] <- tmp[is.na(tmp) == FALSE] - lm.out$fitted.values - } - } - } - } - } - } - } - } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- sdate_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) > 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(data))) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + ## interval + if (any(!is.numeric(interval) | interval <= 0 | length(interval) > 1)) { + stop("Parameter 'interval' must be a positive number.") + } + ## siglev + if (!is.numeric(siglev) | siglev < 0 | siglev > 1 | length(siglev) > 1) { + stop("Parameter 'siglev' must be a numeric number between 0 and 1.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## polydeg + if (!is.numeric(polydeg) | polydeg %% 1 != 0 | polydeg < 0 | + length(polydeg) > 1) { + stop("Parameter 'polydeg' must be a positive integer.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } } - # - # Back to the original dimensions - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - enldetrend <- aperm(enldetrend, posaperm) - dim(enldetrend) <- dimsvar - enltrend <- aperm(enltrend, posaperm) - dimsvar[posTR] <- nvals - dim(enltrend) <- dimsvar - # - # Outputs - # ~~~~~~~~~ - # - invisible(list(trend = enltrend, detrended = enldetrend)) + + ############################### + # Calculate Trend + dim_names <- names(dim(data)) + + output <- Apply(list(data), + target_dims = sdate_dim, + fun = .Trend, + #output_dims = , + interval = interval, siglev = siglev, conf = conf, + polydeg = polydeg, + ncores = ncores) + + output <- lapply(output, .reorder, sdate_dim = sdate_dim, dim_names = dim_names) + + return(output) } -#'@rdname Trend -#'@importFrom stats lm na.omit confint -#'@export -.Trend <- function(exp, interval = 1, siglev = 0.95, conf = TRUE) { - - ensmean <- rowMeans(exp, na.rm = TRUE) - - if (any(!is.na(ensmean))) { - mon <- seq(ensmean) * interval - lm.out <- lm(ensmean ~ mon, na.action = na.omit) - trend <- c(lm.out$coefficients[2], lm.out$coefficients[1]) +.Trend <- function(x, interval = interval, siglev = siglev, conf = conf, polydeg = polydeg) { + if (any(!is.na(x))) { + mon <- seq(x) * interval + lm.out <- lm(x ~ poly(mon, degree = polydeg, raw = TRUE), na.action = na.omit) + trend <- lm.out$coefficients #intercept, slope1, slope2,... + + if (conf) { + conf.lower <- confint(lm.out, level = siglev)[, 1] + conf.upper <- confint(lm.out, level = siglev)[, 2] + } + + detrended <- c() + detrended[is.na(x) == FALSE] <- x[is.na(x) == FALSE] - lm.out$fitted.values + } else { + trend <- NA + detrend <- NA if (conf) { - conf.int <- confint(lm.out, level = siglev)[2, 1:2] + conf.lower <- NA + conf.upper <- NA } - detrend <- ensmean[is.na(ensmean) == FALSE] - lm.out$fitted.values - } - - # - # Outputs - # ~~~~~~~~~ - # - invisible(list(trend = trend, conf.int = conf.int, detrended = detrend)) + } + + if (conf) { + return(list(trend = trend, conf.lower = conf.lower, conf.upper = conf.upper, + detrended = detrended)) + } else { + return(list(trend = trend, detrended = detrended)) + } + } + + .reorder <- function(output, sdate_dim, dim_names) { + + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- sdate_dim + } else { + names(dim(output)) <- sdate_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- sdate_dim + } else { #regular case + names(dim(output))[1] <- sdate_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == sdate_dim] <- 'stats' + + return(output) + } -- GitLab From e05b03e6dd0797b283d485cad10d92d3756a09e4 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 22 Jan 2020 16:22:40 +0100 Subject: [PATCH 05/10] Small change --- R/Clim.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/Clim.R b/R/Clim.R index 7590258e..d8129c77 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -135,11 +135,6 @@ source('/home/Earth/aho/s2dverification/R/Trend.R') trend_obs <- array(dim = dim(exp)) #[sdate, member_exp, (dat_exp)] trend_exp <- array(dim = dim(exp)) -# trend_obs <- array(dim = c(dim(exp)[sdate_dim], -# dim(exp)[-which(names(dim(exp)) == sdate_dim)])) -# trend_exp <- array(dim = c(dim(exp)[sdate_dim], -# dim(exp)[-which(names(dim(exp)) == sdate_dim)])) - tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out member (and dataset) tmp_obs_mean <- InsertDim(tmp_obs_mean, 2, dim(exp)[memb_dim]) if (length(dim(exp)) == 3) { -- GitLab From 808d40b1f61b3594b9c3b9a5c94578cbc3eed35b Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 27 Jan 2020 19:23:58 +0100 Subject: [PATCH 06/10] Finish method 'clim' and 'kharin'. Check NA issue and memb = F --- R/Clim.R | 163 ++++++++++++++++++++++++++++++++---------------------- R/Trend.R | 42 +++++++------- 2 files changed, 118 insertions(+), 87 deletions(-) diff --git a/R/Clim.R b/R/Clim.R index d8129c77..2645b9cf 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -22,9 +22,7 @@ #'@param obs Observational data: c(nobs, nmemb, nsdate, nltime) up to #' c(nobs, nmemb, nsdate, nltime, nlevel, nlat, nlon). #'@param memb TRUE/FALSE (1 climatology for each member). Default = TRUE. -#'@param kharin TRUE/FALSE (if Kharin method is applied or not). -#' Default = FALSE. -#'@param NDV TRUE/FALSE (if Fuckar method is applied or not). Default = FALSE. +#'@param method A character string indicating the method to be used. #' #'@return #'\item{clim_exp}{Array with same dimensions as exp except the third @@ -50,38 +48,49 @@ #'@importFrom abind adrop #'@import multiApply #'@export -Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = NULL, comp_dim = NULL, limits = NULL, - sdate_dim = 'sdate', memb_dim = 'member', na.rm = TRUE, ncores = NULL) { +Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = c('dataset', 'member'), + time_dim = 'sdate', memb_dim = NULL, na.rm = TRUE, ncores = NULL) { ############################### # Calculate Clim #---------------------------------- - # Remove data along comp_dim dim if there is at least one NA between limits - if (!is.null(comp_dim)) { + # Remove all sdate if not complete along dat_dim - if (is.null(limits)) { - limits <- list(NA, length(comp_dim)) - for (ii in 1:length(comp_dim)) { - limits[[ii]] <- c(1, dim(obs)[comp_dim[ii]]) - } + pos <- rep(0, length(dat_dim)) + for (i in 1:length(dat_dim)) { #[dat, sdate] + ## dat_dim: [dataset, member] + pos[i] <- which(names(dim(obs)) == dat_dim[i]) } - for (i in 1:length(comp_dim)) { #[dat, sdate] + outrows_exp <- MeanListDim(exp, pos, narm = FALSE) + + MeanListDim(obs, pos, narm = FALSE) + outrows_obs <- outrows_exp +#print(length(obs[which(is.na(outrows_exp))])) - pos <- which(names(dim(obs)) == comp_dim[i]) - outrows <- is.na(Mean1Dim(obs, pos, narm = FALSE, limits[[i]])) - outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim[i]]) - obs[which(outrows)] <- NA + for (i in 1:length(pos)) { + outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]]) + outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]]) } - } +#print('before assign na') +#print(length(obs[which(is.na(exp))])) +#print(length(obs[which(is.na(obs))])) + + exp[which(is.na(outrows_exp))] <- NA + obs[which(is.na(outrows_obs))] <- NA +#print('after assign na') +#print(length(obs[which(is.na(exp))])) +#print(length(obs[which(is.na(obs))])) + + + #----------------------------------- if (method == 'clim') { clim <- Apply(list(exp, obs), - target_dims = c(sdate_dim, memb_dim, dat_dim), + target_dims = c(time_dim, dat_dim), fun = .Clim, method = method, na.rm = na.rm, memb = memb, ncores = ncores) # Add member dimension name back @@ -94,7 +103,7 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = NULL, comp_di } else if (method == 'kharin') { clim <- Apply(list(exp, obs), - target_dims = c(sdate_dim, memb_dim, dat_dim), + target_dims = c(time_dim, dat_dim), fun = .Clim, method = method, na.rm = na.rm, memb = memb, ncores = ncores) } @@ -104,7 +113,7 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = NULL, comp_di } -.Clim <- function(exp, obs, #sdate_dim = 'sdate', memb_dim = 'member', dat_dim2 = dat_dim, +.Clim <- function(exp, obs, #time_dim = 'sdate', memb_dim = 'member', dat_dim2 = dat_dim, method = method, na.rm = TRUE, memb = TRUE) { ## 1. sdate mean. member average is done after this @@ -113,74 +122,96 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = NULL, comp_di # obs: [sdate, member_obs] if (method == 'clim') { - clim_exp <- apply(exp, which(names(dim(exp)) != sdate_dim), - mean, na.rm = na.rm) #average out sdate_dim - clim_obs <- apply(obs, which(names(dim(obs)) != sdate_dim), + clim_exp <- apply(exp, which(names(dim(exp)) != time_dim), + mean, na.rm = na.rm) #average out time_dim + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), mean, na.rm = na.rm) #[memb] or [memb, dat] + ## member mean + if (!memb) { + if (length(dim(clim_exp)) == 1) { #dim: [member] + clim_exp <- mean(clim_exp, na.rm = TRUE) + clim_obs <- mean(clim_obs, na.rm = TRUE) + } else { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + } + } + } else if (method == 'kharin') { # obs clim - clim_obs <- apply(obs, which(names(dim(obs)) != sdate_dim), + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), mean, na.rm = na.rm) #[memb] or [memb, dat] # exp clim + ##--- OLD trend ---## +# tmp_obs <- Trend(obs, posTR = 1)$trend +# tmp_exp <- Trend(exp, posTR = 1)$trend +#print(head(tmp_obs)) +#print(head(tmp_exp)) ##--- NEW trend ---## source('/home/Earth/aho/s2dverification/R/Trend.R') - tmp_obs <- Trend(data = obs, sdate_dim = sdate_dim, interval = 1, + tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1, polydeg = 1, conf = FALSE, siglev = 0.95)$trend - tmp_exp <- Trend(data = exp, sdate_dim = sdate_dim, interval = 1, + tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, polydeg = 1, conf = FALSE, siglev = 0.95)$trend - # tmp_exp: [stats, member_exp, (dat_exp)] + # tmp_exp: [stats, dat_dim)] - trend_obs <- array(dim = dim(exp)) #[sdate, member_exp, (dat_exp)] - trend_exp <- array(dim = dim(exp)) + tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member) + #tmp_obs_mean: [stats = 2] - tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out member (and dataset) - tmp_obs_mean <- InsertDim(tmp_obs_mean, 2, dim(exp)[memb_dim]) - if (length(dim(exp)) == 3) { - tmp_obs_mean <- InsertDim(tmp_obs_mean, 3, dim(exp)[dat_dim]) - } + intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') + slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') + intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) + slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) + trend_exp <- list() + trend_obs <- list() - for (jdate in 1:dim(exp)[sdate_dim]) { - if (length(dim(exp)) == 2) { - trend_exp[jdate, ] <- tmp_exp[1, ] + jdate * tmp_exp[2, ] - trend_obs[jdate, ] <- tmp_obs_mean[1, ] + jdate * tmp_obs_mean[2, ] - } else if (length(dim(exp)) == 3) { - trend_exp[jdate, , ] <- tmp_exp[1, , ] + jdate * tmp_exp[2, , ] - trend_obs[jdate, , ] <- tmp_obs_mean[1, , ] + jdate * tmp_obs_mean[2, , ] - } + for (jdate in 1:dim(exp)[time_dim]) { + trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp + trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs } + # turn list into array + trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1])) + trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1])) + len <- length(dim(exp)) + trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1))) + trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1))) + + clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #a number + clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) - if (length(dim(exp)) == 3) { - clim_obs_mean <- apply(clim_obs, 1, mean) - clim_obs_mean <- mean(clim_obs_mean) - clim_obs_mean <- InsertDim( - InsertDim( - InsertDim(clim_obs_mean, 1, dim(exp)[1]), - 2, dim(exp)[2]), - 3, dim(exp)[3]) #[sdate, memb_exp, dat_exp, 1] - clim_obs_mean <- Subset(clim_obs_mean, 4, 1, drop = 'selected') #[sdate, memb_exp, dat_exp] - - } else if (length(dim(exp)) == 2) { - clim_obs_mean <- mean(clim_obs) #a number - clim_obs_mean <- InsertDim( - InsertDim(clim_obs_mean, 1, dim(exp)[1]), - 2, dim(exp)[2]) #[sdate, memb_exp, 1] - clim_obs_mean <- Subset(clim_obs_mean, 3, 1, drop = 'selected') #[sdate, memb_exp] - } - clim_exp <- trend_exp - trend_obs + clim_obs_mean + ## member mean + if (!memb) { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + } + } else if (method == 'NDV') { #nose aun } - ## 2. member mean - if (!memb) { - clim_exp <- mean(clim_exp, na.rm = TRUE) #remove NA member - clim_obs <- mean(clim_obs, na.rm = TRUE) #return a number - } return(list(clim_exp = clim_exp, clim_obs = clim_obs)) } diff --git a/R/Trend.R b/R/Trend.R index 3ed1acd7..e161176c 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -7,10 +7,10 @@ #' #'@param data An numeric array including the dimension along which the trend #' is computed. -#'@param sdate_dim A character string indicating the dimension along which to +#'@param time_dim A character string indicating the dimension along which to #' compute the trend. #'@param interval A positive numeric indicating the unit length between two -#' points along 'sdate_dim' dimension. Set 1 as default. +#' points along 'time_dim' dimension. Set 1 as default. #'@param siglev A numeric indicating the confidence level for the #' regression computation. Set 0.95 as default. #'@param conf A logical value indicating whether to retrieve the confidence @@ -23,28 +23,28 @@ #'@return #'\item{$trend}{ #' A numeric array with same dimensions as parameter 'data' except the -#' 'sdate_dim' dimension, which is replaced by a 'stats' dimension containing +#' 'time_dim' dimension, which is replaced by a 'stats' dimension containing #' the regression coefficients from the lowest order (i.e., intercept) to #' the highest degree. The length of the 'stats' dimension should be #' \code{polydeg + 1}. #'} #'\item{$conf.lower}{ #' A numeric array with same dimensions as parameter 'data' except the -#' 'sdate_dim' dimension, which is replaced by a 'stats' dimension containing +#' 'time_dim' dimension, which is replaced by a 'stats' dimension containing #' the lower limit of the \code{siglev}\% confidence interval for all #' the regression coefficients with the same order as $trend. The length of the #' 'stats' dimension should be \code{polydeg + 1}. Only present if \code{conf = TRUE}. #'} #'\item{$conf.upper}{ #' A numeric array with same dimensions as parameter 'data' except the -#' 'sdate_dim' dimension, which is replaced by a 'stats' dimension containing +#' 'time_dim' dimension, which is replaced by a 'stats' dimension containing #' the upper limit of the \code{siglev}\% confidence interval for all #' the regression coefficients with the same order as $trend. The length of the #' 'stats' dimension should be \code{polydeg + 1}. Only present if \code{conf = TRUE}. #'} #'\item{$detrended}{ #' A numeric array with the same dimensions as paramter 'data', the detrended -#' values along the 'sdate_dim' dimension. +#' values along the 'time_dim' dimension. #'} #' #'@keywords datagen @@ -62,7 +62,7 @@ #'@rdname Trend #'@import multiApply #'@export -Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = TRUE, polydeg = 1, ncores = NULL) { +Trend <- function(data, time_dim = 'sdate', interval = 1, siglev = 0.95, conf = TRUE, polydeg = 1, ncores = NULL) { # Check inputs ## data @@ -74,17 +74,17 @@ Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = } if (is.null(dim(data))) { #is vector dim(data) <- c(length(data)) - names(dim(data)) <- sdate_dim + names(dim(data)) <- time_dim } if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } - ## sdate_dim - if (!is.character(sdate_dim) | length(sdate_dim) > 1) { - stop("Parameter 'sdate_dim' must be a character string.") + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") } - if (!sdate_dim %in% names(dim(data))) { - stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") } ## interval if (any(!is.numeric(interval) | interval <= 0 | length(interval) > 1)) { @@ -116,14 +116,14 @@ Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = dim_names <- names(dim(data)) output <- Apply(list(data), - target_dims = sdate_dim, + target_dims = time_dim, fun = .Trend, #output_dims = , interval = interval, siglev = siglev, conf = conf, polydeg = polydeg, ncores = ncores) - output <- lapply(output, .reorder, sdate_dim = sdate_dim, dim_names = dim_names) + output <- lapply(output, .reorder, time_dim = time_dim, dim_names = dim_names) return(output) } @@ -159,7 +159,7 @@ Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = } - .reorder <- function(output, sdate_dim, dim_names) { + .reorder <- function(output, time_dim, dim_names) { # Add dim name back if (is.null(dim(output))) { @@ -169,16 +169,16 @@ Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = if (length(dim(output)) == 1) { if (!is.null(names(dim(output)))) { dim(output) <- c(1, dim(output)) - names(dim(output))[1] <- sdate_dim + names(dim(output))[1] <- time_dim } else { - names(dim(output)) <- sdate_dim + names(dim(output)) <- time_dim } } else { # more than one dim if (names(dim(output))[1] != "") { dim(output) <- c(1, dim(output)) - names(dim(output))[1] <- sdate_dim + names(dim(output))[1] <- time_dim } else { #regular case - names(dim(output))[1] <- sdate_dim + names(dim(output))[1] <- time_dim } } } @@ -186,7 +186,7 @@ Trend <- function(data, sdate_dim = 'sdate', interval = 1, siglev = 0.95, conf = pos <- match(dim_names, names(dim(output))) output <- aperm(output, pos) names(dim(output)) <- dim_names - names(dim(output))[names(dim(output)) == sdate_dim] <- 'stats' + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' return(output) } -- GitLab From 2ff6910388aa32059e795c28614273a8068b0739 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 28 Jan 2020 15:30:56 +0100 Subject: [PATCH 07/10] Complete Clim()+Apply --- DESCRIPTION | 3 +- NAMESPACE | 2 +- R/Clim.R | 325 +++++++++++++++++++++++++++++++++++--------- R/Regression.R | 340 +++++++++++++++++++++++++++++----------------- man/Clim.Rd | 83 +++++++---- man/Regression.Rd | 115 ++++++++++------ man/Trend.Rd | 91 ++++++------- 7 files changed, 653 insertions(+), 306 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 85b2eb8b..3be1e882 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,8 @@ Imports: ncdf4, parallel, plyr, - SpecsVerification (>= 0.5.0) + SpecsVerification (>= 0.5.0), + multiApply (>= 2.0.0) Suggests: easyVerification, testthat diff --git a/NAMESPACE b/NAMESPACE index 00c59bfa..6aef0ad9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,6 @@ export(.RMS) export(.RMSSS) export(.RatioRMS) export(.RatioSDRMS) -export(.Trend) export(ACC) export(Alpha) export(AnimateMap) @@ -94,6 +93,7 @@ import(multiApply) import(ncdf4) import(parallel) import(plyr) +importFrom(abind,adrop) importFrom(grDevices,bmp) importFrom(grDevices,col2rgb) importFrom(grDevices,colorRampPalette) diff --git a/R/Clim.R b/R/Clim.R index 2645b9cf..45d73e47 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -1,55 +1,157 @@ #'Compute Bias Corrected Climatologies #' -#'This function computes only per-pair climatologies from the experimental -#'and observational matrices output from \code{Load()}. -#'To compute plain climatologies from only experimental or observational -#'data from \code{Load()}, the following code can be used:\cr -#'\code{clim <- array(apply(obs_data, c(1, 4, 5, 6), mean),}\cr -#'\code{ dim = dim(obs_datta)[-c(2, 3)])}\cr -#'The function \code{Clim()} computes per-pair climatologies using one of the -#'following methods: +#'This function computes per-pair climatologies for the experimental +#'and observational data using one of the following methods: #'\enumerate{ #' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)} #' \item{Kharin method (Karin et al, GRL, 2012)} #' \item{Fuckar method (Fuckar et al, GRL, 2014)} #'} -#'\code{Clim()} computes climatologies using the startdates covered by the -#'whole experiments/observational data sets. The startdates not available for -#'all the data (model and obs) are excluded when computing the climatologies. +#'Per-pair climatology means that only the startdates covered by the +#'whole experiments/observational dataset will be used. In other words, the +#'startdates which are not all available along 'dat_dim' dimension of both +#'the 'exp' and 'obs' are excluded when computing the climatologies. #' -#'@param exp Model data: c(nmod/nexp, nmemb/nparam, nsdate, nltime) up to -#' c(nmod/nexp, nmemb/nparam, nsdate, nltime, nlevel, nlat, nlon). -#'@param obs Observational data: c(nobs, nmemb, nsdate, nltime) up to -#' c(nobs, nmemb, nsdate, nltime, nlevel, nlat, nlon). -#'@param memb TRUE/FALSE (1 climatology for each member). Default = TRUE. -#'@param method A character string indicating the method to be used. +#'@param exp A named numeric array of experimental data, with at least two +#' dimensions 'time_dim' and 'dat_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim'. +#'@param method A character string indicating the method to be used. The +#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the climatologies are computed. The default value is 'sdate'. +#'@param dat_dim A character vector indicating the name of the dataset and +#' member dimensions. If data at one startdate (i.e., 'time_dim') are not +#' complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. +#' The default value is "c('dataset', 'member')". +#'@param ftime_dim A character string indicating the name of forecast time +#' dimension. Only used when method = 'NDV'. The default value is 'ftime'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one element in 'dat_dim'. The default value is 'member'. +#'@param memb A logical value indicating whether to remain 'memb_dim' dimension +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE. +#'@param na.rm A logical value indicating whether to remove NA values along +#' 'time_dim' when calculating climatology (TRUE) or return NA if there is NA +#' along 'time_dim' (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' #'@return -#'\item{clim_exp}{Array with same dimensions as exp except the third -#' (starting dates) and, depending on the parameters, the second (members), -#' which disappear.} -#'\item{clim_obs}{Array with same dimensions as obs except the third -#' (starting dates) and, depending on the parameters, the second (members), -#' which disappear.} +#'A list of 2: +#'\item{$clim_exp}{ +#' A numeric array with the same dimensions as parameter 'exp' and 'obs' but +#' dimension 'time_dim' is moved to the first position. If parameter 'method' +#' is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE, +#' dimension 'memb_dim' is also removed. +#'} +#'\item{$clim_obs}{ +#' A numeric array with the same dimensions as parameter 'exp' and 'obs' +#' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, +#' dimension 'memb_dim' is also removed. +#'} +#' #'@keywords datagen #'@author History:\cr -#' 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@@ic3.cat}) - Original code\cr -#' 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Formatting to R CRAN +#' 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr +#' 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN +#' 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Modify with multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) #'clim <- Clim(sampleData$mod, sampleData$obs) -#' \donttest{ +#'clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = F) +#'\donttest{ #'PlotClim(clim$clim_exp, clim$clim_obs, #' toptitle = paste('sea surface temperature climatologies'), #' ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'), #' listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') -#' } +#'} #'@importFrom abind adrop #'@import multiApply #'@export -Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = c('dataset', 'member'), - time_dim = 'sdate', memb_dim = NULL, na.rm = TRUE, ncores = NULL) { +Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', + dat_dim = c('dataset', 'member'), + ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE, + na.rm = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## method + if (!(method %in% c("clim", "kharin", "NDV"))) { + stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character vector.") + } + if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!ftime_dim %in% names(dim(exp)) | !ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + for (i in 1:length(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim[i])] + name_obs <- name_obs[-which(name_obs == dat_dim[i])] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.")) + } ############################### @@ -57,42 +159,33 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = c('dataset', #---------------------------------- # Remove all sdate if not complete along dat_dim - + pos <- rep(0, length(dat_dim)) for (i in 1:length(dat_dim)) { #[dat, sdate] ## dat_dim: [dataset, member] pos[i] <- which(names(dim(obs)) == dat_dim[i]) } - outrows_exp <- MeanListDim(exp, pos, narm = FALSE) + MeanListDim(obs, pos, narm = FALSE) outrows_obs <- outrows_exp -#print(length(obs[which(is.na(outrows_exp))])) for (i in 1:length(pos)) { outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]]) outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]]) } -#print('before assign na') -#print(length(obs[which(is.na(exp))])) -#print(length(obs[which(is.na(obs))])) - exp[which(is.na(outrows_exp))] <- NA obs[which(is.na(outrows_obs))] <- NA -#print('after assign na') -#print(length(obs[which(is.na(exp))])) -#print(length(obs[which(is.na(obs))])) - - #----------------------------------- - if (method == 'clim') { clim <- Apply(list(exp, obs), target_dims = c(time_dim, dat_dim), - fun = .Clim, - method = method, na.rm = na.rm, memb = memb, ncores = ncores) + fun = .Clim, + method = method, time_dim = time_dim, + dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + memb = memb, na.rm = na.rm, + ncores = ncores) # Add member dimension name back if (memb) { if(is.null(names(dim(clim$clim_exp))[1])) { @@ -105,27 +198,38 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = c('dataset', clim <- Apply(list(exp, obs), target_dims = c(time_dim, dat_dim), fun = .Clim, - method = method, na.rm = na.rm, memb = memb, ncores = ncores) - } + method = method, time_dim = time_dim, + dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + memb = memb, na.rm = na.rm, + ncores = ncores) + } else if (method == 'NDV') { + clim <- Apply(list(exp, obs), + target_dims = c(time_dim, dat_dim, ftime_dim), + fun = .Clim, + method = method, time_dim = time_dim, + dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + memb = memb, na.rm = na.rm, + ncores = ncores) + } return(clim) } -.Clim <- function(exp, obs, #time_dim = 'sdate', memb_dim = 'member', dat_dim2 = dat_dim, - method = method, na.rm = TRUE, memb = TRUE) { - - ## 1. sdate mean. member average is done after this - - # exp: [sdate, member_exp] - # obs: [sdate, member_obs] +.Clim <- function(exp, obs, method = 'clim', + time_dim = 'sdate', dat_dim = c('dataset', 'member'), + ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE, + na.rm = TRUE) { if (method == 'clim') { + # exp: [sdate, dat_dim_exp] + # obs: [sdate, dat_dim_obs] + clim_exp <- apply(exp, which(names(dim(exp)) != time_dim), mean, na.rm = na.rm) #average out time_dim clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), - mean, na.rm = na.rm) #[memb] or [memb, dat] + mean, na.rm = na.rm) #[dat_dim] ## member mean if (!memb) { @@ -148,9 +252,12 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = c('dataset', } } else if (method == 'kharin') { + # exp: [sdate, dat_dim_exp] + # obs: [sdate, dat_dim_obs] + # obs clim clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), - mean, na.rm = na.rm) #[memb] or [memb, dat] + mean, na.rm = na.rm) #[dat_dim] # exp clim ##--- OLD trend ---## @@ -161,18 +268,18 @@ Clim <- function(exp, obs, memb = TRUE, method = 'clim', dat_dim = c('dataset', ##--- NEW trend ---## source('/home/Earth/aho/s2dverification/R/Trend.R') tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1, - polydeg = 1, conf = FALSE, siglev = 0.95)$trend + polydeg = 1, conf = FALSE)$trend tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, - polydeg = 1, conf = FALSE, siglev = 0.95)$trend + polydeg = 1, conf = FALSE)$trend # tmp_exp: [stats, dat_dim)] tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member) #tmp_obs_mean: [stats = 2] - intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') - slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') - intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) - slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) + intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim] + slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim] + intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) #[dat_dim] + slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) #[dat_dim] trend_exp <- list() trend_obs <- list() @@ -187,8 +294,8 @@ source('/home/Earth/aho/s2dverification/R/Trend.R') trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1))) trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1))) - clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #a number - clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) + clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #average out dat_dim, get a number + clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line clim_exp <- trend_exp - trend_obs + clim_obs_mean @@ -209,9 +316,95 @@ source('/home/Earth/aho/s2dverification/R/Trend.R') } else if (method == 'NDV') { - #nose aun - } + # exp: [sdate, dat_dim, ftime] + # obs: [sdate, dat_dim, ftime] + + # obs clim + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), + mean, na.rm = na.rm) #[dat_dim, ftime] + + # exp clim + ##--- NEW Regression ---## +source('/home/Earth/aho/s2dverification/R/Regression.R') + pos_ftime <- length(dim(exp)) #a number + dim_ftime <- dim(exp)[pos_ftime] #c(ftime = 4) + pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime + dim_dat <- dim(exp)[pos_dat] #c(dataset = 1, member = 3) + + # Create initial data set (i.e., only first ftime) + tmp <- Subset(exp, ftime_dim, 1, drop = 'selected') + ini_exp <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime + tmp <- Subset(obs, ftime_dim, 1, drop = 'selected') + ini_obs <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime + #ini_: [sdate, dat_dim, ftime] + tmp_exp <- Regression(datay = exp, datax = ini_exp, time_dim = time_dim, + na.action = na.omit, + pval = FALSE, conf = FALSE)$regression + tmp_obs <- Regression(datay = obs, datax = ini_obs, time_dim = time_dim, + na.action = na.omit, + pval = FALSE, conf = FALSE)$regression + #tmp_: [stats = 2, dat_dim, ftime] + tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean) #average out dat_dim (dat and member) + #tmp_obs_mean: [stats = 2, ftime] + ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean) #average out dat_dim + #ini_obs_mean: [sdate, ftime] + + # Find intercept and slope + intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim, ftime] + slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim, ftime] + intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat)) + #[ftime, dat_dim] exp + intercept_obs <- s2dverification:::.aperm2(intercept_obs, c(2:length(dim(intercept_obs)), 1)) + #[dat_dim, ftime] exp + slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat)) + #[ftime, dat_dim] exp + slope_obs <- s2dverification:::.aperm2(slope_obs, c(2:length(dim(slope_obs)), 1)) + #[dat_dim, ftime] exp + + trend_exp <- list() + trend_obs <- list() + for (jdate in 1:dim(exp)[time_dim]) { + tmp <- Subset(ini_exp, time_dim, jdate, drop = 'selected') #[dat_dim, ftime] + trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp #[dat_dim, ftime] + tmp <- array(ini_obs_mean[jdate, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] + tmp <- s2dverification:::.aperm2(tmp, c(2:length(dim(tmp)), 1)) #[dat_dim, ftime] + trend_obs[[jdate]] <- intercept_obs + tmp * slope_obs + } + # turn list into array + trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1])) + trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1])) + #trend_: [dat_dim, ftime, sdate] + len <- length(dim(exp)) + trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1))) + trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1))) + #trend_: [sdate, dat_dim, ftime] + + clim_obs_mean <- apply(clim_obs, length(dim(clim_obs)), mean) #average out dat_dim, [ftime] + clim_obs_mean <- array(clim_obs_mean, dim = c(dim_ftime, dim(exp)[1], dim_dat)) + #[ftime, sdate, dat_dim] + len <- length(dim(clim_obs_mean)) + clim_obs_mean <- s2dverification:::.aperm2(clim_obs_mean, c(2:len, 1)) + #[sdate, dat_dim, ftime] + + clim_exp <- trend_exp - trend_obs + clim_obs_mean + + ## member mean + if (!memb) { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + } + + } - return(list(clim_exp = clim_exp, clim_obs = clim_obs)) + return(list(clim_exp = clim_exp, clim_obs = clim_obs)) } diff --git a/R/Regression.R b/R/Regression.R index bacc5454..605cd41b 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -1,142 +1,240 @@ -#'Computes The Regression Of An Array On Another Along A Dimension +#'Compute the regression of an array on another along a dimension. #' -#'Computes the regression of the input matrice vary on the input matrice varx -#'along the posREG dimension by least square fitting. Provides the slope of -#'the regression, the associated confidence interval, and the intercept.\cr -#'Provides also the vary data filtered out from the regression onto varx.\cr -#'The confidence interval relies on a student-T distribution. +#'Compute the regression of the array 'datay' on the array 'datax' along the +#''time_dim' dimension by least square fitting (default) or self-defined model. +#'The function provides the slope of the regression, the intercept, and the +#'associated p-value and confidence interval. The filtered datay from the +#'regression onto datax is also provided.\cr +#'The p-value relies on the F distribution, and the confidence interval relies +#'on the student-T distribution. #' -#'@param vary Array of any number of named dimensions. -#'@param varx Array of any number of named dimensions. -#'The same dimensions as vary are expected. -#'@param posREG The name of the dimension along which to compute the regression. -#'@param na.action a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression. -#'@param formula an object of class "formula" (see function \code{link[stats]{lm}}). +#'@param datay An numeric array as predictand including the dimension along +#' which the regression is computed. +#'@param datax An numeric array as predictor. The dimension should be identical +#' as parameter 'datay'. +#'@param time_dim A character string indicating the dimension along which to +#' compute the trend. +#'@param na.action A function or an integer. A function (e.g., na.omit, +#' na.exclude, na.fail, na.pass) indicates what should happen when the data +#' contain NAs. A numeric indicates the maximum number of NA position (it +#' counts as long as one of datay and datax is NA) allowed for compute +#' regression. Set na.omit as default. +#'@param formula An object of class "formula" (see function \code{link[stats]{lm}}). +#'@param pval A logical value indicating whether to retrieve the p-value +#' or not. The default value is TRUE. +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. Default value is NULL. #' #'@import multiApply #'@return -#' \item{$regression}{ -#' Array with same dimensions as varx and vary except along posREG -#' dimension which is replaced by a length 4 dimension, corresponding -#' to the lower limit of the 95\% confidence interval, the slope, -#' the upper limit of the 95\% confidence interval and the intercept. -#' } -#' \item{$filtered}{ -#' Same dimensions as vary filtered out from the regression onto varx -#' along the posREG dimension. -#' } +#'\item{$regression}{ +#' A numeric array with same dimensions as parameter 'datay' and 'datax' except +#' the 'time_dim' dimension, which is replaced by a 'stats' dimension containing +#' the regression coefficients from the lowest order (i.e., intercept) to +#' the highest degree. The length of the 'stats' dimension should be +#' \code{polydeg + 1}. +#'} +#'\item{$conf.lower}{ +#' A numeric array with same dimensions as parameter 'daty' and 'datax' except +#' the 'time_dim' dimension, which is replaced by a 'stats' dimension containing +#' the lower value of the \code{siglev}\% confidence interval for all +#' the regression coefficients with the same order as $regression. The length +#' of 'stats' dimension should be \code{polydeg + 1}. Only present if +#' \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' A numeric array with same dimensions as parameter 'daty' and 'datax' except +#' the 'time_dim' dimension, which is replaced by a 'stats' dimension containing +#' the upper value of the \code{siglev}\% confidence interval for all +#' the regression coefficients with the same order as $regression. The length +#' of 'stats' dimension should be \code{polydeg + 1}. Only present if +#' \code{conf = TRUE}. +#'} +#'\item{$p.val}{ +#' A numeric array with same dimensions as parameter 'daty' and 'datax' except +#' the 'time_dim' dimension, the p-value. +#'} +#'\item{$filtered}{ +#' A numeric array with the same dimension as paramter 'datay' and 'datax', +#' the filtered datay from the regression onto datax along the 'time_dim' +#' dimension. +#'} #' #'@keywords datagen #'@author History:\cr #'0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN #'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply +#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply #'@examples -#'vary <- 1:(2*3*4*5*6) -#'dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) -#'varx <- vary -#'res <- Regression(vary, varx, posREG = 'time') -#'str(res) -#'varx[c(1,5,7,9:22)] <- NA -#'res <- Regression(vary, varx, posREG = 'time', na.action = 2) -#'length(which(is.na(res$filtered))) -#'res <- Regression(vary, varx, posREG = 'time', na.action = 0) -#'length(which(is.na(res$filtered))) -#'vary[1] <- NA -#'res <- Regression(vary, varx, posREG = 'time') -#'res <- Regression(vary, varx, posREG = 'time', na.action = 5) -#'exp <- 1 : 70 -#'obs <- rnorm(70) -#'dim(exp) <- c(time = 70) -#'dim(obs) <- c(time = 70) -#'reg <- Regression(exp, obs, posREG = 'time') -#'reg <- Regression(exp, obs, posREG = 'time', na.action = 10) +#'# Load sample data as in Load() example: +#'example(Load) +#'datay <- sampleData$mod +#'datax <- sampleData$obs +#'datay <- Subset(datay, 'member', 2) +#'res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) +#'res2 <- Regression(datay, datax, conf.lev = 0.9) #' #'@importFrom stats lm na.omit confint -#' +#'@import multiApply #'@export -Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formula = y ~ x) { - if (is.vector(vary)) { - dim(vary) <- c(length(vary)) - if (is.character(posREG)) { - names(dim(vary)) <- posREG - } else { - if (is.vector(varx)) { - posREG <- 'Dim1' - } else if (length(dim(varx)) == 1) { - posREG <- names(dim(varx)) - } else { - stop("Parameter 'vary' and 'varx' must be arrays with named", - " dimensions.") - } - names(dim(vary)) <- posREG - } - } - if (is.vector(varx)) { - if (dim(vary)[posREG] == length(varx)) { - if (length(dim(vary)) == 1) { - dim(varx) <- c(length(varx)) - names(varx) <- posREG - } else { - varx <- rep(varx, prod(dim(vary)[-posREG])) - dim(varx) <- dim(vary) - } - } - } - #compare dimensions - dimsy <- dim(vary) - dimsx <- dim(varx) - if (length(dimsy) != length(dimsx)) { - stop("Parameter 'vary' and 'varx' must have the same ", - "dimensions.") +Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, + formula = y ~ x, pval = T, conf = T, + conf.lev = 0.95, ncores = NULL) { + + # Check inputs + ## datay and datax + if (is.null(datay) | is.null(datax)) { + stop("Parameter 'datay' and 'datax' cannot be NULL.") + } + if (!is.numeric(datay) | !is.numeric(datax)) { + stop("Parameter 'datay' and 'datax' must be a numeric array.") + } + if (is.null(dim(datay)) | is.null(dim(datax))) { + stop("Parameter 'datay' and 'datax' must be at least one dimension 'time_dim'.") + } + if(any(is.null(names(dim(datay))))| any(nchar(names(dim(datay))) == 0) | + any(is.null(names(dim(datax))))| any(nchar(names(dim(datax))) == 0)) { + stop("Parameter 'datay' and 'datax' must have dimension names.") + } + if(!all(names(dim(datay)) %in% names(dim(datax))) | + !all(names(dim(datax)) %in% names(dim(datay)))) { + stop("Parameter 'datay' and 'datax' must have same dimension name") + } + name_datay <- sort(names(dim(datay))) + name_datax <- sort(names(dim(datax))) + if(!all(dim(datay)[name_datay] == dim(datax)[name_datax])) { + stop("Parameter 'datay' and 'datax' must have same length of all dimensions.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(datay)) | !time_dim %in% names(dim(datax))) { + stop("Parameter 'time_dim' is not found in 'datay' or 'datax' dimension.") + } + ## na.action + if (!is.function(na.action) & !is.numeric(na.action)) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) + } + if (is.numeric(na.action)) { + if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) } - if (is.null(names(dimsy)) & is.null(names(dimsx))) { - names(dim(vary)) <- paste0("Dim", 1 : length(dimsy)) - names(dim(vary)) <- paste0("Dim", 1 : length(dimsy)) - if (is.character(posREG)) { - stop("Parameters 'vary' and 'varx' must have dimension names.") - } + } + ## formula + if (class(formula) != 'formula') { + stop("Parameter 'formula' must the an object of class 'formula'.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ##conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } - - adjusted <- Apply(list(vary, varx), target_dims = posREG, fun = .Regression, - na.action = na.action, formula = formula, - output_dims = list(regression = 'coeff', filtered = posREG)) - invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) + } + + ############################### + # Calculate Regression + if (conf & pval) { + output_dims <- list(regression = 'stats', conf.lower = 'stats', + conf.upper = 'stats', p.val = NULL, filtered = time_dim) + } else if (conf & !pval) { + output_dims <- list(regression = 'stats', conf.lower = 'stats', + conf.upper = 'stats', filtered = time_dim) + } else if (!conf & pval) { + output_dims <- list(regression = 'stats', + p.val = NULL, filtered = time_dim) + } else if (!conf & !pval) { + output_dims <- list(regression = 'stats', filtered = time_dim) + } + + res <- Apply(list(datay, datax), + target_dims = time_dim, + output_dims = output_dims, + fun = .Regression, conf.lev = conf.lev, + na.action = na.action, formula = formula, + conf = conf, pval = pval, ncores = ncores) + + return(invisible(res)) } -.Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { - if (!is.numeric(x) | !is.numeric(y)) { - stop("Parameter 'x' and 'y' must be numeric.") - } - if (!is.function(na.action) & !is.numeric(na.action)) { - warning("Parameter 'na.action' must be a function for dealing ", - "with NA values (e.g.: na.omit, na.exclude, na.fail, na.pass)", - " or a numeric value indicating the number of NA values ", - "allowed before returning an NA.") + + +.Regression <- function(y, x, na.action = na.omit, formula = y ~ x, conf.lev = 0.95, + conf = conf, pval = pval) { + NApos <- 1:length(x) + NApos[which(is.na(x) | is.na(y))] <- NA + filtered <- rep(NA, length(x)) + check_na <- FALSE + + if (is.numeric(na.action)) { + na_threshold <- na.action + na.action <- na.omit + check_na <- TRUE + } + + # remove NAs for potential poly() + x2 <- x[!is.na(NApos)] + y2 <- y[!is.na(NApos)] + lm.out <- lm(formula, data = data.frame(x = x2, y = y2), na.action = na.action) + coeff <- lm.out$coefficients + if (conf) { + conf.lower <- confint(lm.out, level = conf.lev)[, 1] + conf.upper <- confint(lm.out, level = conf.lev)[, 2] + } + if (pval) { + f <- summary(lm.out)$fstatistic + p.val <- pf(f[1], f[2], f[3],lower.tail = F) + } + filtered[!is.na(NApos)] <- y[!is.na(NApos)] - lm.out$fitted.values + + # Check if NA is too many + if (check_na) { + if (sum(is.na(NApos)) > na_threshold) { #turn everything into NA + coeff[which(!is.na(coeff))] <- NA + if (conf) { + conf.lower[which(!is.na(conf.lower))] <- NA + conf.upper[which(!is.na(conf.upper))] <- NA + } + if (pval) { + p.val[which(!is.na(p.val))] <- NA + } + filtered[which(!is.na(filtered))] <- NA } - if (is.numeric(na.action)) { - NApos <- 1 : length(x) - NApos[which(is.na(x))] <- NA - NApos[which(is.na(y))] <- NA - if (sum(is.na(NApos)) > na.action) { - filter <- rep(NA, length(x)) - regress <- rep(NA, 4) - } else { - adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.omit) - filter <- NApos - filter[!is.na(filter)] <- adj$fitted.values - regress <- c(confint(adj)[2, 1], adj$coefficients[2], - confint(adj)[2, 2], adj$coefficients[1]) - } - } else { - NApos <- 1 : length(x) - NApos[which(is.na(x))] <- NA - NApos[which(is.na(y))] <- NA - adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.action) - filter <- NApos - filter[!is.na(filter)] <- adj$fitted.values - regress <- c(confint(adj)[2, 1], adj$coefficients[2], - confint(adj)[2, 2], adj$coefficients[1]) - } + } + + if (conf & pval) { + return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper, + p.val = p.val, filtered = filtered)) + } else if (conf & !pval) { + return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper, + filtered = filtered)) + } else if (!conf & pval) { + return(list(regression = coeff, + p.val = p.val, filtered = filtered)) + } else if (!conf & !pval) { + return(list(regression = coeff, filtered = filtered)) + } - return(list(regression = regress, filtered = filter)) } + diff --git a/man/Clim.Rd b/man/Clim.Rd index 1ef5e3da..b96c2315 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -2,65 +2,90 @@ % Please edit documentation in R/Clim.R \name{Clim} \alias{Clim} -\title{Computes Bias Corrected Climatologies} +\title{Compute Bias Corrected Climatologies} \usage{ -Clim(var_exp, var_obs, memb = TRUE, kharin = FALSE, NDV = FALSE) +Clim(exp, obs, method = "clim", time_dim = "sdate", dat_dim = c("dataset", + "member"), ftime_dim = "ftime", memb_dim = "member", memb = TRUE, + na.rm = TRUE, ncores = NULL) } \arguments{ -\item{var_exp}{Model data: c(nmod/nexp, nmemb/nparam, nsdates, nltime) up to -c(nmod/nexp, nmemb/nparam, nsdates, nltime, nlevel, nlat, nlon).} +\item{exp}{A named numeric array of experimental data, with at least two +dimensions 'time_dim' and 'dat_dim'.} -\item{var_obs}{Observational data: c(nobs, nmemb, nsdates, nltime) up to -c(nobs, nmemb, nsdates, nltime, nlevel, nlat, nlon).} +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along 'dat_dim'.} -\item{memb}{TRUE/FALSE (1 climatology for each member). Default = TRUE.} +\item{method}{A character string indicating the method to be used. The +options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.} -\item{kharin}{TRUE/FALSE (if Kharin method is applied or not). -Default = FALSE.} +\item{time_dim}{A character string indicating the name of dimension along +which the climatologies are computed. The default value is 'sdate'.} -\item{NDV}{TRUE/FALSE (if Fuckar method is applied or not). Default = FALSE.} +\item{dat_dim}{A character vector indicating the name of the dataset and +member dimensions. If data at one startdate (i.e., 'time_dim') are not +complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. +The default value is "c('dataset', 'member')".} + +\item{ftime_dim}{A character string indicating the name of forecast time +dimension. Only used when method = 'NDV'. The default value is 'ftime'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension. It must be one element in 'dat_dim'. The default value is 'member'.} + +\item{memb}{A logical value indicating whether to remain 'memb_dim' dimension +(TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE.} + +\item{na.rm}{A logical value indicating whether to remove NA values along +'time_dim' when calculating climatology (TRUE) or return NA if there is NA +along 'time_dim' (FALSE). The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -\item{clim_exp}{Array with same dimensions as var_exp except the third - (starting dates) and, depending on the parameters, the second (members), - which disappear.} -\item{clim_obs}{Array with same dimensions as var_obs except the third - (starting dates) and, depending on the parameters, the second (members), - which disappear.} +A list of 2: +\item{$clim_exp}{ + A numeric array with the same dimensions as parameter 'exp' and 'obs' but + dimension 'time_dim' is moved to the first position. If parameter 'method' + is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE, + dimension 'memb_dim' is also removed. +} +\item{$clim_obs}{ + A numeric array with the same dimensions as parameter 'exp' and 'obs' + except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, + dimension 'memb_dim' is also removed. +} } \description{ -This function computes only per-pair climatologies from the experimental -and observational matrices output from \code{Load()}. -To compute plain climatologies from only experimental or observational -data from \code{Load()}, the following code can be used:\cr -\code{clim <- array(apply(obs_data, c(1, 4, 5, 6), mean),}\cr -\code{ dim = dim(obs_datta)[-c(2, 3)])}\cr -The function \code{Clim()} computes per-pair climatologies using one of the -following methods: +This function computes per-pair climatologies for the experimental +and observational data using one of the following methods: \enumerate{ \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)} \item{Kharin method (Karin et al, GRL, 2012)} \item{Fuckar method (Fuckar et al, GRL, 2014)} } -\code{Clim()} computes climatologies using the startdates covered by the -whole experiments/observational data sets. The startdates not available for -all the data (model and obs) are excluded when computing the climatologies. +Per-pair climatology means that only the startdates covered by the +whole experiments/observational dataset will be used. In other words, the +startdates which are not all available along 'dat_dim' dimension of both +the 'exp' and 'obs' are excluded when computing the climatologies. } \examples{ # Load sample data as in Load() example: example(Load) clim <- Clim(sampleData$mod, sampleData$obs) - \donttest{ +clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = F) +\donttest{ PlotClim(clim$clim_exp, clim$clim_obs, toptitle = paste('sea surface temperature climatologies'), ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') - } +} } \author{ History:\cr 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN + 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Modify with multiApply feature } \keyword{datagen} diff --git a/man/Regression.Rd b/man/Regression.Rd index 0eb240b7..e0775c95 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -2,62 +2,92 @@ % Please edit documentation in R/Regression.R \name{Regression} \alias{Regression} -\title{Computes The Regression Of An Array On Another Along A Dimension} +\title{Compute the regression of an array on another along a dimension.} \usage{ -Regression(vary, varx, posREG = "member", na.action = na.omit, formula = y - ~ x) +Regression(datay, datax, time_dim = "sdate", na.action = na.omit, + formula = y ~ x, pval = T, conf = T, conf.lev = 0.95, ncores = NULL) } \arguments{ -\item{vary}{Array of any number of named dimensions.} +\item{datay}{An numeric array as predictand including the dimension along +which the regression is computed.} -\item{varx}{Array of any number of named dimensions. -The same dimensions as vary are expected.} +\item{datax}{An numeric array as predictor. The dimension should be identical +as parameter 'datay'.} -\item{posREG}{The name of the dimension along which to compute the regression.} +\item{time_dim}{A character string indicating the dimension along which to +compute the trend.} -\item{na.action}{a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression.} +\item{na.action}{A function or an integer. A function (e.g., na.omit, +na.exclude, na.fail, na.pass) indicates what should happen when the data +contain NAs. A numeric indicates the maximum number of NA position (it +counts as long as one of datay and datax is NA) allowed for compute +regression. Set na.omit as default.} -\item{formula}{an object of class "formula" (see function \code{link[stats]{lm}}).} +\item{formula}{An object of class "formula" (see function \code{link[stats]{lm}}).} + +\item{pval}{A logical value indicating whether to retrieve the p-value +or not. The default value is TRUE.} + +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} + +\item{conf.lev}{A numeric indicating the confidence level for the +regression computation. The default value is 0.95.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. Default value is NULL.} } \value{ \item{$regression}{ - Array with same dimensions as varx and vary except along posREG - dimension which is replaced by a length 4 dimension, corresponding - to the lower limit of the 95\% confidence interval, the slope, - the upper limit of the 95\% confidence interval and the intercept. - } - \item{$filtered}{ - Same dimensions as vary filtered out from the regression onto varx - along the posREG dimension. - } + A numeric array with same dimensions as parameter 'datay' and 'datax' except + the 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the regression coefficients from the lowest order (i.e., intercept) to + the highest degree. The length of the 'stats' dimension should be + \code{polydeg + 1}. +} +\item{$conf.lower}{ + A numeric array with same dimensions as parameter 'daty' and 'datax' except + the 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the lower value of the \code{siglev}\% confidence interval for all + the regression coefficients with the same order as $regression. The length + of 'stats' dimension should be \code{polydeg + 1}. Only present if + \code{conf = TRUE}. +} +\item{$conf.upper}{ + A numeric array with same dimensions as parameter 'daty' and 'datax' except + the 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the upper value of the \code{siglev}\% confidence interval for all + the regression coefficients with the same order as $regression. The length + of 'stats' dimension should be \code{polydeg + 1}. Only present if + \code{conf = TRUE}. +} +\item{$p.val}{ + A numeric array with same dimensions as parameter 'daty' and 'datax' except + the 'time_dim' dimension, the p-value. +} +\item{$filtered}{ + A numeric array with the same dimension as paramter 'datay' and 'datax', + the filtered datay from the regression onto datax along the 'time_dim' + dimension. +} } \description{ -Computes the regression of the input matrice vary on the input matrice varx -along the posREG dimension by least square fitting. Provides the slope of -the regression, the associated confidence interval, and the intercept.\cr -Provides also the vary data filtered out from the regression onto varx.\cr -The confidence interval relies on a student-T distribution. +Compute the regression of the array 'datay' on the array 'datax' along the +'time_dim' dimension by least square fitting (default) or self-defined model. +The function provides the slope of the regression, the intercept, and the +associated p-value and confidence interval. The filtered datay from the +regression onto datax is also provided.\cr +The p-value relies on the F distribution, and the confidence interval relies +on the student-T distribution. } \examples{ -vary <- 1:(2*3*4*5*6) -dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) -varx <- vary -res <- Regression(vary, varx, posREG = 'time') -str(res) -varx[c(1,5,7,9:22)] <- NA -res <- Regression(vary, varx, posREG = 'time', na.action = 2) -length(which(is.na(res$filtered))) -res <- Regression(vary, varx, posREG = 'time', na.action = 0) -length(which(is.na(res$filtered))) -vary[1] <- NA -res <- Regression(vary, varx, posREG = 'time') -res <- Regression(vary, varx, posREG = 'time', na.action = 5) -exp <- 1 : 70 -obs <- rnorm(70) -dim(exp) <- c(time = 70) -dim(obs) <- c(time = 70) -reg <- Regression(exp, obs, posREG = 'time') -reg <- Regression(exp, obs, posREG = 'time', na.action = 10) +# Load sample data as in Load() example: +example(Load) +datay <- sampleData$mod +datax <- sampleData$obs +datay <- Subset(datay, 'member', 2) +res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) +res2 <- Regression(datay, datax, conf.lev = 0.9) } \author{ @@ -65,6 +95,7 @@ History:\cr 0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN 2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply } \keyword{datagen} diff --git a/man/Trend.Rd b/man/Trend.Rd index 3b7f7bfd..ac7811f5 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -1,74 +1,72 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Trend.R \name{Trend} -\alias{.Trend} \alias{Trend} -\title{Computes the Trend of the Ensemble Mean} +\title{Compute the Trend of the Ensemble Mean} \usage{ -Trend(var, posTR = 2, interval = 1, siglev = 0.95, conf = TRUE) - -.Trend(exp, interval = 1, siglev = 0.95, conf = TRUE) +Trend(data, time_dim = "sdate", interval = 1, siglev = 0.95, + conf = TRUE, polydeg = 1, ncores = NULL) } \arguments{ -\item{var}{An array of any number of dimensions up to 10.} +\item{data}{An numeric array including the dimension along which the trend +is computed.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the trend.} -\item{posTR}{An integer indicating the position along which to compute the -trend.} +\item{interval}{A positive numeric indicating the unit length between two +points along 'time_dim' dimension. Set 1 as default.} -\item{interval}{A number of months/years between 2 points along posTR -dimension. Set 1 as default.} +\item{siglev}{A numeric indicating the confidence level for the +regression computation. Set 0.95 as default.} -\item{siglev}{A numeric value indicating the confidence level for the -computation of confidence interval. Set 0.95 as default.} +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. Set TRUE as default.} -\item{conf}{A logical value indicating whether to compute the confidence -levels or not. Set TRUE as default.} +\item{polydeg}{A positive integer indicating the degree of polynomial +regression. Set 1 as default.} -\item{exp}{An M by N matrix representing M forecasts from N ensemble members.} +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. Default value is NULL.} } \value{ \item{$trend}{ - The intercept and slope coefficients for the least squares fitting of the - trend. - An array with same dimensions as parameter 'var' except along the posTR - dimension, which is replaced by a length 4 (or length 2 if conf = FALSE) - dimension, corresponding to the lower limit of the confidence interval - (only present if conf = TRUE), the slope, the upper limit of the confidence - interval (only present if conf = TRUE), and the intercept. + A numeric array with same dimensions as parameter 'data' except the + 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the regression coefficients from the lowest order (i.e., intercept) to + the highest degree. The length of the 'stats' dimension should be + \code{polydeg + 1}. } -\item{$detrended}{ - Same dimensions as var with linearly detrended var along the posTR - dimension. +\item{$conf.lower}{ + A numeric array with same dimensions as parameter 'data' except the + 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the lower limit of the \code{siglev}\% confidence interval for all + the regression coefficients with the same order as $trend. The length of the + 'stats' dimension should be \code{polydeg + 1}. Only present if \code{conf = TRUE}. } -Only in .Trend: -\item{$conf.int}{ - Corresponding to the limits of the \code{siglev}\% confidence interval - (only present if \code{conf = TRUE}) for the slope coefficient. +\item{$conf.upper}{ + A numeric array with same dimensions as parameter 'data' except the + 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the upper limit of the \code{siglev}\% confidence interval for all + the regression coefficients with the same order as $trend. The length of the + 'stats' dimension should be \code{polydeg + 1}. Only present if \code{conf = TRUE}. +} +\item{$detrended}{ + A numeric array with the same dimensions as paramter 'data', the detrended + values along the 'time_dim' dimension. } } \description{ -Computes the trend along the forecast time of the ensemble mean by least -square fitting, and the associated error interval.\cr -Trend() also provides the time series of the detrended ensemble mean -forecasts.\cr -The confidence interval relies on a student-T distribution.\cr\cr -.Trend provides the same functionality but taking a matrix ensemble members -as input (exp). +Compute the polynomial regression along the forecast time with any degree of +polynomial. It returns the regression coefficients (including the intercept) +and the confidence intervals if needed. The detrended array is also provided.\cr +The confidence interval relies on the student-T distribution.\cr\cr } \examples{ # Load sample data as in Load() example: example(Load) months_between_startdates <- 60 -trend <- Trend(sampleData$obs, 3, months_between_startdates) - \donttest{ -PlotVsLTime(trend$trend, toptitle = "trend", ytitle = "K / (5 year)", - monini = 11, limits = c(-1,1), listexp = c('CMIP5 IC3'), - listobs = c('ERSST'), biglab = FALSE, hlines = 0, - fileout = 'tos_obs_trend.eps') -PlotAno(trend$detrended, NULL, startDates, - toptitle = 'detrended anomalies (along the startdates)', ytitle = 'K', - legends = 'ERSST', biglab = FALSE, fileout = 'tos_detrended_obs.eps') - } +trend <- Trend(sampleData$obs, polydeg = 2) } \author{ @@ -76,6 +74,7 @@ History:\cr 0.1 - 2011-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN\cr 2.0 - 2017-02 (A. Hunter, \email{alasdair.hunter@bsc.es}) - Adapt to veriApply() +3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adapt to multiApply() } \keyword{datagen} -- GitLab From 187c52b832e54d3f2a03a5bdc1d1a02d1ffb8436 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 28 Jan 2020 17:22:13 +0100 Subject: [PATCH 08/10] Add Clim() unit test --- R/Clim.R | 13 ++- man/Clim.Rd | 4 +- tests/testthat/test-Clim.R | 194 +++++++++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-Clim.R diff --git a/R/Clim.R b/R/Clim.R index 45d73e47..8dced02b 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -39,13 +39,13 @@ #'@return #'A list of 2: #'\item{$clim_exp}{ -#' A numeric array with the same dimensions as parameter 'exp' and 'obs' but +#' A numeric array with the same dimensions as parameter 'exp' but #' dimension 'time_dim' is moved to the first position. If parameter 'method' #' is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE, #' dimension 'memb_dim' is also removed. #'} #'\item{$clim_obs}{ -#' A numeric array with the same dimensions as parameter 'exp' and 'obs' +#' A numeric array with the same dimensions as parameter 'exp' #' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, #' dimension 'memb_dim' is also removed. #'} @@ -153,6 +153,15 @@ Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', "all dimension expect 'dat_dim'.")) } + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + #order_exp <- match(name_exp, names(dim(exp))) + order_obs <- match(name_exp, name_obs) + #exp <- s2dverification:::.aperm2(exp, order_exp) + obs <- s2dverification:::.aperm2(obs, order_obs) + ############################### # Calculate Clim diff --git a/man/Clim.Rd b/man/Clim.Rd index b96c2315..95195707 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -45,13 +45,13 @@ computation. The default value is NULL.} \value{ A list of 2: \item{$clim_exp}{ - A numeric array with the same dimensions as parameter 'exp' and 'obs' but + A numeric array with the same dimensions as parameter 'exp' but dimension 'time_dim' is moved to the first position. If parameter 'method' is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE, dimension 'memb_dim' is also removed. } \item{$clim_obs}{ - A numeric array with the same dimensions as parameter 'exp' and 'obs' + A numeric array with the same dimensions as parameter 'exp' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, dimension 'memb_dim' is also removed. } diff --git a/tests/testthat/test-Clim.R b/tests/testthat/test-Clim.R new file mode 100644 index 00000000..d9617a95 --- /dev/null +++ b/tests/testthat/test-Clim.R @@ -0,0 +1,194 @@ +context("s2dverification::Clim tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(360), dim = c(dataset = 1, member = 3, sdate = 5, + ftime = 3, lon = 2, lat = 4)) + set.seed(2) + obs1 <- array(rnorm(120), dim = c(dataset = 1, member = 1, + ftime = 3, lon = 2, lat = 4, sdate = 5)) + # dat2 + exp2 <- exp1 + set.seed(1) + na <- floor(runif(5, min = 1, max = 360)) + exp2[na] <- NA + obs2 <- obs1 + set.seed(2) + na <- floor(runif(30, min = 1, max = 120)) + obs2[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + Clim(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Clim(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Clim(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.") + ) + expect_error( + Clim(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + Clim(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + expect_error( + Clim(exp1, obs1, method = TRUE), + "Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'." + ) + expect_error( + Clim(exp1, obs1, time_dim = c('sdate','ftime')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Clim(exp1, obs1, time_dim = 'asd'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, dat_dim = c(1,2)), + "Parameter 'dat_dim' must be a character vector." + ) + expect_error( + Clim(exp1, obs1, dat_dim = c('member', 'dat')), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, ftime_dim = 4), + "Parameter 'ftime_dim' must be a character string." + ) + expect_error( + Clim(exp1, obs1, ftime_dim = 'f'), + "Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, memb_dim = c('dataset', 'member')), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Clim(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Clim(exp1, obs1, memb = 'member'), + "Parameter 'memb' must be one logical value." + ) + expect_error( + Clim(exp1, obs1, na.rm = na.omit), + "Parameter 'na.rm' must be one logical value." + ) + expect_error( + Clim(exp1, obs1, ncores = T), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + Clim(array(1:10, dim = c(dataset = 2, member = 5, sdate = 4, ftime = 3)), + array(1:4, dim = c(dataset = 2, member = 2, sdate = 5, ftime = 3))), + paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.") + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Clim(exp1, obs1)$clim_exp), + c(dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, memb = FALSE)$clim_exp), + c(dataset = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, time_dim = 'lon')$clim_exp), + c(dataset = 1, member = 3, sdate = 5, ftime = 3, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'kharin')$clim_exp), + c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'NDV')$clim_exp), + c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1)$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'kharin')$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'NDV')$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + (Clim(exp1, obs1)$clim_obs)[1:5], + c(0.14831161, -0.60462627, 0.06609153, -0.23378059, 0.50553522), + tolerance = 0.001 + ) + expect_equal( + (Clim(exp1, obs1, memb = FALSE)$clim_exp)[1:5], + c(0.10084284, 0.06407350, 0.09028584, 0.17526332, 0.18004387), + tolerance = 0.001 + ) + expect_equal( + max(Clim(exp1, obs1)$clim_exp, na.rm = T), + 1.026186, + tolerance = 0.001 + ) + expect_equal( + max(Clim(exp1, obs1, method = 'kharin')$clim_exp, na.rm = T), + 2.282634, + tolerance = 0.001 + ) + expect_equal( + min(Clim(exp1, obs1, method = 'NDV')$clim_exp, na.rm = T), + -4.025745, + tolerance = 0.001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + (Clim(exp2, obs2)$clim_obs)[1:5], + c(0.23142987, -0.60462627, -0.03669491, -0.14193572, 0.61163024), + tolerance = 0.001 + ) + expect_equal( + (Clim(exp2, obs2)$clim_exp)[1:5], + c(0.01054951, -0.04744191, -0.03533071, 0.14149945, 0.02359945), + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Clim(exp2, obs2, na.rm = FALSE)$clim_exp))), + 57 + ) + expect_equal( + length(which(is.na(Clim(exp2, obs2, na.rm = FALSE)$clim_obs))), + 19 + ) + expect_equal( + length(which(is.na(Clim(exp2, obs2)$clim_obs))), + 0 + ) + +}) + +############################################## + -- GitLab From 788aa05bfad94e4c1bc549fc8ac84f24dc3ca273 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 28 Jan 2020 17:38:23 +0100 Subject: [PATCH 09/10] Delete confronted file test-Ano.R --- tests/testthat/test-Ano.R | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 tests/testthat/test-Ano.R diff --git a/tests/testthat/test-Ano.R b/tests/testthat/test-Ano.R deleted file mode 100644 index aa3d2796..00000000 --- a/tests/testthat/test-Ano.R +++ /dev/null @@ -1,27 +0,0 @@ -context("Generic tests") -test_that("Sanity checks", { - - var <- array(rnorm(16), c(2, 2, 2, 2)) - names(dim(var)) <- c("memb", "lon", "sdates", "lat") - clim <- apply(var, c(1, 2, 4), mean) - names(dim(clim)) <- NULL - expect_error( - Ano(var, clim), - "Provide dimension names on parameter \'var\' and \'clim\' to avoid ambiguity." - ) - - t <- array(rnorm(76), c(1, 3, 4, 3, 2, 2)) - names(dim(t)) <- c("mod", "memb", "sdates", "ltime", "lon", "lat") - c3 <- Clim(t, t, memb = TRUE)$clim_exp # Clim for each member - c1 <- InsertDim(c3[, 1, ,, ], 1, 1) # clim as if memb=FALSE but identical to member 1 - names(dim(c1)) <- c("mod", "ltime", "lon", "lat") - identical(c1[, , , ], c3[, 1, , , ]) # results in TRUE - a3 <- Ano(t, c3) # ano for each member individually - a1 <- Ano(t, c1) # ano for first member - - expect_equal( - a1[, 1, , , , ], - a3[, 1, , , , ] - ) - -}) -- GitLab From 8e2b9c25a5e0b3fd9831f04beef28b4c4a65dcc5 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 30 Jan 2020 10:37:47 +0100 Subject: [PATCH 10/10] Small change in Clim() --- R/Clim.R | 35 ++++++++++++----------------------- man/Clim.Rd | 17 +++++++++-------- man/Regression.Rd | 1 - 3 files changed, 21 insertions(+), 32 deletions(-) diff --git a/R/Clim.R b/R/Clim.R index 8dced02b..2ed1ffe9 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -16,18 +16,19 @@ #' dimensions 'time_dim' and 'dat_dim'. #'@param obs A named numeric array of observational data, same dimensions as #' parameter 'exp' except along 'dat_dim'. -#'@param method A character string indicating the method to be used. The -#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'. #'@param time_dim A character string indicating the name of dimension along #' which the climatologies are computed. The default value is 'sdate'. #'@param dat_dim A character vector indicating the name of the dataset and #' member dimensions. If data at one startdate (i.e., 'time_dim') are not #' complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. #' The default value is "c('dataset', 'member')". +#'@param method A character string indicating the method to be used. The +#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'. #'@param ftime_dim A character string indicating the name of forecast time #' dimension. Only used when method = 'NDV'. The default value is 'ftime'. #'@param memb_dim A character string indicating the name of the member -#' dimension. It must be one element in 'dat_dim'. The default value is 'member'. +#' dimension. Only used when parameter 'memb' is FALSE. It must be one element +#' in 'dat_dim'. The default value is 'member'. #'@param memb A logical value indicating whether to remain 'memb_dim' dimension #' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE. #'@param na.rm A logical value indicating whether to remove NA values along @@ -54,7 +55,7 @@ #'@author History:\cr #' 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr #' 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN -#' 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Modify with multiApply feature +#' 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) @@ -69,10 +70,9 @@ #'@importFrom abind adrop #'@import multiApply #'@export -Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', - dat_dim = c('dataset', 'member'), - ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE, - na.rm = TRUE, ncores = NULL) { +Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), + method = 'clim', ftime_dim = 'ftime', memb_dim = 'member', + memb = TRUE, na.rm = TRUE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -94,10 +94,6 @@ Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', !all(names(dim(obs)) %in% names(dim(exp)))) { stop("Parameter 'exp' and 'obs' must have same dimension name") } - ## method - if (!(method %in% c("clim", "kharin", "NDV"))) { - stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.") - } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -112,6 +108,10 @@ Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) { stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") } + ## method + if (!(method %in% c("clim", "kharin", "NDV"))) { + stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.") + } ## ftime_dim if (!is.character(ftime_dim) | length(ftime_dim) > 1) { stop("Parameter 'ftime_dim' must be a character string.") @@ -157,9 +157,7 @@ Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', # Sort dimension name_exp <- names(dim(exp)) name_obs <- names(dim(obs)) - #order_exp <- match(name_exp, names(dim(exp))) order_obs <- match(name_exp, name_obs) - #exp <- s2dverification:::.aperm2(exp, order_exp) obs <- s2dverification:::.aperm2(obs, order_obs) @@ -269,13 +267,7 @@ Clim <- function(exp, obs, method = 'clim', time_dim = 'sdate', mean, na.rm = na.rm) #[dat_dim] # exp clim - ##--- OLD trend ---## -# tmp_obs <- Trend(obs, posTR = 1)$trend -# tmp_exp <- Trend(exp, posTR = 1)$trend -#print(head(tmp_obs)) -#print(head(tmp_exp)) ##--- NEW trend ---## -source('/home/Earth/aho/s2dverification/R/Trend.R') tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1, polydeg = 1, conf = FALSE)$trend tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, @@ -305,7 +297,6 @@ source('/home/Earth/aho/s2dverification/R/Trend.R') clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #average out dat_dim, get a number clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line - clim_exp <- trend_exp - trend_obs + clim_obs_mean ## member mean @@ -333,8 +324,6 @@ source('/home/Earth/aho/s2dverification/R/Trend.R') mean, na.rm = na.rm) #[dat_dim, ftime] # exp clim - ##--- NEW Regression ---## -source('/home/Earth/aho/s2dverification/R/Regression.R') pos_ftime <- length(dim(exp)) #a number dim_ftime <- dim(exp)[pos_ftime] #c(ftime = 4) pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime diff --git a/man/Clim.Rd b/man/Clim.Rd index 95195707..b17b2ee5 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -4,9 +4,9 @@ \alias{Clim} \title{Compute Bias Corrected Climatologies} \usage{ -Clim(exp, obs, method = "clim", time_dim = "sdate", dat_dim = c("dataset", - "member"), ftime_dim = "ftime", memb_dim = "member", memb = TRUE, - na.rm = TRUE, ncores = NULL) +Clim(exp, obs, time_dim = "sdate", dat_dim = c("dataset", "member"), + method = "clim", ftime_dim = "ftime", memb_dim = "member", + memb = TRUE, na.rm = TRUE, ncores = NULL) } \arguments{ \item{exp}{A named numeric array of experimental data, with at least two @@ -15,9 +15,6 @@ dimensions 'time_dim' and 'dat_dim'.} \item{obs}{A named numeric array of observational data, same dimensions as parameter 'exp' except along 'dat_dim'.} -\item{method}{A character string indicating the method to be used. The -options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.} - \item{time_dim}{A character string indicating the name of dimension along which the climatologies are computed. The default value is 'sdate'.} @@ -26,11 +23,15 @@ member dimensions. If data at one startdate (i.e., 'time_dim') are not complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. The default value is "c('dataset', 'member')".} +\item{method}{A character string indicating the method to be used. The +options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.} + \item{ftime_dim}{A character string indicating the name of forecast time dimension. Only used when method = 'NDV'. The default value is 'ftime'.} \item{memb_dim}{A character string indicating the name of the member -dimension. It must be one element in 'dat_dim'. The default value is 'member'.} +dimension. Only used when parameter 'memb' is FALSE. It must be one element +in 'dat_dim'. The default value is 'member'.} \item{memb}{A logical value indicating whether to remain 'memb_dim' dimension (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE.} @@ -85,7 +86,7 @@ PlotClim(clim$clim_exp, clim$clim_obs, History:\cr 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN - 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Modify with multiApply feature + 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature } \keyword{datagen} diff --git a/man/Regression.Rd b/man/Regression.Rd index aae7546b..f6a28a25 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/Regression.R \name{Regression} \alias{Regression} - \title{Compute the regression of an array on another along one dimension.} \usage{ Regression(datay, datax, time_dim = "sdate", formula = y ~ x, pval = TRUE, -- GitLab