From d63cf7574f2052bf8688353d02ef02a047688e9b Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 25 Oct 2019 13:09:52 +0200 Subject: [PATCH 01/16] Function Regression using multiApply --- R/Regression.R | 214 +++++++++++++++++++--------------------------- man/Regression.Rd | 55 ++++-------- 2 files changed, 108 insertions(+), 161 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index 2afd14b8..09e972b4 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -6,10 +6,12 @@ #'Provides also the vary data filtered out from the regression onto varx.\cr #'The confidence interval relies on a student-T distribution. #' -#'@param vary Array of any number of dimensions up to 10. -#'@param varx Array of any number of dimensions up to 10. -#' Same dimensions as vary. -#'@param posREG Position along which to compute the regression. +#'@param vary Array of any number of named dimensions. +#'@param varx Array of any number of named dimensions. +#'The same dimensions as vary are expected. +#'@param posREG The name of the dimension along which to compute the regression. +#'@param na.action a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression. +#'@param formula an object of class "formula" (see function \code{link[stats]{lm}}). #' #'@return #' \item{$regression}{ @@ -27,131 +29,95 @@ #'@author History:\cr #'0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN +#'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply #'@examples -#'# See examples on Load() to understand the first lines in this example -#' \dontrun{ -#'data_path <- system.file('sample_data', package = 's2dverification') -#'expA <- list(name = 'experiment', path = file.path(data_path, -#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', -#' '$VAR_NAME$_$START_DATE$.nc')) -#'obsX <- list(name = 'observation', path = file.path(data_path, -#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', -#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) -#' -#'# Now we are ready to use Load(). -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- Load('tos', list(expA), list(obsX), startDates, -#' output = 'lonlat', latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#' } -#' \dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#' } -#'sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2) -#'sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2) -#'reg <- Regression(Mean1Dim(sampleData$mod, 2), -#' Mean1Dim(sampleData$obs, 2), 2) -#'PlotEquiMap(reg$regression[1, 2, 1, , ], sampleData$lon, sampleData$lat, -#' toptitle='Regression of the prediction on the observations', -#' sizetit = 0.5) -#' +#'vary <- 1:(2*3*4*5*6) +#'dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) +#'varx <- vary +#'res <- Regression(vary, varx, posREG = 'time') +#'str(res) +#'varx <- 1:(2*3*4*5*6) +#'dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) + #'@importFrom stats lm na.omit confint #'@export -Regression <- function(vary, varx, posREG = 2) { - # - # Enlarge the size of varx and vary to 10 - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - dimsvar <- dim(vary) - if (is.null(dimsvar)) { - dimsvar <- length(vary) - } - if (length(dimsvar) == 1) { - if (length(varx) != dimsvar[1]) { - stop("varx and vary should have the same dimensions") - } - } else { - for (jdim in 1:length(dimsvar)) { - if (dim(varx)[jdim] != dimsvar[jdim]) { - stop("varx and vary should have the same dimensions") - } +Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formula = y ~ x) { + if (is.vector(vary)) { + dim(vary) <- c(length(vary)) + if (is.character(posREG)) { + names(dim(vary)) <- posREG + } else { + if (is.vector(varx)) { + posREG <- 'Dim1' + } else if (length(dim(varx)) == 1) { + posREG <- names(dim(varx)) + } else { + stop("Parameter 'vary' and 'varx' must be arrays with named", + " dimensions.") + } + names(dim(vary)) <- posREG + } } - } - enlvarx <- Enlarge(varx, 10) - enlvary <- Enlarge(vary, 10) - outdim <- dim(enlvarx) - - # - # Initialize intermediate and output matrices - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - posaperm <- 1:10 - posaperm[1] <- posREG - posaperm[posREG] <- 1 - enlvarx <- aperm(enlvarx, posaperm) - enlvary <- aperm(enlvary, posaperm) - dimsaperm <- outdim[posaperm] - - enlfilt <- array(dim = dimsaperm) - filtered <- array(dim = dimsvar) - dimsaperm[1] <- 4 - enlreg <- array(dim = dimsaperm) - dimsvar[posREG] <- 4 - regression <- array(dim = dimsvar) - # - # Loop on all dimensions to compute regressions - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - for (j2 in 1:dimsaperm[2]){ - for (j3 in 1:dimsaperm[3]){ - for (j4 in 1:dimsaperm[4]){ - for (j5 in 1:dimsaperm[5]){ - for (j6 in 1:dimsaperm[6]){ - for (j7 in 1:dimsaperm[7]){ - for (j8 in 1:dimsaperm[8]){ - for (j9 in 1:dimsaperm[9]){ - for (j10 in 1:dimsaperm[10]){ - tmpy <- enlvary[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - tmpx <- enlvarx[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - if ((length(sort(tmpy)) > 0) & (length(sort(tmpx)) > 0)) { - lm.out <- lm(tmpy ~ tmpx, na.action = na.omit) - enlreg[1, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out)[2, 1] - enlreg[2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[2] - enlreg[3, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out)[2, 2] - enlreg[4, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[1] - enlfilt[which(is.na(tmpx) == FALSE & is.na(tmpy) == FALSE), j2, - j3, j4, j5, j6, j7, j8, j9, j10] <- tmpy[which( - is.na(tmpx) == FALSE & is.na(tmpy - ) == FALSE)] - lm.out$fitted.values - } - } - } - } + if (is.vector(varx)) { + if (dim(vary)[posREG] == length(varx)) { + if (length(dim(vary)) == 1) { + dim(varx) <- c(length(varx)) + names(varx) <- posREG + } else { + varx <- rep(varx, prod(dim(vary)[-posREG])) + dim(varx) <- dim(vary) } - } } - } } - } - - # - # Back to the original dimensions - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - regression[] <- aperm(enlreg, posaperm) - filtered[] <- aperm(enlfilt, posaperm) - - # - # Outputs - # ~~~~~~~~~ - # - invisible(list(regression = regression, filtered = filtered)) + #compare dimensions + dimsy <- dim(vary) + dimsx <- dim(varx) + if (length(dimsy) != length(dimsx)) { + stop("Parameter 'vary' and 'varx' must have the same ", + "dimensions.") + } + if (is.null(names(dimsy)) & is.null(names(dimsx))) { + names(dim(vary)) <- paste0("Dim", 1 : length(dimsy)) + names(dim(vary)) <- paste0("Dim", 1 : length(dimsy)) + if (is.character(posREG)) { + stop("Parameters 'vary' and 'varx' must have dimension names.") + } + } + + adjusted <- Apply(list(vary, varx), target_dims = posREG, fun = .Regression, + na.action = na.action, formula = formula, + output_dims = list(regression = 'coeff', filtered = 'val')) + invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) +} +.Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { + if (!is.numeric(x) | !is.numeric(y)) { + stop("Parameter 'x' and 'y' must be numeric.") + } + if (!is.function(na.action) & !is.numeric(na.action)) { + warning("Parameter 'na.action' must be a function for dealing ", + "with NA values (e.g.: na.omit, na.exclude, na.fail, na.pass)", + " or a numeric value indicating the number of NA values ", + "allowed before returning an NA.") + } + if (is.numeric(na.action)) { + NApos <- 1 : length(x) + NApos[which(is.na(x))] <- NA + NApos[which(is.na(y))] <- NA + if (sum(is.na(NApos)) > na.action) { + filter <- rep(NA, length(x)) + regress <- rep(NA, 4) + } else { + adj <- lm(formula, na.action = na.action) + filter <- NApos + filter[!is.na(filter)] <- adj$fitted.values + regress <- c(confint(adj)[2, 1], adj$coefficients[2], + confint(adj)[2, 2], adj$coefficients[1]) + } + } else { + adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.action) + filter <- adj$fitted.values + regress <- c(confint(adj)[2, 1], adj$coefficients[2], + confint(adj)[2, 2], adj$coefficients[1]) + } + return(list(regression = regress, filtered = filter)) } diff --git a/man/Regression.Rd b/man/Regression.Rd index 55646576..dd107159 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -4,15 +4,20 @@ \alias{Regression} \title{Computes The Regression Of An Array On Another Along A Dimension} \usage{ -Regression(vary, varx, posREG = 2) +Regression(vary, varx, posREG = "member", na.action = na.omit, formula = y + ~ x) } \arguments{ -\item{vary}{Array of any number of dimensions up to 10.} +\item{vary}{Array of any number of named dimensions.} -\item{varx}{Array of any number of dimensions up to 10. -Same dimensions as vary.} +\item{varx}{Array of any number of named dimensions. +The same dimensions as vary are expected.} -\item{posREG}{Position along which to compute the regression.} +\item{posREG}{The name of the dimension along which to compute the regression.} + +\item{na.action}{a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression.} + +\item{formula}{an object of class "formula" (see function \code{link[stats]{lm}}).} } \value{ \item{$regression}{ @@ -34,43 +39,19 @@ Provides also the vary data filtered out from the regression onto varx.\cr The confidence interval relies on a student-T distribution. } \examples{ -# See examples on Load() to understand the first lines in this example - \dontrun{ -data_path <- system.file('sample_data', package = 's2dverification') -expA <- list(name = 'experiment', path = file.path(data_path, - 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', - '$VAR_NAME$_$START_DATE$.nc')) -obsX <- list(name = 'observation', path = file.path(data_path, - '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', - '$VAR_NAME$_$YEAR$$MONTH$.nc')) - -# Now we are ready to use Load(). -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- Load('tos', list(expA), list(obsX), startDates, - output = 'lonlat', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } - \dontshow{ -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'lonlat', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } -sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2) -sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2) -reg <- Regression(Mean1Dim(sampleData$mod, 2), - Mean1Dim(sampleData$obs, 2), 2) -PlotEquiMap(reg$regression[1, 2, 1, , ], sampleData$lon, sampleData$lat, - toptitle='Regression of the prediction on the observations', - sizetit = 0.5) - +vary <- 1:(2*3*4*5*6) +dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) +varx <- vary +res <- Regression(vary, varx, posREG = 'time') +str(res) +varx <- 1:(2*3*4*5*6) +dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) } \author{ History:\cr 0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN +2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply } \keyword{datagen} -- GitLab From 4ded226aeac9b7ca6cd8ed1e6e570ef5e87e1098 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 25 Oct 2019 13:17:52 +0200 Subject: [PATCH 02/16] Adding import multiApply --- NAMESPACE | 1 + R/Regression.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 4e855789..00c59bfa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ import(graphics) import(mapproj) import(maps) import(methods) +import(multiApply) import(ncdf4) import(parallel) import(plyr) diff --git a/R/Regression.R b/R/Regression.R index 09e972b4..134cb6b0 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -13,6 +13,7 @@ #'@param na.action a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression. #'@param formula an object of class "formula" (see function \code{link[stats]{lm}}). #' +#'@import multiApply #'@return #' \item{$regression}{ #' Array with same dimensions as varx and vary except along posREG -- GitLab From 67273a93f19db8bfb4ca6fac463e3a279fb0d847 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 25 Oct 2019 14:20:41 +0200 Subject: [PATCH 03/16] Import in description file --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 0045265c..85b2eb8b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,7 @@ Depends: Imports: abind, bigmemory, + multiApply, GEOmap, geomapdata, mapproj, -- GitLab From a6ba581bf6d67f48afde95fdc4c5a3b182113c91 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 28 Oct 2019 18:06:46 +0100 Subject: [PATCH 04/16] correction na.action not numeric --- R/Regression.R | 16 ++++++++++++---- man/Regression.Rd | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index 134cb6b0..e197ba5c 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -37,9 +37,8 @@ #'varx <- vary #'res <- Regression(vary, varx, posREG = 'time') #'str(res) -#'varx <- 1:(2*3*4*5*6) -#'dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) - +#'vary[1] <- NA +#'res <- Regression(vary, varx, posREG = 'time') #'@importFrom stats lm na.omit confint #'@export Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formula = y ~ x) { @@ -90,6 +89,10 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu output_dims = list(regression = 'coeff', filtered = 'val')) invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) } +y <- 1: 10 +x <- 1:10 +y[1] <- NA + .Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { if (!is.numeric(x) | !is.numeric(y)) { stop("Parameter 'x' and 'y' must be numeric.") @@ -115,10 +118,15 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu confint(adj)[2, 2], adj$coefficients[1]) } } else { + NApos <- 1 : length(x) + NApos[which(is.na(x))] <- NA + NApos[which(is.na(y))] <- NA adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.action) - filter <- adj$fitted.values + filter <- NApos + filter[!is.na(filter)] <- adj$fitted.values regress <- c(confint(adj)[2, 1], adj$coefficients[2], confint(adj)[2, 2], adj$coefficients[1]) } + return(list(regression = regress, filtered = filter)) } diff --git a/man/Regression.Rd b/man/Regression.Rd index dd107159..adc005e4 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -44,8 +44,8 @@ dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) varx <- vary res <- Regression(vary, varx, posREG = 'time') str(res) -varx <- 1:(2*3*4*5*6) -dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) +vary[1] <- NA +res <- Regression(vary, varx, posREG = 'time') } \author{ History:\cr -- GitLab From 40a27a19bbb6666bb06d3dbfd66714ff5481f729 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 28 Oct 2019 18:09:23 +0100 Subject: [PATCH 05/16] remove unnecessary lines --- R/Regression.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index e197ba5c..48d5551e 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -89,10 +89,6 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu output_dims = list(regression = 'coeff', filtered = 'val')) invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) } -y <- 1: 10 -x <- 1:10 -y[1] <- NA - .Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { if (!is.numeric(x) | !is.numeric(y)) { stop("Parameter 'x' and 'y' must be numeric.") -- GitLab From 12fa419fe9c90d6e59763de12b12217b6405a8e0 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 28 Oct 2019 18:37:04 +0100 Subject: [PATCH 06/16] Example working in arrays of 1 Dimension --- R/Regression.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/Regression.R b/R/Regression.R index 48d5551e..f639e9b8 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -39,6 +39,11 @@ #'str(res) #'vary[1] <- NA #'res <- Regression(vary, varx, posREG = 'time') +#'exp <- 1 : 70 +#'obs <- rnorm(70) +#'dim(exp) <- c(time = 70) +#'dim(obs) <- c(time = 70) +#'reg <- Regression(exp, obs, posREG = 'time') #'@importFrom stats lm na.omit confint #'@export Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formula = y ~ x) { -- GitLab From 6f39076f75c3f6715368a1ea23604560cb60a85c Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 28 Oct 2019 18:47:50 +0100 Subject: [PATCH 07/16] na.action is set as na.omit when a thershold is selected and not reached --- R/Regression.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/Regression.R b/R/Regression.R index f639e9b8..43ed91b9 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -44,7 +44,10 @@ #'dim(exp) <- c(time = 70) #'dim(obs) <- c(time = 70) #'reg <- Regression(exp, obs, posREG = 'time') +#'reg <- Regression(exp, obs, posREG = 'time', na.action = 10) +#' #'@importFrom stats lm na.omit confint +#' #'@export Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formula = y ~ x) { if (is.vector(vary)) { @@ -112,7 +115,7 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu filter <- rep(NA, length(x)) regress <- rep(NA, 4) } else { - adj <- lm(formula, na.action = na.action) + adj <- lm(formula, na.action = na.omit) filter <- NApos filter[!is.na(filter)] <- adj$fitted.values regress <- c(confint(adj)[2, 1], adj$coefficients[2], -- GitLab From 842ed1bbc979de1f0e3a3b22d0f1d35aba5099b5 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 29 Oct 2019 10:29:00 +0100 Subject: [PATCH 08/16] more examples --- R/Regression.R | 9 ++++++++- man/Regression.Rd | 13 +++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/R/Regression.R b/R/Regression.R index 43ed91b9..f897d5d9 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -37,8 +37,14 @@ #'varx <- vary #'res <- Regression(vary, varx, posREG = 'time') #'str(res) +#'varx[c(1,5,7,9:22)] <- NA +#'res <- Regression(vary, varx, posREG = 'time', na.action = 2) +#'length(which(is.na(res$filtered))) +#'res <- Regression(vary, varx, posREG = 'time', na.action = 0) +#'length(which(is.na(res$filtered))) #'vary[1] <- NA #'res <- Regression(vary, varx, posREG = 'time') +#'res <- Regression(vary, varx, posREG = 'time', na.action = 5) #'exp <- 1 : 70 #'obs <- rnorm(70) #'dim(exp) <- c(time = 70) @@ -94,10 +100,11 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu adjusted <- Apply(list(vary, varx), target_dims = posREG, fun = .Regression, na.action = na.action, formula = formula, - output_dims = list(regression = 'coeff', filtered = 'val')) + output_dims = list(regression = 'coeff', filtered = 'time')) invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) } .Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { +print(x) if (!is.numeric(x) | !is.numeric(y)) { stop("Parameter 'x' and 'y' must be numeric.") } diff --git a/man/Regression.Rd b/man/Regression.Rd index adc005e4..0eb240b7 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -44,8 +44,21 @@ dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) varx <- vary res <- Regression(vary, varx, posREG = 'time') str(res) +varx[c(1,5,7,9:22)] <- NA +res <- Regression(vary, varx, posREG = 'time', na.action = 2) +length(which(is.na(res$filtered))) +res <- Regression(vary, varx, posREG = 'time', na.action = 0) +length(which(is.na(res$filtered))) vary[1] <- NA res <- Regression(vary, varx, posREG = 'time') +res <- Regression(vary, varx, posREG = 'time', na.action = 5) +exp <- 1 : 70 +obs <- rnorm(70) +dim(exp) <- c(time = 70) +dim(obs) <- c(time = 70) +reg <- Regression(exp, obs, posREG = 'time') +reg <- Regression(exp, obs, posREG = 'time', na.action = 10) + } \author{ History:\cr -- GitLab From a67a2f03b960200e4065796f6f4d9fa8018c2026 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 29 Oct 2019 10:31:18 +0100 Subject: [PATCH 09/16] Change name dimension output --- R/Regression.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index f897d5d9..f708f277 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -100,11 +100,10 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu adjusted <- Apply(list(vary, varx), target_dims = posREG, fun = .Regression, na.action = na.action, formula = formula, - output_dims = list(regression = 'coeff', filtered = 'time')) + output_dims = list(regression = 'coeff', filtered = posREG)) invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) } .Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { -print(x) if (!is.numeric(x) | !is.numeric(y)) { stop("Parameter 'x' and 'y' must be numeric.") } -- GitLab From 81c57517da57978fdc799e7a33b936bee1957619 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 29 Oct 2019 11:27:27 +0100 Subject: [PATCH 10/16] added parameter data to lm function --- R/Regression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Regression.R b/R/Regression.R index f708f277..bacc5454 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -121,7 +121,7 @@ Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formu filter <- rep(NA, length(x)) regress <- rep(NA, 4) } else { - adj <- lm(formula, na.action = na.omit) + adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.omit) filter <- NApos filter[!is.na(filter)] <- adj$fitted.values regress <- c(confint(adj)[2, 1], adj$coefficients[2], -- GitLab From 2ba48eddd2afb7e03c32e86d26756ebc08bd0ae9 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 24 Jan 2020 11:51:09 +0100 Subject: [PATCH 11/16] Complete Regression()+Apply() --- DESCRIPTION | 3 +- R/Regression.R | 290 +++++++++++++++++++++++++++++----------------- man/Regression.Rd | 79 ++++++++----- 3 files changed, 237 insertions(+), 135 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 85b2eb8b..3be1e882 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,8 @@ Imports: ncdf4, parallel, plyr, - SpecsVerification (>= 0.5.0) + SpecsVerification (>= 0.5.0), + multiApply (>= 2.0.0) Suggests: easyVerification, testthat diff --git a/R/Regression.R b/R/Regression.R index bacc5454..fe4647b5 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -1,29 +1,45 @@ -#'Computes The Regression Of An Array On Another Along A Dimension +#'Compute the regression of an array on another along a dimension. #' -#'Computes the regression of the input matrice vary on the input matrice varx -#'along the posREG dimension by least square fitting. Provides the slope of -#'the regression, the associated confidence interval, and the intercept.\cr -#'Provides also the vary data filtered out from the regression onto varx.\cr -#'The confidence interval relies on a student-T distribution. +#'Compute the regression of the array 'datay' on the array 'datax' along the +#''time_dim' dimension by least square fitting (default) or self-defined model. +#'The function provides the slope of the regression, the intercept, and the +#'associated p-value and confidence interval. The filtered datay from the +#'regression onto datax is also provided.\cr +#'The p-value relies on the F distribution, and the confidence interval relies +#'on the student-T distribution. #' -#'@param vary Array of any number of named dimensions. -#'@param varx Array of any number of named dimensions. -#'The same dimensions as vary are expected. -#'@param posREG The name of the dimension along which to compute the regression. -#'@param na.action a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression. -#'@param formula an object of class "formula" (see function \code{link[stats]{lm}}). +#'@param datay An numeric array as predictand including the dimension along +#' which the regression is computed. +#'@param datax An numeric array as predictor. The dimension should be identical +#' as parameter 'datay'. +#'@param time_dim A character string indicating the dimension along which to +#' compute the trend. +#'@param na.action A function or an integer. A function (e.g., na.omit, +#' na.exclude, na.fail, na.pass) indicates what should happen when the data +#' contain NAs. A numeric indicates the maximum number of NA position (it +#' counts as long as one of datay and datax is NA) allowed for compute +#' regression. Set na.omit as default. +#'@param formula An object of class "formula" (see function \code{link[stats]{lm}}). +#'@param pval A logical value indicating whether to retrieve the p-value +#' or not. The default value is TRUE. +#'@param conf A logical value indicating whether to retrieve the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric indicating the confidence level for the +#' regression computation. The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. Default value is NULL. #' #'@import multiApply #'@return #' \item{$regression}{ -#' Array with same dimensions as varx and vary except along posREG +#' Array with same dimensions as datax and datay except along time_dim #' dimension which is replaced by a length 4 dimension, corresponding #' to the lower limit of the 95\% confidence interval, the slope, #' the upper limit of the 95\% confidence interval and the intercept. #' } #' \item{$filtered}{ -#' Same dimensions as vary filtered out from the regression onto varx -#' along the posREG dimension. +#' Same dimensions as datay filtered out from the regression onto datax +#' along the time_dim dimension. #' } #' #'@keywords datagen @@ -31,112 +47,176 @@ #'0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN #'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply +#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply #'@examples -#'vary <- 1:(2*3*4*5*6) -#'dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) -#'varx <- vary -#'res <- Regression(vary, varx, posREG = 'time') +#'datay <- 1:(2*3*4*5*6) +#'dim(datay) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) +#'datax <- datay +#'res <- Regression(datay, datax, time_dim = 'time') #'str(res) -#'varx[c(1,5,7,9:22)] <- NA -#'res <- Regression(vary, varx, posREG = 'time', na.action = 2) +#'datax[c(1,5,7,9:22)] <- NA +#'res <- Regression(datay, datax, time_dim = 'time', na.action = 2) #'length(which(is.na(res$filtered))) -#'res <- Regression(vary, varx, posREG = 'time', na.action = 0) +#'res <- Regression(datay, datax, time_dim = 'time', na.action = 0) #'length(which(is.na(res$filtered))) -#'vary[1] <- NA -#'res <- Regression(vary, varx, posREG = 'time') -#'res <- Regression(vary, varx, posREG = 'time', na.action = 5) +#'datay[1] <- NA +#'res <- Regression(datay, datax, time_dim = 'time') +#'res <- Regression(datay, datax, time_dim = 'time', na.action = 5) #'exp <- 1 : 70 #'obs <- rnorm(70) #'dim(exp) <- c(time = 70) #'dim(obs) <- c(time = 70) -#'reg <- Regression(exp, obs, posREG = 'time') -#'reg <- Regression(exp, obs, posREG = 'time', na.action = 10) +#'reg <- Regression(exp, obs, time_dim = 'time') +#'reg <- Regression(exp, obs, time_dim = 'time', na.action = 10) #' #'@importFrom stats lm na.omit confint #' #'@export -Regression <- function(vary, varx, posREG = 'member', na.action = na.omit, formula = y ~ x) { - if (is.vector(vary)) { - dim(vary) <- c(length(vary)) - if (is.character(posREG)) { - names(dim(vary)) <- posREG - } else { - if (is.vector(varx)) { - posREG <- 'Dim1' - } else if (length(dim(varx)) == 1) { - posREG <- names(dim(varx)) - } else { - stop("Parameter 'vary' and 'varx' must be arrays with named", - " dimensions.") - } - names(dim(vary)) <- posREG - } - } - if (is.vector(varx)) { - if (dim(vary)[posREG] == length(varx)) { - if (length(dim(vary)) == 1) { - dim(varx) <- c(length(varx)) - names(varx) <- posREG - } else { - varx <- rep(varx, prod(dim(vary)[-posREG])) - dim(varx) <- dim(vary) - } - } - } - #compare dimensions - dimsy <- dim(vary) - dimsx <- dim(varx) - if (length(dimsy) != length(dimsx)) { - stop("Parameter 'vary' and 'varx' must have the same ", - "dimensions.") +Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, + formula = y ~ x, pval = T, conf = T, + conf.lev = 0.95, ncores = NULL) { + + # Check inputs + ## datay and datax + if (is.null(datay) | is.null(datax)) { + stop("Parameter 'datay' and 'datax' cannot be NULL.") + } + if (!is.numeric(datay) | !is.numeric(datax)) { + stop("Parameter 'datay' and 'datax' must be a numeric array.") + } + if (is.null(dim(datay)) | is.null(dim(datax))) { + stop("Parameter 'datay' and 'datax' must be at least one dimension 'time_dim'.") + } + if(any(is.null(names(dim(datay))))| any(nchar(names(dim(datay))) == 0) | + any(is.null(names(dim(datax))))| any(nchar(names(dim(datax))) == 0)) { + stop("Parameter 'datay' and 'datax' must have dimension names.") + } + if(!all(names(dim(datay)) %in% names(dim(datax))) | + !all(names(dim(datax)) %in% names(dim(datay)))) { + stop("Parameter 'datay' and 'datax' must have same dimension name") + } + name_datay <- sort(names(dim(datay))) + name_datax <- sort(names(dim(datax))) + if(!all(dim(datay)[name_datay] == dim(datax)[name_datax])) { + stop("Parameter 'datay' and 'datax' must have same length of all dimensions.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(datay)) | !time_dim %in% names(dim(datax))) { + stop("Parameter 'time_dim' is not found in 'datay' or 'datax' dimension.") + } + ## na.action + if (!is.function(na.action) & !is.numeric(na.action)) { + stop(paste0("Parameter 'na.action' must be a function for NA values ", + "(e.g.,: na.omit, na.exclude, na.fail, na.pass) or a numeric", + "indicating the number of NA values allowed before returning NA.")) + } + if (is.numeric(na.action)) { + if (na.action %% 1 != 0 | na.action < 0 | length(na.action) > 1) { + stop(paste0("Parameter 'na.action' must be a function for NA values ", + "(e.g.,: na.omit, na.exclude, na.fail, na.pass) or a numeric", + "indicating the number of NA values allowed before returning NA.")) } - if (is.null(names(dimsy)) & is.null(names(dimsx))) { - names(dim(vary)) <- paste0("Dim", 1 : length(dimsy)) - names(dim(vary)) <- paste0("Dim", 1 : length(dimsy)) - if (is.character(posREG)) { - stop("Parameters 'vary' and 'varx' must have dimension names.") - } + } + ## formula + if (class(formula) != 'formula') { + stop("Parameter 'formula' must the an object of class 'formula'.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ##conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } + } + + ############################### + # Calculate Regression - adjusted <- Apply(list(vary, varx), target_dims = posREG, fun = .Regression, - na.action = na.action, formula = formula, - output_dims = list(regression = 'coeff', filtered = posREG)) - invisible(list(regression = adjusted$regression, filtered = adjusted$filtered)) + res <- Apply(list(datay, datax), + target_dims = time_dim, + output_dims = list(regression = 'stats', + conf.lower = 'stats', + conf.upper = 'stats', + p.val = NULL, + filtered = time_dim), + fun = .Regression, conf.lev = conf.lev, + na.action = na.action, formula = formula, + conf = conf, pval = pval, ncores = ncores) + + return(invisible(res)) } -.Regression <- function(y, x, na.action = na.omit, formula = y ~ x) { - if (!is.numeric(x) | !is.numeric(y)) { - stop("Parameter 'x' and 'y' must be numeric.") - } - if (!is.function(na.action) & !is.numeric(na.action)) { - warning("Parameter 'na.action' must be a function for dealing ", - "with NA values (e.g.: na.omit, na.exclude, na.fail, na.pass)", - " or a numeric value indicating the number of NA values ", - "allowed before returning an NA.") + + +.Regression <- function(y, x, na.action = na.omit, formula = y ~ x, conf.lev = 0.95, + conf = conf, pval = pval) { + NApos <- 1:length(x) + NApos[which(is.na(x) | is.na(y))] <- NA + filtered <- rep(NA, length(x)) + check_na <- FALSE + + if (is.numeric(na.action)) { + na_threshold <- na.action + na.action <- na.omit + check_na <- TRUE + } + + # remove NAs for potential poly() + x2 <- x[!is.na(NApos)] + y2 <- y[!is.na(NApos)] + + lm.out <- lm(formula, data = data.frame(x = x2, y = y2), na.action = na.action) + coeff <- lm.out$coefficients + if (conf) { + conf.lower <- confint(lm.out, level = conf.lev)[, 1] + conf.upper <- confint(lm.out, level = conf.lev)[, 2] + } + if (pval) { + f <- summary(lm.out)$fstatistic + p.val <- pf(f[1], f[2], f[3],lower.tail = F) + } + filtered[!is.na(NApos)] <- y[!is.na(NApos)] - lm.out$fitted.values + + # Check if NA is too many + if (check_na) { + if (sum(is.na(NApos)) > na_threshold) { #turn everything into NA + coeff[which(!is.na(coeff))] <- NA + if (conf) { + conf.lower[which(!is.na(conf.lower))] <- NA + conf.upper[which(!is.na(conf.upper))] <- NA + } + if (pval) { + p.val[which(!is.na(p.val))] <- NA + } + filtered[which(!is.na(filtered))] <- NA } - if (is.numeric(na.action)) { - NApos <- 1 : length(x) - NApos[which(is.na(x))] <- NA - NApos[which(is.na(y))] <- NA - if (sum(is.na(NApos)) > na.action) { - filter <- rep(NA, length(x)) - regress <- rep(NA, 4) - } else { - adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.omit) - filter <- NApos - filter[!is.na(filter)] <- adj$fitted.values - regress <- c(confint(adj)[2, 1], adj$coefficients[2], - confint(adj)[2, 2], adj$coefficients[1]) - } - } else { - NApos <- 1 : length(x) - NApos[which(is.na(x))] <- NA - NApos[which(is.na(y))] <- NA - adj <- lm(formula, data = data.frame(x = x, y = y), na.action = na.action) - filter <- NApos - filter[!is.na(filter)] <- adj$fitted.values - regress <- c(confint(adj)[2, 1], adj$coefficients[2], - confint(adj)[2, 2], adj$coefficients[1]) - } + } + + if (conf & pval) { + return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper, + p.val = p.val, filtered = filtered)) + } else if (conf & !pval) { + return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper, + filtered = filtered)) + } else if (!conf & pval) { + return(list(regression = coeff, + p.val = p.val, filtered = filtered)) + } else if (!conf & !pval) { + return(list(regression = coeff, filtered = filtered)) + } - return(list(regression = regress, filtered = filter)) } diff --git a/man/Regression.Rd b/man/Regression.Rd index 0eb240b7..02e0db4a 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -2,62 +2,82 @@ % Please edit documentation in R/Regression.R \name{Regression} \alias{Regression} -\title{Computes The Regression Of An Array On Another Along A Dimension} +\title{Compute the regression of an array on another along a dimension.} \usage{ -Regression(vary, varx, posREG = "member", na.action = na.omit, formula = y - ~ x) +Regression(datay, datax, time_dim = "sdate", na.action = na.omit, + formula = y ~ x, pval = T, conf = T, conf.lev = 0.95, ncores = NULL) } \arguments{ -\item{vary}{Array of any number of named dimensions.} +\item{datay}{An numeric array as predictand including the dimension along +which the regression is computed.} -\item{varx}{Array of any number of named dimensions. -The same dimensions as vary are expected.} +\item{datax}{An numeric array as predictor. The dimension should be identical +as parameter 'datay'.} -\item{posREG}{The name of the dimension along which to compute the regression.} +\item{time_dim}{A character string indicating the dimension along which to +compute the trend.} -\item{na.action}{a logical value indicating whether NA values should be stripped before the computation proceeds or a numeric value indicating the maximum number of NA values allowed to compute the regression.} +\item{na.action}{A function or an integer. A function (e.g., na.omit, +na.exclude, na.fail, na.pass) indicates what should happen when the data +contain NAs. A numeric indicates the maximum number of NA position (it +counts as long as one of datay and datax is NA) allowed for compute +regression. Set na.omit as default.} -\item{formula}{an object of class "formula" (see function \code{link[stats]{lm}}).} +\item{formula}{An object of class "formula" (see function \code{link[stats]{lm}}).} + +\item{pval}{A logical value indicating whether to retrieve the p-value +or not. The default value is TRUE.} + +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} + +\item{conf.lev}{A numeric indicating the confidence level for the +regression computation. The default value is 0.95.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. Default value is NULL.} } \value{ \item{$regression}{ - Array with same dimensions as varx and vary except along posREG + Array with same dimensions as datax and datay except along time_dim dimension which is replaced by a length 4 dimension, corresponding to the lower limit of the 95\% confidence interval, the slope, the upper limit of the 95\% confidence interval and the intercept. } \item{$filtered}{ - Same dimensions as vary filtered out from the regression onto varx - along the posREG dimension. + Same dimensions as datay filtered out from the regression onto datax + along the time_dim dimension. } } \description{ -Computes the regression of the input matrice vary on the input matrice varx -along the posREG dimension by least square fitting. Provides the slope of -the regression, the associated confidence interval, and the intercept.\cr -Provides also the vary data filtered out from the regression onto varx.\cr -The confidence interval relies on a student-T distribution. +Compute the regression of the array 'datay' on the array 'datax' along the +'time_dim' dimension by least square fitting (default) or self-defined model. +The function provides the slope of the regression, the intercept, and the +associated p-value and confidence interval. The filtered datay from the +regression onto datax is also provided.\cr +The p-value relies on the F distribution, and the confidence interval relies +on the student-T distribution. } \examples{ -vary <- 1:(2*3*4*5*6) -dim(vary) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) -varx <- vary -res <- Regression(vary, varx, posREG = 'time') +datay <- 1:(2*3*4*5*6) +dim(datay) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) +datax <- datay +res <- Regression(datay, datax, time_dim = 'time') str(res) -varx[c(1,5,7,9:22)] <- NA -res <- Regression(vary, varx, posREG = 'time', na.action = 2) +datax[c(1,5,7,9:22)] <- NA +res <- Regression(datay, datax, time_dim = 'time', na.action = 2) length(which(is.na(res$filtered))) -res <- Regression(vary, varx, posREG = 'time', na.action = 0) +res <- Regression(datay, datax, time_dim = 'time', na.action = 0) length(which(is.na(res$filtered))) -vary[1] <- NA -res <- Regression(vary, varx, posREG = 'time') -res <- Regression(vary, varx, posREG = 'time', na.action = 5) +datay[1] <- NA +res <- Regression(datay, datax, time_dim = 'time') +res <- Regression(datay, datax, time_dim = 'time', na.action = 5) exp <- 1 : 70 obs <- rnorm(70) dim(exp) <- c(time = 70) dim(obs) <- c(time = 70) -reg <- Regression(exp, obs, posREG = 'time') -reg <- Regression(exp, obs, posREG = 'time', na.action = 10) +reg <- Regression(exp, obs, time_dim = 'time') +reg <- Regression(exp, obs, time_dim = 'time', na.action = 10) } \author{ @@ -65,6 +85,7 @@ History:\cr 0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN 2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply } \keyword{datagen} -- GitLab From 1753e42786d31959ccd824fc60f0e96d822dfd4e Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 24 Jan 2020 14:55:29 +0100 Subject: [PATCH 12/16] Add Regression() unit test --- R/Regression.R | 34 ++++--- tests/testthat/test-Regression.R | 162 +++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 13 deletions(-) create mode 100644 tests/testthat/test-Regression.R diff --git a/R/Regression.R b/R/Regression.R index fe4647b5..b8ecef54 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -109,15 +109,15 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, } ## na.action if (!is.function(na.action) & !is.numeric(na.action)) { - stop(paste0("Parameter 'na.action' must be a function for NA values ", - "(e.g.,: na.omit, na.exclude, na.fail, na.pass) or a numeric", - "indicating the number of NA values allowed before returning NA.")) + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) } if (is.numeric(na.action)) { - if (na.action %% 1 != 0 | na.action < 0 | length(na.action) > 1) { - stop(paste0("Parameter 'na.action' must be a function for NA values ", - "(e.g.,: na.omit, na.exclude, na.fail, na.pass) or a numeric", - "indicating the number of NA values allowed before returning NA.")) + if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) } } ## formula @@ -146,14 +146,22 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, ############################### # Calculate Regression - + if (conf & pval) { + output_dims <- list(regression = 'stats', conf.lower = 'stats', + conf.upper = 'stats', p.val = NULL, filtered = time_dim) + } else if (conf & !pval) { + output_dims <- list(regression = 'stats', conf.lower = 'stats', + conf.upper = 'stats', filtered = time_dim) + } else if (!conf & pval) { + output_dims <- list(regression = 'stats', + p.val = NULL, filtered = time_dim) + } else if (!conf & !pval) { + output_dims <- list(regression = 'stats', filtered = time_dim) + } + res <- Apply(list(datay, datax), target_dims = time_dim, - output_dims = list(regression = 'stats', - conf.lower = 'stats', - conf.upper = 'stats', - p.val = NULL, - filtered = time_dim), + output_dims = output_dims, fun = .Regression, conf.lev = conf.lev, na.action = na.action, formula = formula, conf = conf, pval = pval, ncores = ncores) diff --git a/tests/testthat/test-Regression.R b/tests/testthat/test-Regression.R new file mode 100644 index 00000000..82af262e --- /dev/null +++ b/tests/testthat/test-Regression.R @@ -0,0 +1,162 @@ +context("s2dverification::Regression tests") + +############################################## + # dat1 + set.seed(1) + datay1 <- array(c(-39:40) + rnorm(80), + dim = c(sdate = 5, ftime = 2, lon = 2, lat = 4)) + set.seed(2) + datax1 <- array(c(1:80) + rnorm(80), + dim = c(sdate = 5, ftime = 2, lon = 2, lat = 4)) + # dat2 + datay2 <- datay1 + set.seed(1) + na <- floor(runif(5, min = 1, max = 80)) + datay2[na] <- NA + datax2 <- datax1 + set.seed(2) + na <- floor(runif(5, min = 1, max = 80)) + datax2[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + Regression(c(), c()), + "Parameter 'datay' and 'datax' cannot be NULL." + ) + expect_error( + Regression(c('b'), c('a')), + "Parameter 'datay' and 'datax' must be a numeric array." + ) + expect_error( + Regression(c(1:10), c(2:4)), + "Parameter 'datay' and 'datax' must be at least one dimension 'time_dim'." + ) + expect_error( + Regression(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'datay' and 'datax' must have dimension names." + ) + expect_error( + Regression(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'datay' and 'datax' must have same dimension name" + ) + expect_error( + Regression(array(1:10, dim = c(a = 2, b = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'datay' and 'datax' must have same length of all dimensions." + ) + expect_error( + Regression(datay1, datax1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Regression(datay1, datax1, time_dim = 'asd'), + "Parameter 'time_dim' is not found in 'datay' or 'datax' dimension." + ) + expect_error( + Regression(datay1, datax1, na.action = TRUE), + paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") + ) + expect_error( + Regression(datay1, datax1, na.action = c(1,2)), + paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") + ) + expect_error( + Regression(datay1, datax1, formula =T), + "Parameter 'formula' must the an object of class 'formula'." + ) + expect_error( + Regression(datay1, datax1, pval = 0.05), + "Parameter 'pval' must be one logical value." + ) + expect_error( + Regression(datay1, datax1, conf = 0.05), + "Parameter 'conf' must be one logical value." + ) + expect_error( + Regression(datay1, datax1, conf.lev = 1.5), + "Parameter 'conf.lev' must be a numeric number between 0 and 1." + ) + expect_error( + Regression(datay1, datax1, ncores = T), + "Parameter 'ncores' must be a positive integer." + ) +}) + + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Regression(datay1, datax1)$regression), + c(stats = 2, ftime = 2, lon = 2, lat = 4) + ) + expect_equal( + head(Regression(datay1, datax1)$regression), + c(-39.0091480, 0.7290814, -39.1853129, 0.8623175, -37.4342099, 0.7844530), + tolerance = 0.001 + ) + expect_equal( + length(which(is.na(Regression(datay1, datax1)$conf.low))), + 0 + ) + expect_equal( + max(Regression(datay1, datax1)$conf.upper, na.rm = T), + 127.4267, + tolerance = 0.001 + ) + expect_equal( + length(Regression(datay1, datax1, conf = F)), + 3 + ) + expect_equal( + length(Regression(datay1, datax1, pval = F)), + 4 + ) + expect_equal( + length(Regression(datay1, datax1, pval = F, conf = F)), + 2 + ) + expect_equal( + range(Regression(datay1, datax1, conf.lev = 0.99)$conf.low, na.rm = T), + c(-380.888744, 0.220794), + tolerance = 0.001 + ) + expect_equal( + summary(Regression(datay1, datax1)$p.val)[1], + c(Min. = 0.005335), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = 0)$conf.lower))), + 14 + ) + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = 2)$conf.lower))), + 0 + ) + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = 1)$p.val))), + 2 + ) + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = na.pass)$p.val))), + 0 + ) + expect_equal( + which(is.na(Regression(datay2, datax2, na.action = 1)$p.val)), + c(3,15) + ) +}) + +############################################## + -- GitLab From 19440e775c1280d30ed5fa4df3e38951cdfdf731 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 24 Jan 2020 17:40:31 +0100 Subject: [PATCH 13/16] Modify example in Regression() and add unit tests --- R/Regression.R | 69 ++++++++++++++++++-------------- man/Regression.Rd | 64 ++++++++++++++++------------- tests/testthat/test-Regression.R | 11 ++++- 3 files changed, 84 insertions(+), 60 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index b8ecef54..644e043e 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -31,16 +31,38 @@ #' #'@import multiApply #'@return -#' \item{$regression}{ -#' Array with same dimensions as datax and datay except along time_dim -#' dimension which is replaced by a length 4 dimension, corresponding -#' to the lower limit of the 95\% confidence interval, the slope, -#' the upper limit of the 95\% confidence interval and the intercept. -#' } -#' \item{$filtered}{ -#' Same dimensions as datay filtered out from the regression onto datax -#' along the time_dim dimension. -#' } +#'\item{$regression}{ +#' A numeric array with same dimensions as parameter 'datay' and 'datax' except +#' the 'time_dim' dimension, which is replaced by a 'stats' dimension containing +#' the regression coefficients from the lowest order (i.e., intercept) to +#' the highest degree. The length of the 'stats' dimension should be +#' \code{polydeg + 1}. +#'} +#'\item{$conf.lower}{ +#' A numeric array with same dimensions as parameter 'daty' and 'datax' except +#' the 'time_dim' dimension, which is replaced by a 'stats' dimension containing +#' the lower value of the \code{siglev}\% confidence interval for all +#' the regression coefficients with the same order as $regression. The length +#' of 'stats' dimension should be \code{polydeg + 1}. Only present if +#' \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' A numeric array with same dimensions as parameter 'daty' and 'datax' except +#' the 'time_dim' dimension, which is replaced by a 'stats' dimension containing +#' the upper value of the \code{siglev}\% confidence interval for all +#' the regression coefficients with the same order as $regression. The length +#' of 'stats' dimension should be \code{polydeg + 1}. Only present if +#' \code{conf = TRUE}. +#'} +#'\item{$p.val}{ +#' A numeric array with same dimensions as parameter 'daty' and 'datax' except +#' the 'time_dim' dimension, the p-value. +#'} +#'\item{$filtered}{ +#' A numeric array with the same dimension as paramter 'datay' and 'datax', +#' the filtered datay from the regression onto datax along the 'time_dim' +#' dimension. +#'} #' #'@keywords datagen #'@author History:\cr @@ -49,28 +71,14 @@ #'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply #'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply #'@examples -#'datay <- 1:(2*3*4*5*6) -#'dim(datay) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) -#'datax <- datay -#'res <- Regression(datay, datax, time_dim = 'time') -#'str(res) -#'datax[c(1,5,7,9:22)] <- NA -#'res <- Regression(datay, datax, time_dim = 'time', na.action = 2) -#'length(which(is.na(res$filtered))) -#'res <- Regression(datay, datax, time_dim = 'time', na.action = 0) -#'length(which(is.na(res$filtered))) -#'datay[1] <- NA -#'res <- Regression(datay, datax, time_dim = 'time') -#'res <- Regression(datay, datax, time_dim = 'time', na.action = 5) -#'exp <- 1 : 70 -#'obs <- rnorm(70) -#'dim(exp) <- c(time = 70) -#'dim(obs) <- c(time = 70) -#'reg <- Regression(exp, obs, time_dim = 'time') -#'reg <- Regression(exp, obs, time_dim = 'time', na.action = 10) +#'datay <- sampleData$mod +#'datax <- sampleData$obs +#'datay <- Subset(datay, 'member', 2) +#'res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = T)) +#'res2 <- Regression(datay, datax, conf.lev = 0.9) #' #'@importFrom stats lm na.omit confint -#' +#'@import multiApply #'@export Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, formula = y ~ x, pval = T, conf = T, @@ -186,7 +194,6 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, # remove NAs for potential poly() x2 <- x[!is.na(NApos)] y2 <- y[!is.na(NApos)] - lm.out <- lm(formula, data = data.frame(x = x2, y = y2), na.action = na.action) coeff <- lm.out$coefficients if (conf) { diff --git a/man/Regression.Rd b/man/Regression.Rd index 02e0db4a..4c7fc165 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -39,15 +39,37 @@ computation. Default value is NULL.} } \value{ \item{$regression}{ - Array with same dimensions as datax and datay except along time_dim - dimension which is replaced by a length 4 dimension, corresponding - to the lower limit of the 95\% confidence interval, the slope, - the upper limit of the 95\% confidence interval and the intercept. - } - \item{$filtered}{ - Same dimensions as datay filtered out from the regression onto datax - along the time_dim dimension. - } + A numeric array with same dimensions as parameter 'datay' and 'datax' except + the 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the regression coefficients from the lowest order (i.e., intercept) to + the highest degree. The length of the 'stats' dimension should be + \code{polydeg + 1}. +} +\item{$conf.lower}{ + A numeric array with same dimensions as parameter 'daty' and 'datax' except + the 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the lower value of the \code{siglev}\% confidence interval for all + the regression coefficients with the same order as $regression. The length + of 'stats' dimension should be \code{polydeg + 1}. Only present if + \code{conf = TRUE}. +} +\item{$conf.upper}{ + A numeric array with same dimensions as parameter 'daty' and 'datax' except + the 'time_dim' dimension, which is replaced by a 'stats' dimension containing + the upper value of the \code{siglev}\% confidence interval for all + the regression coefficients with the same order as $regression. The length + of 'stats' dimension should be \code{polydeg + 1}. Only present if + \code{conf = TRUE}. +} +\item{$p.val}{ + A numeric array with same dimensions as parameter 'daty' and 'datax' except + the 'time_dim' dimension, the p-value. +} +\item{$filtered}{ + A numeric array with the same dimension as paramter 'datay' and 'datax', + the filtered datay from the regression onto datax along the 'time_dim' + dimension. +} } \description{ Compute the regression of the array 'datay' on the array 'datax' along the @@ -59,25 +81,11 @@ The p-value relies on the F distribution, and the confidence interval relies on the student-T distribution. } \examples{ -datay <- 1:(2*3*4*5*6) -dim(datay) <- c(lat = 2, lon = 3, member = 4, time = 6, sdate = 5) -datax <- datay -res <- Regression(datay, datax, time_dim = 'time') -str(res) -datax[c(1,5,7,9:22)] <- NA -res <- Regression(datay, datax, time_dim = 'time', na.action = 2) -length(which(is.na(res$filtered))) -res <- Regression(datay, datax, time_dim = 'time', na.action = 0) -length(which(is.na(res$filtered))) -datay[1] <- NA -res <- Regression(datay, datax, time_dim = 'time') -res <- Regression(datay, datax, time_dim = 'time', na.action = 5) -exp <- 1 : 70 -obs <- rnorm(70) -dim(exp) <- c(time = 70) -dim(obs) <- c(time = 70) -reg <- Regression(exp, obs, time_dim = 'time') -reg <- Regression(exp, obs, time_dim = 'time', na.action = 10) +datay <- sampleData$mod +datax <- sampleData$obs +datay <- Subset(datay, 'member', 2) +res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = T)) +res2 <- Regression(datay, datax, conf.lev = 0.9) } \author{ diff --git a/tests/testthat/test-Regression.R b/tests/testthat/test-Regression.R index 82af262e..7c39eb0e 100644 --- a/tests/testthat/test-Regression.R +++ b/tests/testthat/test-Regression.R @@ -131,7 +131,11 @@ test_that("2. Output checks: dat1", { c(Min. = 0.005335), tolerance = 0.0001 ) - + expect_equal( + summary(Regression(datay1, datax1, formula = y~poly(x, 2, raw = T))$p.val)[3], + c(Median = 0.22560), + tolerance = 0.0001 + ) }) ############################################## @@ -156,6 +160,11 @@ test_that("3. Output checks: dat2", { which(is.na(Regression(datay2, datax2, na.action = 1)$p.val)), c(3,15) ) + expect_equal( + which(is.na(Regression(datay2, datax2, na.action = 1, + formula = y~poly(x, 2, raw = T))$p.val)), + c(3,15) + ) }) ############################################## -- GitLab From 322b684f9407191708bf90f1cb6d2bc4588f2973 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 24 Jan 2020 17:48:03 +0100 Subject: [PATCH 14/16] Fix example in Regression() --- R/Regression.R | 2 ++ man/Regression.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/Regression.R b/R/Regression.R index 644e043e..094a3344 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -71,6 +71,8 @@ #'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply #'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply #'@examples +#'# Load sample data as in Load() example: +#'example(Load) #'datay <- sampleData$mod #'datax <- sampleData$obs #'datay <- Subset(datay, 'member', 2) diff --git a/man/Regression.Rd b/man/Regression.Rd index 4c7fc165..cd818913 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -81,6 +81,8 @@ The p-value relies on the F distribution, and the confidence interval relies on the student-T distribution. } \examples{ +# Load sample data as in Load() example: +example(Load) datay <- sampleData$mod datax <- sampleData$obs datay <- Subset(datay, 'member', 2) -- GitLab From 274f6f60a242d63d716fd79f1c6243a13f094aa7 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 24 Jan 2020 18:01:13 +0100 Subject: [PATCH 15/16] Small fix in example --- R/Regression.R | 2 +- man/Regression.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index 094a3344..748e1337 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -76,7 +76,7 @@ #'datay <- sampleData$mod #'datax <- sampleData$obs #'datay <- Subset(datay, 'member', 2) -#'res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = T)) +#'res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) #'res2 <- Regression(datay, datax, conf.lev = 0.9) #' #'@importFrom stats lm na.omit confint diff --git a/man/Regression.Rd b/man/Regression.Rd index cd818913..e0775c95 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -86,7 +86,7 @@ example(Load) datay <- sampleData$mod datax <- sampleData$obs datay <- Subset(datay, 'member', 2) -res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = T)) +res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) res2 <- Regression(datay, datax, conf.lev = 0.9) } -- GitLab From 10de07d7f31d44581ef5e254275767a510665f19 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 29 Jan 2020 16:21:33 +0100 Subject: [PATCH 16/16] Small fix in Regression(). Add one more unit test --- R/Regression.R | 70 ++++++++++++++++++-------------- man/Regression.Rd | 24 +++++------ tests/testthat/test-Regression.R | 28 +++++++++++++ 3 files changed, 80 insertions(+), 42 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index 748e1337..2e736a05 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -1,4 +1,4 @@ -#'Compute the regression of an array on another along a dimension. +#'Compute the regression of an array on another along one dimension. #' #'Compute the regression of the array 'datay' on the array 'datax' along the #''time_dim' dimension by least square fitting (default) or self-defined model. @@ -13,12 +13,7 @@ #'@param datax An numeric array as predictor. The dimension should be identical #' as parameter 'datay'. #'@param time_dim A character string indicating the dimension along which to -#' compute the trend. -#'@param na.action A function or an integer. A function (e.g., na.omit, -#' na.exclude, na.fail, na.pass) indicates what should happen when the data -#' contain NAs. A numeric indicates the maximum number of NA position (it -#' counts as long as one of datay and datax is NA) allowed for compute -#' regression. Set na.omit as default. +#' compute the regression. #'@param formula An object of class "formula" (see function \code{link[stats]{lm}}). #'@param pval A logical value indicating whether to retrieve the p-value #' or not. The default value is TRUE. @@ -26,6 +21,11 @@ #' intervals or not. The default value is TRUE. #'@param conf.lev A numeric indicating the confidence level for the #' regression computation. The default value is 0.95. +#'@param na.action A function or an integer. A function (e.g., na.omit, +#' na.exclude, na.fail, na.pass) indicates what should happen when the data +#' contain NAs. A numeric indicates the maximum number of NA position (it +#' counts as long as one of datay and datax is NA) allowed for compute +#' regression. The default value is na.omit- #'@param ncores An integer indicating the number of cores to use for parallel #' computation. Default value is NULL. #' @@ -56,7 +56,7 @@ #'} #'\item{$p.val}{ #' A numeric array with same dimensions as parameter 'daty' and 'datax' except -#' the 'time_dim' dimension, the p-value. +#' the 'time_dim' dimension, The array contains the p-value. #'} #'\item{$filtered}{ #' A numeric array with the same dimension as paramter 'datay' and 'datax', @@ -69,7 +69,7 @@ #'0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN #'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply -#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply +#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature #'@examples #'# Load sample data as in Load() example: #'example(Load) @@ -82,9 +82,9 @@ #'@importFrom stats lm na.omit confint #'@import multiApply #'@export -Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, - formula = y ~ x, pval = T, conf = T, - conf.lev = 0.95, ncores = NULL) { +Regression <- function(datay, datax, time_dim = 'sdate', formula = y ~ x, + pval = TRUE, conf = TRUE, conf.lev = 0.95, + na.action = na.omit, ncores = NULL) { # Check inputs ## datay and datax @@ -117,19 +117,6 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, if (!time_dim %in% names(dim(datay)) | !time_dim %in% names(dim(datax))) { stop("Parameter 'time_dim' is not found in 'datay' or 'datax' dimension.") } - ## na.action - if (!is.function(na.action) & !is.numeric(na.action)) { - stop(paste0("Parameter 'na.action' must be a function for NA values or ", - "a numeric indicating the number of NA values allowed ", - "before returning NA.")) - } - if (is.numeric(na.action)) { - if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { - stop(paste0("Parameter 'na.action' must be a function for NA values or ", - "a numeric indicating the number of NA values allowed ", - "before returning NA.")) - } - } ## formula if (class(formula) != 'formula') { stop("Parameter 'formula' must the an object of class 'formula'.") @@ -146,6 +133,19 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") } + ## na.action + if (!is.function(na.action) & !is.numeric(na.action)) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) + } + if (is.numeric(na.action)) { + if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) + } + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | @@ -154,6 +154,14 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, } } + ############################### + # Sort dimension + name_datay <- names(dim(datay)) + name_datax <- names(dim(datax)) + order_datax <- match(name_datay, name_datax) + datax <- s2dverification:::.aperm2(datax, order_datax) + + ############################### # Calculate Regression if (conf & pval) { @@ -172,16 +180,18 @@ Regression <- function(datay, datax, time_dim = 'sdate', na.action = na.omit, res <- Apply(list(datay, datax), target_dims = time_dim, output_dims = output_dims, - fun = .Regression, conf.lev = conf.lev, - na.action = na.action, formula = formula, - conf = conf, pval = pval, ncores = ncores) + fun = .Regression, + formula = formula, pval = pval, conf = conf, + conf.lev = conf.lev, na.action = na.action, + ncores = ncores) return(invisible(res)) } -.Regression <- function(y, x, na.action = na.omit, formula = y ~ x, conf.lev = 0.95, - conf = conf, pval = pval) { +.Regression <- function(y, x, formula = y~x, pval = TRUE, conf = TRUE, + conf.lev = 0.95, na.action = na.omit) { + NApos <- 1:length(x) NApos[which(is.na(x) | is.na(y))] <- NA filtered <- rep(NA, length(x)) diff --git a/man/Regression.Rd b/man/Regression.Rd index e0775c95..f6a28a25 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/Regression.R \name{Regression} \alias{Regression} -\title{Compute the regression of an array on another along a dimension.} +\title{Compute the regression of an array on another along one dimension.} \usage{ -Regression(datay, datax, time_dim = "sdate", na.action = na.omit, - formula = y ~ x, pval = T, conf = T, conf.lev = 0.95, ncores = NULL) +Regression(datay, datax, time_dim = "sdate", formula = y ~ x, pval = TRUE, + conf = TRUE, conf.lev = 0.95, na.action = na.omit, ncores = NULL) } \arguments{ \item{datay}{An numeric array as predictand including the dimension along @@ -15,13 +15,7 @@ which the regression is computed.} as parameter 'datay'.} \item{time_dim}{A character string indicating the dimension along which to -compute the trend.} - -\item{na.action}{A function or an integer. A function (e.g., na.omit, -na.exclude, na.fail, na.pass) indicates what should happen when the data -contain NAs. A numeric indicates the maximum number of NA position (it -counts as long as one of datay and datax is NA) allowed for compute -regression. Set na.omit as default.} +compute the regression.} \item{formula}{An object of class "formula" (see function \code{link[stats]{lm}}).} @@ -34,6 +28,12 @@ intervals or not. The default value is TRUE.} \item{conf.lev}{A numeric indicating the confidence level for the regression computation. The default value is 0.95.} +\item{na.action}{A function or an integer. A function (e.g., na.omit, +na.exclude, na.fail, na.pass) indicates what should happen when the data +contain NAs. A numeric indicates the maximum number of NA position (it +counts as long as one of datay and datax is NA) allowed for compute +regression. The default value is na.omit-} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. Default value is NULL.} } @@ -63,7 +63,7 @@ computation. Default value is NULL.} } \item{$p.val}{ A numeric array with same dimensions as parameter 'daty' and 'datax' except - the 'time_dim' dimension, the p-value. + the 'time_dim' dimension, The array contains the p-value. } \item{$filtered}{ A numeric array with the same dimension as paramter 'datay' and 'datax', @@ -95,7 +95,7 @@ History:\cr 0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN 2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply -3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Formatting to multiApply +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-Regression.R b/tests/testthat/test-Regression.R index 7c39eb0e..c966fe92 100644 --- a/tests/testthat/test-Regression.R +++ b/tests/testthat/test-Regression.R @@ -18,6 +18,15 @@ context("s2dverification::Regression tests") na <- floor(runif(5, min = 1, max = 80)) datax2[na] <- NA + # dat3 + set.seed(1) + datay3 <- array(c(-39:40) + rnorm(80), + dim = c(date = 5, ftime = 2, lon = 2, lat = 4)) + set.seed(2) + datax3 <- array(c(1:80) + rnorm(80), + dim = c(date = 5, lon = 2, lat = 4,ftime = 2)) + + ############################################## test_that("1. Input checks", { @@ -168,4 +177,23 @@ test_that("3. Output checks: dat2", { }) ############################################## +test_that("4. Output checks: dat3", { + + expect_equal( + dim(Regression(datay3, datax3, time_dim = 'date')$regression), + c(stats = 2, ftime = 2, lon = 2, lat = 4) + ) + expect_equal( + dim(Regression(datay3, datax3, time_dim = 'date')$conf.lower), + c(stats = 2, ftime = 2, lon = 2, lat = 4) + ) + expect_equal( + dim(Regression(datay3, datax3, time_dim = 'date')$p.val), + c(ftime = 2, lon = 2, lat = 4) + ) + + +}) + +############################################## -- GitLab