diff --git a/DESCRIPTION b/DESCRIPTION
index 0045265cfeeefd668ffd0fc634ae9e42cac29915..3be1e88212fc86e7127eeb5dd96611d9a8328f7d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,6 +43,7 @@ Depends:
 Imports:
     abind,
     bigmemory,
+    multiApply (>= 2.0.0),
     GEOmap,
    geomapdata,
     mapproj,
diff --git a/NAMESPACE b/NAMESPACE
index 4e8557898444268933dfffceb4f5d7d3c8460183..00c59bfa8c0477bfe32e3cfa00002bf6501b2e13 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -90,6 +90,7 @@ import(graphics)
 import(mapproj)
 import(maps)
 import(methods)
+import(multiApply)
 import(ncdf4)
 import(parallel)
 import(plyr)
diff --git a/R/Regression.R b/R/Regression.R
index 2afd14b81905d53b285bff1f9374286bc320342e..2e736a051c8c011a5e60cd03f37e279f6756d176 100644
--- a/R/Regression.R
+++ b/R/Regression.R
@@ -1,157 +1,249 @@
-#'Computes The Regression Of An Array On Another Along A Dimension
+#'Compute the regression of an array on another along one dimension.
 #'
-#'Computes the regression of the input matrice vary on the input matrice varx
-#'along the posREG dimension by least square fitting. Provides the slope of
-#'the regression, the associated confidence interval, and the intercept.\cr
-#'Provides also the vary data filtered out from the regression onto varx.\cr
-#'The confidence interval relies on a student-T distribution.
+#'Compute the regression of the array 'datay' on the array 'datax' along the
+#''time_dim' dimension by least-squares fitting (default) or a self-defined
+#'model. The function provides the slope of the regression, the intercept, and
+#'the associated p-value and confidence interval. The filtered datay from the
+#'regression onto datax is also provided.\cr
+#'The p-value relies on the F distribution, and the confidence interval relies
+#'on the Student's t-distribution.
 #'
-#'@param vary Array of any number of dimensions up to 10.
-#'@param varx Array of any number of dimensions up to 10.
-#'  Same dimensions as vary.
-#'@param posREG Position along which to compute the regression.
+#'@param datay A numeric array as predictand, including the dimension along
+#'  which the regression is computed.
+#'@param datax A numeric array as predictor. Its dimensions must be identical
+#'  to those of parameter 'datay'.
+#'@param time_dim A character string indicating the dimension along which to
+#'  compute the regression.
+#'@param formula An object of class "formula" (see function \code{\link[stats]{lm}}).
+#'@param pval A logical value indicating whether to retrieve the p-value
+#'  or not. The default value is TRUE.
+#'@param conf A logical value indicating whether to retrieve the confidence
+#'  intervals or not. The default value is TRUE.
+#'@param conf.lev A numeric indicating the confidence level for the
+#'  regression computation. The default value is 0.95.
+#'@param na.action A function or an integer. A function (e.g., na.omit,
+#'  na.exclude, na.fail, na.pass) indicates what should happen when the data
+#'  contain NAs. A numeric indicates the maximum number of NA positions (a
+#'  position counts when either datay or datax is NA) allowed in the
+#'  regression computation. The default value is na.omit.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
 #'
+#'@import multiApply
 #'@return
-#'  \item{$regression}{
-#'    Array with same dimensions as varx and vary except along posREG
-#'    dimension which is replaced by a length 4 dimension, corresponding
-#'    to the lower limit of the 95\% confidence interval, the slope,
-#'    the upper limit of the 95\% confidence interval and the intercept.
-#'  }
-#'  \item{$filtered}{
-#'    Same dimensions as vary filtered out from the regression onto varx
-#'    along the posREG dimension.
-#'  }
+#'\item{$regression}{
+#'  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+#'  except the 'time_dim' dimension, which is replaced by a 'stats' dimension
+#'  containing the regression coefficients from the lowest order (i.e., the
+#'  intercept) to the highest degree. The length of the 'stats' dimension
+#'  equals the number of regression coefficients determined by 'formula'
+#'  (2 for the default y ~ x).
+#'}
+#'\item{$conf.lower}{
+#'  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+#'  except the 'time_dim' dimension, which is replaced by a 'stats' dimension
+#'  containing the lower limit of the confidence interval (at level 'conf.lev')
+#'  for all the regression coefficients, in the same order as $regression. The
+#'  length of the 'stats' dimension is the same as in $regression. Only present
+#'  if \code{conf = TRUE}.
+#'}
+#'\item{$conf.upper}{
+#'  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+#'  except the 'time_dim' dimension, which is replaced by a 'stats' dimension
+#'  containing the upper limit of the confidence interval (at level 'conf.lev')
+#'  for all the regression coefficients, in the same order as $regression. The
+#'  length of the 'stats' dimension is the same as in $regression. Only present
+#'  if \code{conf = TRUE}.
+#'}
+#'\item{$p.val}{
+#'  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+#'  except the 'time_dim' dimension. The array contains the p-value of the
+#'  regression. Only present if \code{pval = TRUE}.
+#'}
+#'\item{$filtered}{
+#'  A numeric array with the same dimensions as parameters 'datay' and 'datax',
+#'  containing the filtered datay from the regression onto datax along the
+#'  'time_dim' dimension.
+#'}
 #'
 #'@keywords datagen
 #'@author History:\cr
 #'0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr
 #'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN
+#'2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply
+#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature
 #'@examples
-#'# See examples on Load() to understand the first lines in this example
-#'  \dontrun{
-#'data_path <- system.file('sample_data', package = 's2dverification')
-#'expA <- list(name = 'experiment', path = file.path(data_path,
-#'            'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly',
-#'            '$VAR_NAME$_$START_DATE$.nc'))
-#'obsX <- list(name = 'observation', path = file.path(data_path,
-#'            '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$',
-#'            '$VAR_NAME$_$YEAR$$MONTH$.nc'))
-#'
-#'# Now we are ready to use Load().
-#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
-#'sampleData <- Load('tos', list(expA), list(obsX), startDates,
-#'                   output = 'lonlat', latmin = 27, latmax = 48,
-#'                   lonmin = -12, lonmax = 40)
-#'  }
-#'  \dontshow{
-#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
-#'sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'),
-#'                                                c('observation'), startDates,
-#'                                                output = 'lonlat',
-#'                                                latmin = 27, latmax = 48,
-#'                                                lonmin = -12, lonmax = 40)
-#'  }
-#'sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2)
-#'sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2)
-#'reg <- Regression(Mean1Dim(sampleData$mod, 2),
-#'                  Mean1Dim(sampleData$obs, 2), 2)
-#'PlotEquiMap(reg$regression[1, 2, 1, , ], sampleData$lon, sampleData$lat,
-#'            toptitle='Regression of the prediction on the observations',
-#'            sizetit = 0.5)
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'datay <- sampleData$mod
+#'datax <- sampleData$obs
+#'datay <- Subset(datay, 'member', 2)
+#'res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE))
+#'res2 <- Regression(datay, datax, conf.lev = 0.9)
 #'
 #'@importFrom stats lm na.omit confint
+#'@import multiApply
 #'@export
-Regression <- function(vary, varx, posREG = 2) {
-  #
-  #  Enlarge the size of varx and vary to 10
-  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  #
-  dimsvar <- dim(vary)
-  if (is.null(dimsvar)) {
-    dimsvar <- length(vary)
-  }
-  if (length(dimsvar) == 1) {
-    if (length(varx) != dimsvar[1]) {
-      stop("varx and vary should have the same dimensions")
+Regression <- function(datay, datax, time_dim = 'sdate', formula = y ~ x,
+                       pval = TRUE, conf = TRUE, conf.lev = 0.95,
+                       na.action = na.omit, ncores = NULL) {
+
+  # Check inputs
+  ## datay and datax
+  if (is.null(datay) | is.null(datax)) {
+    stop("Parameter 'datay' and 'datax' cannot be NULL.")
+  }
+  if (!is.numeric(datay) | !is.numeric(datax)) {
+    stop("Parameter 'datay' and 'datax' must be a numeric array.")
+  }
+  if (is.null(dim(datay)) | is.null(dim(datax))) {
+    stop("Parameter 'datay' and 'datax' must be at least one dimension 'time_dim'.")
+  }
+  if (any(is.null(names(dim(datay)))) | any(nchar(names(dim(datay))) == 0) |
+      any(is.null(names(dim(datax)))) | any(nchar(names(dim(datax))) == 0)) {
+    stop("Parameter 'datay' and 'datax' must have dimension names.")
+  }
+  if (!all(names(dim(datay)) %in% names(dim(datax))) |
+      !all(names(dim(datax)) %in% names(dim(datay)))) {
+    stop("Parameter 'datay' and 'datax' must have the same dimension names.")
+  }
+  name_datay <- sort(names(dim(datay)))
+  name_datax <- sort(names(dim(datax)))
+  if (!all(dim(datay)[name_datay] == dim(datax)[name_datax])) {
+    stop("Parameter 'datay' and 'datax' must have the same length for all dimensions.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(datay)) | !time_dim %in% names(dim(datax))) {
+    stop("Parameter 'time_dim' is not found in 'datay' or 'datax' dimension.")
+  }
+  ## formula
+  if (!inherits(formula, 'formula')) {
+    stop("Parameter 'formula' must be an object of class 'formula'.")
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## conf
+  if (!is.logical(conf) | length(conf) > 1) {
+    stop("Parameter 'conf' must be one logical value.")
+  }
+  ## conf.lev
+  if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
+    stop("Parameter 'conf.lev' must be a 
numeric number between 0 and 1.") + } + ## na.action + if (!is.function(na.action) & !is.numeric(na.action)) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) + } + if (is.numeric(na.action)) { + if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) } - } else { - for (jdim in 1:length(dimsvar)) { - if (dim(varx)[jdim] != dimsvar[jdim]) { - stop("varx and vary should have the same dimensions") - } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") } + } + + ############################### + # Sort dimension + name_datay <- names(dim(datay)) + name_datax <- names(dim(datax)) + order_datax <- match(name_datay, name_datax) + datax <- s2dverification:::.aperm2(datax, order_datax) + + + ############################### + # Calculate Regression + if (conf & pval) { + output_dims <- list(regression = 'stats', conf.lower = 'stats', + conf.upper = 'stats', p.val = NULL, filtered = time_dim) + } else if (conf & !pval) { + output_dims <- list(regression = 'stats', conf.lower = 'stats', + conf.upper = 'stats', filtered = time_dim) + } else if (!conf & pval) { + output_dims <- list(regression = 'stats', + p.val = NULL, filtered = time_dim) + } else if (!conf & !pval) { + output_dims <- list(regression = 'stats', filtered = time_dim) } - enlvarx <- Enlarge(varx, 10) - enlvary <- Enlarge(vary, 10) - outdim <- dim(enlvarx) - # - # Initialize intermediate and output matrices - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - posaperm <- 1:10 - posaperm[1] <- posREG - posaperm[posREG] <- 1 - enlvarx <- aperm(enlvarx, posaperm) - enlvary <- aperm(enlvary, posaperm) - dimsaperm <- outdim[posaperm] + res <- Apply(list(datay, datax), + target_dims = time_dim, + output_dims = output_dims, + fun = .Regression, + formula = formula, pval = pval, conf = conf, + conf.lev = conf.lev, na.action = na.action, + ncores = ncores) + + return(invisible(res)) +} + + +.Regression <- function(y, x, formula = y~x, pval = TRUE, conf = TRUE, + conf.lev = 0.95, na.action = na.omit) { - enlfilt <- array(dim = dimsaperm) - filtered <- array(dim = dimsvar) - dimsaperm[1] <- 4 - enlreg <- array(dim = dimsaperm) - dimsvar[posREG] <- 4 - regression <- array(dim = dimsvar) - # - # Loop on all dimensions to compute regressions - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # - for (j2 in 1:dimsaperm[2]){ - for (j3 in 1:dimsaperm[3]){ - for (j4 in 1:dimsaperm[4]){ - for (j5 in 1:dimsaperm[5]){ - for (j6 in 1:dimsaperm[6]){ - for (j7 in 1:dimsaperm[7]){ - for (j8 in 1:dimsaperm[8]){ - for (j9 in 1:dimsaperm[9]){ - for (j10 in 1:dimsaperm[10]){ - tmpy <- enlvary[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - tmpx <- enlvarx[, j2, j3, j4, j5, j6, j7, j8, j9, j10] - if ((length(sort(tmpy)) > 0) & (length(sort(tmpx)) > 0)) { - lm.out <- lm(tmpy ~ tmpx, na.action = na.omit) - enlreg[1, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out)[2, 1] - enlreg[2, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[2] - enlreg[3, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- confint(lm.out)[2, 2] - enlreg[4, j2, j3, j4, j5, j6, j7, j8, j9, - j10] <- lm.out$coefficients[1] - enlfilt[which(is.na(tmpx) == FALSE & 
is.na(tmpy) == FALSE), j2,
-                              j3, j4, j5, j6, j7, j8, j9, j10] <- tmpy[which(
-                              is.na(tmpx) == FALSE & is.na(tmpy
-                              ) == FALSE)] - lm.out$fitted.values
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
+  NApos <- 1:length(x)
+  NApos[which(is.na(x) | is.na(y))] <- NA
+  filtered <- rep(NA, length(x))
+  check_na <- FALSE
+
+  if (is.numeric(na.action)) {
+    na_threshold <- na.action
+    na.action <- na.omit
+    check_na <- TRUE
+  }
+
+  # remove NAs first, for potential use of poly() in 'formula'
+  x2 <- x[!is.na(NApos)]
+  y2 <- y[!is.na(NApos)]
+  lm.out <- lm(formula, data = data.frame(x = x2, y = y2), na.action = na.action)
+  coeff <- lm.out$coefficients
+  if (conf) {
+    conf.lower <- confint(lm.out, level = conf.lev)[, 1]
+    conf.upper <- confint(lm.out, level = conf.lev)[, 2]
+  }
+  if (pval) {
+    f <- summary(lm.out)$fstatistic
+    p.val <- pf(f[1], f[2], f[3], lower.tail = FALSE)
+  }
+  filtered[!is.na(NApos)] <- y[!is.na(NApos)] - lm.out$fitted.values
+
+  # Check if there are too many NAs
+  if (check_na) {
+    if (sum(is.na(NApos)) > na_threshold) {  # turn everything into NA
+      coeff[which(!is.na(coeff))] <- NA
+      if (conf) {
+        conf.lower[which(!is.na(conf.lower))] <- NA
+        conf.upper[which(!is.na(conf.upper))] <- NA
       }
+      if (pval) {
+        p.val[which(!is.na(p.val))] <- NA
+      }
+      filtered[which(!is.na(filtered))] <- NA
     }
   }
-  #
-  #  Back to the original dimensions
-  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  regression[] <- aperm(enlreg, posaperm)
-  filtered[] <- aperm(enlfilt, posaperm)
+  if (conf & pval) {
+    return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper,
+                p.val = p.val, filtered = filtered))
+  } else if (conf & !pval) {
+    return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper,
+                filtered = filtered))
+  } else if (!conf & pval) {
+    return(list(regression = coeff,
+                p.val = p.val, filtered = filtered))
+  } else if (!conf & !pval) {
+    return(list(regression = coeff, filtered = filtered))
+  }
-  #
-  #  Outputs
-  # ~~~~~~~~~
-  #
-  invisible(list(regression = regression, filtered = filtered))
 }
diff --git a/man/Regression.Rd b/man/Regression.Rd
index 55646576e5b0c180225038f0f15230c6dfdd0243..f6a28a254f48efb2fc7da9080c81bcec4bece186 100644
--- a/man/Regression.Rd
+++ b/man/Regression.Rd
@@ -2,75 +2,100 @@
 % Please edit documentation in R/Regression.R
 \name{Regression}
 \alias{Regression}
-\title{Computes The Regression Of An Array On Another Along A Dimension}
+\title{Compute the regression of an array on another along one dimension.}
 \usage{
-Regression(vary, varx, posREG = 2)
+Regression(datay, datax, time_dim = "sdate", formula = y ~ x, pval = TRUE,
+  conf = TRUE, conf.lev = 0.95, na.action = na.omit, ncores = NULL)
 }
 \arguments{
-\item{vary}{Array of any number of dimensions up to 10.}
+\item{datay}{A numeric array as predictand, including the dimension along
+which the regression is computed.}

-\item{varx}{Array of any number of dimensions up to 10.
-Same dimensions as vary.}
+\item{datax}{A numeric array as predictor. Its dimensions must be identical
+to those of parameter 'datay'.}

-\item{posREG}{Position along which to compute the regression.}
+\item{time_dim}{A character string indicating the dimension along which to
+compute the regression.}
+
+\item{formula}{An object of class "formula" (see function \code{\link[stats]{lm}}).}
+
+\item{pval}{A logical value indicating whether to retrieve the p-value
+or not. The default value is TRUE.}
+
+\item{conf}{A logical value indicating whether to retrieve the confidence
+intervals or not. The default value is TRUE.}
+
+\item{conf.lev}{A numeric indicating the confidence level for the
+regression computation. The default value is 0.95.}
+
+\item{na.action}{A function or an integer. A function (e.g., na.omit,
+na.exclude, na.fail, na.pass) indicates what should happen when the data
+contain NAs. A numeric indicates the maximum number of NA positions (a
+position counts when either datay or datax is NA) allowed in the
+regression computation. The default value is na.omit.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
 }
 \value{
 \item{$regression}{
-  Array with same dimensions as varx and vary except along posREG
-  dimension which is replaced by a length 4 dimension, corresponding
-  to the lower limit of the 95\% confidence interval, the slope,
-  the upper limit of the 95\% confidence interval and the intercept.
-  }
-  \item{$filtered}{
-  Same dimensions as vary filtered out from the regression onto varx
-  along the posREG dimension.
-  }
+  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+  except the 'time_dim' dimension, which is replaced by a 'stats' dimension
+  containing the regression coefficients from the lowest order (i.e., the
+  intercept) to the highest degree. The length of the 'stats' dimension
+  equals the number of regression coefficients determined by 'formula'
+  (2 for the default y ~ x).
+}
+\item{$conf.lower}{
+  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+  except the 'time_dim' dimension, which is replaced by a 'stats' dimension
+  containing the lower limit of the confidence interval (at level 'conf.lev')
+  for all the regression coefficients, in the same order as $regression. The
+  length of the 'stats' dimension is the same as in $regression. Only present
+  if \code{conf = TRUE}.
+}
+\item{$conf.upper}{
+  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+  except the 'time_dim' dimension, which is replaced by a 'stats' dimension
+  containing the upper limit of the confidence interval (at level 'conf.lev')
+  for all the regression coefficients, in the same order as $regression. The
+  length of the 'stats' dimension is the same as in $regression. Only present
+  if \code{conf = TRUE}.
+}
+\item{$p.val}{
+  A numeric array with the same dimensions as parameters 'datay' and 'datax'
+  except the 'time_dim' dimension. The array contains the p-value of the
+  regression. Only present if \code{pval = TRUE}.
+}
+\item{$filtered}{
+  A numeric array with the same dimensions as parameters 'datay' and 'datax',
+  containing the filtered datay from the regression onto datax along the
+  'time_dim' dimension.
+}
 }
 \description{
-Computes the regression of the input matrice vary on the input matrice varx
-along the posREG dimension by least square fitting. Provides the slope of
-the regression, the associated confidence interval, and the intercept.\cr
-Provides also the vary data filtered out from the regression onto varx.\cr
-The confidence interval relies on a student-T distribution.
+Compute the regression of the array 'datay' on the array 'datax' along the
+'time_dim' dimension by least-squares fitting (default) or a self-defined
+model. The function provides the slope of the regression, the intercept, and
+the associated p-value and confidence interval. The filtered datay from the
+regression onto datax is also provided.\cr
+The p-value relies on the F distribution, and the confidence interval relies
+on the Student's t-distribution.
} \examples{ -# See examples on Load() to understand the first lines in this example - \dontrun{ -data_path <- system.file('sample_data', package = 's2dverification') -expA <- list(name = 'experiment', path = file.path(data_path, - 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', - '$VAR_NAME$_$START_DATE$.nc')) -obsX <- list(name = 'observation', path = file.path(data_path, - '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', - '$VAR_NAME$_$YEAR$$MONTH$.nc')) - -# Now we are ready to use Load(). -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- Load('tos', list(expA), list(obsX), startDates, - output = 'lonlat', latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } - \dontshow{ -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'lonlat', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } -sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2) -sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2) -reg <- Regression(Mean1Dim(sampleData$mod, 2), - Mean1Dim(sampleData$obs, 2), 2) -PlotEquiMap(reg$regression[1, 2, 1, , ], sampleData$lon, sampleData$lat, - toptitle='Regression of the prediction on the observations', - sizetit = 0.5) +# Load sample data as in Load() example: +example(Load) +datay <- sampleData$mod +datax <- sampleData$obs +datay <- Subset(datay, 'member', 2) +res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) +res2 <- Regression(datay, datax, conf.lev = 0.9) } \author{ History:\cr 0.1 - 2013-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to CRAN +2.0 - 2019-10 (N. Perez-Zanon, \email{nuria.perez@bsc.es}) - Formatting to multiApply +3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adapt multiApply feature } \keyword{datagen} diff --git a/tests/testthat/test-Regression.R b/tests/testthat/test-Regression.R new file mode 100644 index 0000000000000000000000000000000000000000..c966fe929a2bc0eff9b96d368602e6f46fc8f050 --- /dev/null +++ b/tests/testthat/test-Regression.R @@ -0,0 +1,199 @@ +context("s2dverification::Regression tests") + +############################################## + # dat1 + set.seed(1) + datay1 <- array(c(-39:40) + rnorm(80), + dim = c(sdate = 5, ftime = 2, lon = 2, lat = 4)) + set.seed(2) + datax1 <- array(c(1:80) + rnorm(80), + dim = c(sdate = 5, ftime = 2, lon = 2, lat = 4)) + # dat2 + datay2 <- datay1 + set.seed(1) + na <- floor(runif(5, min = 1, max = 80)) + datay2[na] <- NA + datax2 <- datax1 + set.seed(2) + na <- floor(runif(5, min = 1, max = 80)) + datax2[na] <- NA + + # dat3 + set.seed(1) + datay3 <- array(c(-39:40) + rnorm(80), + dim = c(date = 5, ftime = 2, lon = 2, lat = 4)) + set.seed(2) + datax3 <- array(c(1:80) + rnorm(80), + dim = c(date = 5, lon = 2, lat = 4,ftime = 2)) + + +############################################## +test_that("1. Input checks", { + + expect_error( + Regression(c(), c()), + "Parameter 'datay' and 'datax' cannot be NULL." + ) + expect_error( + Regression(c('b'), c('a')), + "Parameter 'datay' and 'datax' must be a numeric array." + ) + expect_error( + Regression(c(1:10), c(2:4)), + "Parameter 'datay' and 'datax' must be at least one dimension 'time_dim'." + ) + expect_error( + Regression(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'datay' and 'datax' must have dimension names." 
+  )
+  expect_error(
+    Regression(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))),
+    "Parameter 'datay' and 'datax' must have the same dimension names."
+  )
+  expect_error(
+    Regression(array(1:10, dim = c(a = 2, b = 5)), array(1:4, dim = c(a = 2, b = 2))),
+    "Parameter 'datay' and 'datax' must have the same length for all dimensions."
+  )
+  expect_error(
+    Regression(datay1, datax1, time_dim = 1),
+    "Parameter 'time_dim' must be a character string."
+  )
+  expect_error(
+    Regression(datay1, datax1, time_dim = 'asd'),
+    "Parameter 'time_dim' is not found in 'datay' or 'datax' dimension."
+  )
+  expect_error(
+    Regression(datay1, datax1, na.action = TRUE),
+    paste0("Parameter 'na.action' must be a function for NA values or ",
+           "a numeric indicating the number of NA values allowed ",
+           "before returning NA.")
+  )
+  expect_error(
+    Regression(datay1, datax1, na.action = c(1,2)),
+    paste0("Parameter 'na.action' must be a function for NA values or ",
+           "a numeric indicating the number of NA values allowed ",
+           "before returning NA.")
+  )
+  expect_error(
+    Regression(datay1, datax1, formula = T),
+    "Parameter 'formula' must be an object of class 'formula'."
+  )
+  expect_error(
+    Regression(datay1, datax1, pval = 0.05),
+    "Parameter 'pval' must be one logical value."
+  )
+  expect_error(
+    Regression(datay1, datax1, conf = 0.05),
+    "Parameter 'conf' must be one logical value."
+  )
+  expect_error(
+    Regression(datay1, datax1, conf.lev = 1.5),
+    "Parameter 'conf.lev' must be a numeric number between 0 and 1."
+  )
+  expect_error(
+    Regression(datay1, datax1, ncores = T),
+    "Parameter 'ncores' must be a positive integer."
+  )
+})
+
+
+##############################################
+test_that("2. Output checks: dat1", {
+
+  expect_equal(
+    dim(Regression(datay1, datax1)$regression),
+    c(stats = 2, ftime = 2, lon = 2, lat = 4)
+  )
+  expect_equal(
+    head(Regression(datay1, datax1)$regression),
+    c(-39.0091480, 0.7290814, -39.1853129, 0.8623175, -37.4342099, 0.7844530),
+    tolerance = 0.001
+  )
+  expect_equal(
+    length(which(is.na(Regression(datay1, datax1)$conf.lower))),
+    0
+  )
+  expect_equal(
+    max(Regression(datay1, datax1)$conf.upper, na.rm = T),
+    127.4267,
+    tolerance = 0.001
+  )
+  expect_equal(
+    length(Regression(datay1, datax1, conf = F)),
+    3
+  )
+  expect_equal(
+    length(Regression(datay1, datax1, pval = F)),
+    4
+  )
+  expect_equal(
+    length(Regression(datay1, datax1, pval = F, conf = F)),
+    2
+  )
+  expect_equal(
+    range(Regression(datay1, datax1, conf.lev = 0.99)$conf.lower, na.rm = T),
+    c(-380.888744, 0.220794),
+    tolerance = 0.001
+  )
+  expect_equal(
+    summary(Regression(datay1, datax1)$p.val)[1],
+    c(Min. = 0.005335),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    summary(Regression(datay1, datax1, formula = y~poly(x, 2, raw = T))$p.val)[3],
+    c(Median = 0.22560),
+    tolerance = 0.0001
+  )
+})
+
+##############################################
+test_that("3. 
Output checks: dat2", { + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = 0)$conf.lower))), + 14 + ) + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = 2)$conf.lower))), + 0 + ) + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = 1)$p.val))), + 2 + ) + expect_equal( + length(which(is.na(Regression(datay2, datax2, na.action = na.pass)$p.val))), + 0 + ) + expect_equal( + which(is.na(Regression(datay2, datax2, na.action = 1)$p.val)), + c(3,15) + ) + expect_equal( + which(is.na(Regression(datay2, datax2, na.action = 1, + formula = y~poly(x, 2, raw = T))$p.val)), + c(3,15) + ) +}) + +############################################## +test_that("4. Output checks: dat3", { + + expect_equal( + dim(Regression(datay3, datax3, time_dim = 'date')$regression), + c(stats = 2, ftime = 2, lon = 2, lat = 4) + ) + expect_equal( + dim(Regression(datay3, datax3, time_dim = 'date')$conf.lower), + c(stats = 2, ftime = 2, lon = 2, lat = 4) + ) + expect_equal( + dim(Regression(datay3, datax3, time_dim = 'date')$p.val), + c(ftime = 2, lon = 2, lat = 4) + ) + + +}) + +############################################## +
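For reviewers: below is a minimal sketch (not part of the patch) of how the refactored Regression() is expected to be called, based on the roxygen examples and the unit tests above. The toy arrays, dimension sizes, and seeds are illustrative only; it assumes s2dverification is installed with multiApply (>= 2.0.0) available.

library(s2dverification)

# Toy predictand/predictor with named dimensions, as the new input checks require
set.seed(1)
datay <- array(rnorm(120), dim = c(sdate = 10, lat = 3, lon = 4))
set.seed(2)
datax <- array(rnorm(120), dim = c(sdate = 10, lat = 3, lon = 4))

# Default least-squares fit along 'sdate'
res <- Regression(datay, datax, time_dim = 'sdate')
dim(res$regression)  # c(stats = 2, lat = 3, lon = 4): intercept and slope
dim(res$p.val)       # c(lat = 3, lon = 4)
dim(res$filtered)    # c(sdate = 10, lat = 3, lon = 4)

# Second-degree polynomial via 'formula'; the 'stats' dimension then has length 3
res2 <- Regression(datay, datax, formula = y ~ poly(x, 2, raw = TRUE))

# Numeric 'na.action': tolerate up to 2 NA positions per series, 90% confidence level
datay[1, 1, 1] <- NA
res3 <- Regression(datay, datax, na.action = 2, conf.lev = 0.9)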