diff --git a/R/Eno.R b/R/Eno.R index 21899e737c5c0e7cf0aaaac07d789fc34b0b01f8..40ca54a372c98370ab38f82e335a3d7a59e17072 100644 --- a/R/Eno.R +++ b/R/Eno.R @@ -1,157 +1,108 @@ -#'Computes Effective Sample Size With Classical Method +#'Compute effective sample size with classical method #' -#'Computes the effective number of independent values along the posdim -#'dimension of a matrix.\cr -#'This effective number of independent observations can be used in -#'statistical/inference tests.\cr -#'Based on eno function from Caio Coelho from rclim.txt. +#'Compute the number of effective samples along one dimension of an array. This +#'effective number of independent observations can be used in statistical/inference +#'tests.\cr +#'The calculation is based on eno function from Caio Coelho from rclim.txt. #' -#'@param obs A numeric n-dimensional array with or without named dimensions. -#'@param posdim A character or an integer indicating the dimension along -#' which to compute the effective sample size. -#'@param na.action Function to be called to handle missing values, can be -#' na.pass or na.fail. Default: na.pass -#'@param ncores The number of cores to use for parallel computation. Default: NULL +#'@param data A numeric array with named dimensions. +#'@param time_dim A function indicating the dimension along which to compute +#' the effective sample size. The default value is 'sdate'. +#'@param na.action A function. It can be na.pass (missing values are allowed) +#' or na.fail (no missing values are allowed). See details in stats::acf(). +#' The default value is na.pass. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' -#'@return An array with the same dimensions as obs except the posdim dimension. +#'@return An array with the first dimension 'stats', which is the number of +#' effective samples, and the following dimensions are same as parameter 'data' +#' except the time_dim dimension, which is removed after the computation. #' #'@keywords datagen #'@author History:\cr -#'0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN -#'3.0 - 2019-08 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply +#'0.1 - 2011-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr +#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN +#'3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature #' #'@examples -#'# See examples on Load() to understand the first lines in this example -#' \dontrun{ -#'data_path <- system.file('sample_data', package = 's2dverification') -#'exp <- list( -#' name = 'experiment', -#' path = file.path(data_path, 'model/$EXP_NAME$/monthly_mean', -#' '$VAR_NAME$_3hourly/$VAR_NAME$_$START_DATES$.nc') -#' ) -#'obs <- list( -#' name = 'observation', -#' path = file.path(data_path, 'observation/$OBS_NAME$/monthly_mean', -#' '$VAR_NAME$/$VAR_NAME$_$YEAR$$MONTH$.nc') -#' ) -#'# Now we are ready to use Load(). -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- Load('tos', list(exp), list(obs), startDates, -#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', -#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) -#' } -#' \dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#' } -#'sampleData$mod <- Season(sampleData$mod, 4, 11, 1, 12) -#'eno <- Eno(sampleData$mod[1, 1, , 1, , ], 1) -#'PlotEquiMap(eno, sampleData$lon, sampleData$lat) +#'set.seed(1) +#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4, +#' ftime = 4, lat = 10, lon = 10)) +#'na <- floor(runif(40, min = 1, max = 800)) +#'data[na] <- NA +#'res <- Eno(data) #' #'@importFrom stats acf na.pass na.fail #'@import multiApply #'@export -Eno <- function(obs, posdim, na.action = na.pass, ncores = NULL) { +Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { - #Check params: - - if (is.null(obs)) { - stop("Parameter 'obs' cannot be NULL.") + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") } - if (!is.numeric(obs)) { - stop("Parameter 'obs' must be a numeric array.") + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") } - - if (is.null(posdim)) { - stop("Parameter 'posdim' cannot be NULL.") + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim } - - if (!is.numeric(posdim) && !is.character(posdim)) { - stop("Parameter 'posdim' must be an positive integer or character.") + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") } - - if (is.numeric(posdim)) { - if (posdim <= 0 | posdim %% 1 != 0) { - stop("Parameter 'posdim' must be a positive integer or character.") - } else if (posdim > length(dim(obs))) { - stop("Parameter 'posdim' excesses the dimension length of parameter 'obs'.") - } else { #posdim is okay for being integer. But change it to character. - if (!is.null(names(dim(obs)))) { - posdim <- names(dim(obs))[posdim] - } else { - names(dim(obs)) <- paste0("D", 1:length(dim(obs))) - posdim <- names(dim(obs))[posdim] - } - } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") } - - if (is.character(posdim)) { - if (is.null(names(dim(obs)))) { - stop(paste0("Parameter 'obs' doesn't have dimension names. ", - "Add names or specify parameter 'posdim' by number.")) - } else if (!any(posdim %in% names(dim(obs)))) { - stop(paste0("Parameter 'posdim' does not match any dimension name of ", - "parameter 'obs'.")) - } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") } - - if (as.character(substitute(na.action)) != c("na.pass") && + ## na.action + if (as.character(substitute(na.action)) != c("na.pass") & as.character(substitute(na.action)) != c("na.fail")) { - stop("Parameter 'na.action' must be either 'na.pass' or 'na.fail'.") + stop("Parameter 'na.action' must be a function either na.pass or na.fail.") } - - if(as.character(substitute(na.action))== c("na.fail") && any(is.na(obs))) { - stop(paste0("NA value in paratemter 'obs', which is not accepted when ", - "parameter 'na.action' = 'na.fail'.")) + if(as.character(substitute(na.action))== c("na.fail") && any(is.na(data))) { + stop(paste0("Calculation fails because NA is found in paratemter 'data', ", + "which is not accepted when ", + "parameter 'na.action' = na.fail.")) } - + ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores)) { - stop("Parameter 'ncores' must be a positive integer.") - } else if (ncores <= 0 | ncores %% 1 != 0) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } - #Calculation: + ############################### + # Calculate Eno - effnumobs <- Apply(data = list(obs), - target_dims = posdim, - fun = .Eno, - na.action = na.action, - ncores = ncores)$output1 + eno <- Apply(data = list(data), + target_dims = time_dim, + output_dims = 'stats', + fun = .Eno, + na.action = na.action, + ncores = ncores)$output1 - return(effnumobs) + return(eno) } .Eno <- function(x, na.action) { - # Checks: - if (!is.numeric(x)) { - stop("Parameter 'x' must be a numeric vector.") - } - - if (length(sort(x)) > 1) { - n <- length(sort(x)) -# if (n == 0) { -# n <- 1 -# } + n <- length(sort(x)) + if (n > 1) { a <- acf(x, lag.max = n - 1, plot = FALSE, na.action = na.action)$acf[2:n, 1, 1] s <- 0 for (k in 1:(n - 1)) { s <- s + (((n - k) / n) * a[k]) } - eno <- min(n / (1 + (2 * s)), n) - } else { eno <- NA } - eno + return(as.array(eno)) } diff --git a/man/Eno.Rd b/man/Eno.Rd index b2a3123d81cd8727f783698b6c6b0f2d5e9dac66..e81832edd0454605ef014a0fc33131cd7cea6431 100644 --- a/man/Eno.Rd +++ b/man/Eno.Rd @@ -2,69 +2,49 @@ % Please edit documentation in R/Eno.R \name{Eno} \alias{Eno} -\title{Computes Effective Sample Size With Classical Method} +\title{Compute effective sample size with classical method} \usage{ -Eno(obs, posdim, na.action = na.pass, ncores = NULL) +Eno(data, time_dim = "sdate", na.action = na.pass, ncores = NULL) } \arguments{ -\item{obs}{A numeric n-dimensional array with or without named dimensions.} +\item{data}{A numeric array with named dimensions.} -\item{posdim}{A character or an integer indicating the dimension along -which to compute the effective sample size.} +\item{time_dim}{A function indicating the dimension along which to compute +the effective sample size. The default value is 'sdate'.} -\item{na.action}{Function to be called to handle missing values, can be -na.pass or na.fail. Default: na.pass} +\item{na.action}{A function. It can be na.pass (missing values are allowed) +or na.fail (no missing values are allowed). See details in stats::acf(). +The default value is na.pass.} -\item{ncores}{The number of cores to use for parallel computation. Default: NULL} +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -An array with the same dimensions as obs except the posdim dimension. +An array with the first dimension 'stats', which is the number of + effective samples, and the following dimensions are same as parameter 'data' + except the time_dim dimension, which is removed after the computation. } \description{ -Computes the effective number of independent values along the posdim -dimension of a matrix.\cr -This effective number of independent observations can be used in -statistical/inference tests.\cr -Based on eno function from Caio Coelho from rclim.txt. +Compute the number of effective samples along one dimension of an array. This +effective number of independent observations can be used in statistical/inference +tests.\cr +The calculation is based on eno function from Caio Coelho from rclim.txt. } \examples{ -# See examples on Load() to understand the first lines in this example - \dontrun{ -data_path <- system.file('sample_data', package = 's2dverification') -exp <- list( - name = 'experiment', - path = file.path(data_path, 'model/$EXP_NAME$/monthly_mean', - '$VAR_NAME$_3hourly/$VAR_NAME$_$START_DATES$.nc') - ) -obs <- list( - name = 'observation', - path = file.path(data_path, 'observation/$OBS_NAME$/monthly_mean', - '$VAR_NAME$/$VAR_NAME$_$YEAR$$MONTH$.nc') - ) -# Now we are ready to use Load(). -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- Load('tos', list(exp), list(obs), startDates, - leadtimemin = 1, leadtimemax = 4, output = 'lonlat', - latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) - } - \dontshow{ -startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'), - c('observation'), startDates, - output = 'lonlat', - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40) - } -sampleData$mod <- Season(sampleData$mod, 4, 11, 1, 12) -eno <- Eno(sampleData$mod[1, 1, , 1, , ], 1) -PlotEquiMap(eno, sampleData$lon, sampleData$lat) +set.seed(1) +data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4, + ftime = 4, lat = 10, lon = 10)) +na <- floor(runif(40, min = 1, max = 800)) +data[na] <- NA +res <- Eno(data) } \author{ History:\cr -0.1 - 2011-05 (V. Guemas, \email{virginie.guemas at ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens at ic3.cat}) - Formatting to R CRAN -3.0 - 2019-08 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply +0.1 - 2011-05 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr +1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN +3.0 - 2019-12 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature + } \keyword{datagen} diff --git a/tests/testthat/test-Eno.R b/tests/testthat/test-Eno.R index e8c7e6cd30605b4c602e9f3ba5ed67ce09b6fe6f..5c11dee97ca575b6eea472abab2b4974f4a0819e 100644 --- a/tests/testthat/test-Eno.R +++ b/tests/testthat/test-Eno.R @@ -1,102 +1,92 @@ -context("Generic tests") -test_that("Sanity checks", { +context("s2dverification::Eno tests") -#error - expect_error( - Eno(NULL), - "Parameter 'obs' cannot be NULL." - ) +############################################## + set.seed(1) + dat1 <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4, + ftime = 4, lat = 10, lon = 10)) + set.seed(1) + na <- floor(runif(40, min = 1, max = 800)) + dat1[na] <- NA - expect_error( - Eno(c('a', 'b'), posdim = 1), - "Parameter 'obs' must be a numeric array." - ) - expect_error( - Eno(1:10, c()), - "Parameter 'posdim' cannot be NULL." - ) + dat2 <- array(c(-5, -7, -10:10, 12, 11, 7, 16), + dim = c(date = 13, ftime = 2)) - expect_error( - Eno(1:10, posdim = FALSE), - "Parameter 'posdim' must be an positive integer or character." - ) +############################################## +test_that("1. Input checks", { expect_error( - Eno(1:10, posdim = -1), - "Parameter 'posdim' must be a positive integer or character." + Eno(c()), + "Parameter 'data' cannot be NULL." ) - expect_error( - Eno(1:10, posdim = 1.2), - "Parameter 'posdim' must be a positive integer or character." + Eno(data = 'a'), + "Parameter 'data' must be a numeric array." ) - expect_error( - Eno(array(1:10, dim = c(2, 5)), 3), - "Parameter 'posdim' excesses the dimension length of parameter 'obs'." + Eno(data = array(1:10, dim = c(2,5))), + "Parameter 'data' must have dimension names." ) - - dat <- array(1:10, dim = c(2, 5)) expect_error( - Eno(dat, 'a'), - paste0("Parameter 'obs' doesn't have dimension names. ", - "Add names or specify parameter 'posdim' by number.") + Eno(data = 1:10, time_dim = 12), + "Parameter 'time_dim' must be a character string." ) - - names(dim(dat)) <- c('a', 'b') expect_error( - Eno(dat, 'c'), - paste0("Parameter 'posdim' does not match any dimension name of ", - "parameter 'obs'.") + Eno(data = array(1:10, dim = c(a = 2, b = 5))), + "Parameter 'time_dim' is not found in 'data' dimension." ) - expect_error( - Eno(dat, 'a', na.action = na.stop), - "Parameter 'na.action' must be either 'na.pass' or 'na.fail'." + Eno(data = array(1:10, dim = c(a = 2, sdate = 5)), na.action = na.rm), + "Parameter 'na.action' must be a function either na.pass or na.fail." ) - - dat[1] <- NA expect_error( - Eno(dat, 'a', na.action = na.fail), - paste0("NA value in paratemter 'obs', which is not accepted when ", - "parameter 'na.action' = 'na.fail'.") + Eno(data = c(NA,1:19), na.action = na.fail), + paste0("Calculation fails because NA is found in paratemter 'data', ", + "which is not accepted when ", + "parameter 'na.action' = na.fail.") ) - expect_error( - Eno(dat, 'a', ncores = FALSE), - "Parameter 'ncores' must be a positive integer." + Eno(data = array(1:10, dim = c(a = 2, sdate = 5)), ncores = 0.5), + "Parameter 'ncores' must be a positive integer." ) - expect_error( - Eno(dat, 'a', ncores = 1.5), - "Parameter 'ncores' must be a positive integer." - ) +}) -#equal - dat <- array(1:10, dim = c(2, 5)) +############################################## +test_that("2. Output checks: dat1", { + res <- Eno(dat1) expect_equal( - names(dim(Eno(dat, 1))), - c('D2') + dim(res), + dim(array(dim = c(stats = 1, dataset = 1, member = 2, ftime = 4, lat = 10, lon = 10))) ) - names(dim(dat)) <- c('a', 'b') expect_equal( - Eno(dat, 1), - array(rep(2, 5), dim = c(b = 5)) + length(res[which(is.na(res))]), + 1 ) - dat[1] <- NA expect_equal( - Eno(dat, 2)[1], - 4 + length(res[which(res != 4)]), + 37 ) expect_equal( - Eno(dat, 1)[1], - as.numeric(NA) + mean(res, na.rm = T), + 2.768103, + tolerance = 0.0001 ) }) +############################################## + +test_that("3. Output checks: dat2", { + + expect_equal( + Eno(dat2, time_dim = 'date'), + array(c(6.237689, 5.683186), dim = c(stats = 1, ftime = 2)), + tolerance = 0.0001 + ) + +})