diff --git a/R/ACC.R b/R/ACC.R index bf3daa27a2bc4017bc1c7f444671fda7adc9c700..0f1040cec8741e1f482ca7e95b769ad91e898d61 100644 --- a/R/ACC.R +++ b/R/ACC.R @@ -283,8 +283,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), exp_ori <- exp obs_ori <- obs } - exp <- MeanDims(exp, memb_dim, na.rm = TRUE, ncores = ncores) - obs <- MeanDims(obs, memb_dim, na.rm = TRUE, ncores = ncores) + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) } if (is.null(avg_dim)) { @@ -558,8 +558,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), dim = dim(obs)) # ensemble mean before .ACC - drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE, ncores = ncores_input) - drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE, ncores = ncores_input) + drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE) + drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE) # Reorder if (is.null(avg_dim)) { drawexp <- Reorder(drawexp, c(2, 3, 1)) diff --git a/R/Ano.R b/R/Ano.R index 13ee211c106514c910835ac5d33e2aab81ae47cd..e0a69db232230da28d8dde61fc1d92d07f123de5 100644 --- a/R/Ano.R +++ b/R/Ano.R @@ -13,8 +13,7 @@ #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -#'@return An array with same dimensions as parameter 'data' but with different -#' dimension order. The dimensions in parameter 'clim' are ordered first. +#'@return An array with same dimensions as parameter 'data'. #' #'@examples #'# Load sample data as in Load() example: @@ -22,8 +21,6 @@ #'clim <- Clim(sampleData$mod, sampleData$obs) #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'ano_exp <- Reorder(ano_exp, c(1, 2, 4, 3)) -#'ano_obs <- Reorder(ano_obs, c(1, 2, 4, 3)) #'\donttest{ #'PlotAno(ano_exp, ano_obs, startDates, #' toptitle = 'Anomaly', ytitle = c('K', 'K', 'K'), @@ -62,38 +59,62 @@ if (any(is.null(names(dim(clim))))| any(nchar(names(dim(clim))) == 0)) { stop("Parameter 'clim' must have dimension names.") } - for (i in 1:length(dim(clim))) { - if (!(names(dim(clim))[i] %in% names(dim(data)))) { - stop("Parameter 'data' must have all the dimensions of parameter 'clim'.") - } else { - pos <- names(dim(data))[which(names(dim(clim))[i] == names(dim(data)))] - if ((dim(clim)[i] != dim(data)[pos])) { - stop("Some dimensions of parameter 'clim' have different length from parameter 'data'.") - } + ## data and clim + if (!all(names(dim(clim)) %in% names(dim(data)))) { + stop("Parameter 'data' must have all the dimensions of parameter 'clim'.") + } else { + pos <- names(dim(data))[match(names(dim(clim)), names(dim(data)))] + if (any(dim(clim) != dim(data)[pos])) { + stop("Some dimensions of parameter 'clim' have different length from parameter 'data'.") } } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be a positive integer.") + } else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ############################### # Calculate Ano + parallel_compute <- TRUE + if (is.null(ncores)) { + parallel_compute <- FALSE + } else if (ncores == 1) { + parallel_compute <- FALSE + } + if (!parallel_compute) { + target_dims_ind <- match(names(dim(clim)), names(dim(data))) + if (any(target_dims_ind != sort(target_dims_ind))) { + clim <- Reorder(clim, match(sort(target_dims_ind), target_dims_ind)) + } + if (length(dim(data)) == length(dim(clim))) { + res <- data - clim + } else { + target_dims_ind <- match(names(dim(clim)), names(dim(data))) + margin_dims_ind <- c(1:length(dim(data)))[-target_dims_ind] + res <- apply(data, margin_dims_ind, .Ano, clim) + res <- array(res, dim = dim(data)[c(target_dims_ind, margin_dims_ind)]) + } + } else { + res <- Apply(list(data), + target_dims = names(dim(clim)), + output_dims = names(dim(clim)), + fun = .Ano, + clim = clim, + ncores = ncores)$output1 + } - res <- Apply(list(data), - target_dims = names(dim(clim)), - output_dims = names(dim(clim)), - fun = .Ano, - clim = clim, - ncores = ncores)$output1 + # Reorder dim back to data + if (any(dim(res) != dim(data))) { + res <- Reorder(res, names(dim(data))) + } - return(invisible(res)) + return(invisible(res)) } .Ano <- function(data, clim) { - ano <- data - clim - return(ano) + data - clim } diff --git a/R/Clim.R b/R/Clim.R index 0de6f828e7a921ccd70ae851cf74215181653bcc..21f97b67ebcd586efb284f6654523b51f53eadd7 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -172,8 +172,8 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), ## dat_dim: [dataset, member] pos[i] <- which(names(dim(obs)) == dat_dim[i]) } - outrows_exp <- MeanDims(exp, pos, na.rm = FALSE, ncores = ncores) + - MeanDims(obs, pos, na.rm = FALSE, ncores = ncores) + outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) + + MeanDims(obs, pos, na.rm = FALSE) outrows_obs <- outrows_exp for (i in 1:length(pos)) { diff --git a/R/Composite.R b/R/Composite.R index 01b7acfc7491095eb680787768015c0bd36b12b6..be03ac9e71b37672a4eb2d84c061611d8921ed57 100644 --- a/R/Composite.R +++ b/R/Composite.R @@ -209,7 +209,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), n_tot <- length(occ) } - mean_tot <- MeanDims(data, dims = 3, na.rm = TRUE, ncores = ncores_input) + mean_tot <- MeanDims(data, dims = 3, na.rm = TRUE) stdv_tot <- apply(data, c(1, 2), sd, na.rm = TRUE) for (k in 1:K) { @@ -231,7 +231,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), if (length(indices) == 1) { composite[, , k] <- data[, , indices] } else { - composite[, , k] <- MeanDims(data[, , indices], dims = 3, na.rm = TRUE, ncores = ncores_input) + composite[, , k] <- MeanDims(data[, , indices], dims = 3, na.rm = TRUE) } stdv_k <- apply(data[, , indices], c(1, 2), sd, na.rm = TRUE) diff --git a/R/Corr.R b/R/Corr.R index 463d5f84e22f9646d04ed089b251aba9f81a4a06..0382a393266a7b021a7aab39e69f9e7382e1155c 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -222,14 +222,16 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', # Remove data along comp_dim dim if there is at least one NA between limits if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) if (is.null(limits)) { - limits <- c(1, dim(obs)[comp_dim]) + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) } - pos <- which(names(dim(obs)) == comp_dim) - obs_sub <- Subset(obs, pos, list(limits[1]:limits[2])) - outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE, ncores = ncores)) + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) obs[which(outrows)] <- NA + rm(obs_sub, outrows) } if (is.null(memb_dim)) { diff --git a/R/MeanDims.R b/R/MeanDims.R index cf4f929824ba6b33313adc3e2850f0f328b18d6c..7d3cb445487f60aefb8020f2cb598c28d2777fd2 100644 --- a/R/MeanDims.R +++ b/R/MeanDims.R @@ -8,24 +8,17 @@ #' dimensions to average. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). -#'@param ncores A integer indicating the number of cores to use in parallel computation. #'@return An array with the same dimension as parameter 'data' except the 'dims' #' dimensions. #' removed. #' -#'@keywords datagen -#'@author History:\cr -#'0.1 - 2011-04 (V. Guemas, \email{vguemas@@ic3.cat}) - Original code\cr -#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Formatting to R CRAN\cr -#'1.1 - 2015-03 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Improved memory usage -#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Preserve dimension names #'@examples #'a <- array(rnorm(24), dim = c(2, 3, 4)) #'MeanDims(a, 2) #'MeanDims(a, c(2, 3)) #'@import multiApply #'@export -MeanDims <- function(data, dims, na.rm = FALSE, ncores = NULL) { +MeanDims <- function(data, dims, na.rm = FALSE) { # Check inputs ## data @@ -61,29 +54,18 @@ MeanDims <- function(data, dims, na.rm = FALSE, ncores = NULL) { if (!is.logical(na.rm) | length(na.rm) > 1) { stop("Parameter 'na.rm' must be one logical value.") } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - ############################### # Calculate MeanDims - if (length(dims) == length(dim(data)) || length(dim(data)) == 1) { - res <- mean(data, na.rm = na.rm) - } else if (length(dims) == 1) { + if (length(dims) == length(dim(data))) { + data <- mean(data, na.rm = na.rm) + } else { if (is.character(dims)) { - dims <- which(names(dim(data)) == dims) + dims <- which(names(dim(data)) %in% dims) } pos <- (1:length(dim(data)))[-dims] - res <- apply(data, pos, mean, na.rm = na.rm) - } else { - res <- Apply(list(data), target_dims = list(dims), fun = mean, - na.rm = na.rm, ncores = ncores)$output1 + data <- apply(data, pos, mean, na.rm = na.rm) } - return(res) + return(data) } diff --git a/R/Reorder.R b/R/Reorder.R index 8a248e936a5b9ab8f821690e2712064434a41142..04312071a110a35f5feb396cd0fbe18f494ab010 100644 --- a/R/Reorder.R +++ b/R/Reorder.R @@ -58,11 +58,7 @@ Reorder <- function(data, order) { ## If order is character string, find the indices if (is.character(order)) { - tmp <- rep(0, length(order)) - for (i in 1:length(order)) { - tmp[i] <- which(names(dim(data)) == order[i]) - } - order <- tmp + order <- match(order, names(dim(data))) } ## reorder diff --git a/R/Trend.R b/R/Trend.R index 211babbce7cab9ca93eaa4ecea80c869695a34e5..1f714a6fac3b87e9c2796afcbfde6d3d4cb42fc7 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -115,16 +115,15 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be a positive integer.") + } else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ############################### # Calculate Trend - dim_names <- names(dim(data)) - if (conf & pval) { output_dims <- list(trend = 'stats', conf.lower = 'stats', conf.upper = 'stats', p.val = 'stats', detrended = time_dim) @@ -136,22 +135,22 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, } else { output_dims <- list(trend = 'stats', detrended = time_dim) } - - + output <- Apply(list(data), target_dims = time_dim, fun = .Trend, output_dims = output_dims, - time_dim = time_dim, interval = interval, + interval = interval, polydeg = polydeg, conf = conf, conf.lev = conf.lev, pval = pval, ncores = ncores) - return(output) + return(invisible(output)) } -.Trend <- function(x, time_dim = 'ftime', interval = 1, polydeg = 1, +.Trend <- function(x, interval = 1, polydeg = 1, conf = TRUE, conf.lev = 0.95, pval = TRUE) { + # x: [ftime] mon <- seq(x) * interval @@ -193,7 +192,6 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, } } - if (conf & pval) { return(list(trend = trend, conf.lower = conf.lower, conf.upper = conf.upper, p.val = p.val, detrended = detrended)) diff --git a/inst/doc/profiling_compare_apply.md b/inst/doc/profiling_compare_apply.md new file mode 100644 index 0000000000000000000000000000000000000000..967785c2eb172777038bcfc9a7dec190db35fb6d --- /dev/null +++ b/inst/doc/profiling_compare_apply.md @@ -0,0 +1,95 @@ +This document records the profiling tests of those functions using apply() and Apply() +depending on 'ncores'. The comparison is among apply(), Apply() with one core, and Apply() with two cores. Two different data sizes are tested. The testing package is "peakRAM". + + +- Ano() +For small data, apply() is better than Apply() both in time and memory usage. However, if +the data size is larger, apply() requires more memory even if it saves time still. Using +2 cores can save memory usage but time is even longer. + + - small data +```r + set.seed(1) + dat1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400, ftime = 10)) + set.seed(2) + clim1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400)) + pryr::object_size(dat1) + 9.6 MB + + # (1) apply + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 0.016 9.1 68.6 + + # (2) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 0.039 9.1 82.4 + + # (3) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 0.117 9.1 50.4 +``` + + - large data +```r + set.seed(1) + dat2 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400, ftime = 10, lon = 150)) + set.seed(2) + clim2 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400)) + pryr::object_size(dat2) + 1.44GB + + # (1) apply + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 6.368 1373.3 6004 + + # (2) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 15.211 1373.3 5844.3 + + # (3) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 20.193 1373.3 4718.9 +``` + +- Trend +Because the returned value is list, apply() is not suitable for Trend(). For small data, +2 cores is twice faster than 1 core. The peak RAM is around 6-7x of data. For larger data, +1 core is a bit faster than 2 cores. The peak RAM is around 4-5x of data. + - small data +```r + set.seed(1) + dat1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 40, ftime = 100)) + pryr::object_size(dat1) + 9.6 MB + + # (1) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 21.324 9.8 56.4 + + # (2) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 11.327 9.8 63.1 + + # (3) Apply, 4 cores + +``` + + - large data +```r + set.seed(1) + dat2 <- array(rnorm(10000), c(dat = 10, member = 10, sdate = 400, ftime = 1000, lon = 4)) + pryr::object_size(dat2) + 1.28GB + + # (1) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 602.273 1230.4 6004.3 + + # (2) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 632.638 1229.8 5979.2 + +``` + + + diff --git a/man/Ano.Rd b/man/Ano.Rd index 8e423af81035c5bba6638e899f6b385279a804c9..d15ffd14bdbfbb52a3975d8146267edb20eca0b9 100644 --- a/man/Ano.Rd +++ b/man/Ano.Rd @@ -20,8 +20,7 @@ same length.} computation. The default value is NULL.} } \value{ -An array with same dimensions as parameter 'data' but with different - dimension order. The dimensions in parameter 'clim' are ordered first. +An array with same dimensions as parameter 'data'. } \description{ This function computes anomalies from a multidimensional data array and a @@ -33,8 +32,6 @@ example(Load) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) -ano_exp <- Reorder(ano_exp, c(1, 2, 4, 3)) -ano_obs <- Reorder(ano_obs, c(1, 2, 4, 3)) \donttest{ PlotAno(ano_exp, ano_obs, startDates, toptitle = 'Anomaly', ytitle = c('K', 'K', 'K'), diff --git a/man/MeanDims.Rd b/man/MeanDims.Rd index 9c874fc57975211a3b62915eb39779cf7eba4fa6..f70b78b391f1205a30614735a8af875a64c9b608 100644 --- a/man/MeanDims.Rd +++ b/man/MeanDims.Rd @@ -4,7 +4,7 @@ \alias{MeanDims} \title{Average an array along multiple dimensions} \usage{ -MeanDims(data, dims, na.rm = FALSE, ncores = NULL) +MeanDims(data, dims, na.rm = FALSE) } \arguments{ \item{data}{An array to be averaged.} @@ -14,8 +14,6 @@ dimensions to average.} \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} - -\item{ncores}{A integer indicating the number of cores to use in parallel computation.} } \value{ An array with the same dimension as parameter 'data' except the 'dims' @@ -31,11 +29,3 @@ a <- array(rnorm(24), dim = c(2, 3, 4)) MeanDims(a, 2) MeanDims(a, c(2, 3)) } -\author{ -History:\cr -0.1 - 2011-04 (V. Guemas, \email{vguemas@ic3.cat}) - Original code\cr -1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN\cr -1.1 - 2015-03 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Improved memory usage -3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Preserve dimension names -} -\keyword{datagen} diff --git a/tests/testthat/test-Ano.R b/tests/testthat/test-Ano.R index 4d64ce6cd6e4e37655dd28cfccb529e760f155b5..a74c7c946778287ac84451319457c36782362401 100644 --- a/tests/testthat/test-Ano.R +++ b/tests/testthat/test-Ano.R @@ -3,11 +3,14 @@ context("s2dv::Ano test") ############################################## # dat1 set.seed(1) - dat1 <- array(rnorm(72), c(dat = 1,member = 3, sdate = 4, ftime = 6)) + dat1 <- array(rnorm(72), c(dat = 1, member = 3, sdate = 4, ftime = 6)) set.seed(2) - clim1 <- array(rnorm(12), c(dat = 1,member = 3, sdate = 4)) + clim1 <- array(rnorm(12), c(dat = 1, member = 3, sdate = 4)) #dat2 + set.seed(1) + dat2 <- array(rnorm(72), c(dat = 1, sdate = 4, ftime = 6, member = 3)) + clim2 <- clim1 ############################################## @@ -75,5 +78,39 @@ test_that("2. Output checks: dat1", { c(-0.24416258, -0.08427184, 0.79636122, -0.05306879), tolerance = 0.0001 ) + expect_equal( + Ano(dat1, clim1, ncores = 1), + Ano(dat1, clim1, ncores = 2) + ) }) + +test_that("3. Output checks: dat2", { + + expect_equal( + dim(Ano(dat2, clim2)), + dim(dat2) + ) + expect_equal( + mean(Ano(dat2, clim2)), + -0.1434844, + tolerance = 0.0001 + ) + expect_equal( + min(Ano(dat2, clim2)), + -3.789433, + tolerance = 0.0001 + ) + expect_equal( + Ano(dat2, clim2)[1, 2, , 3], + c(0.74868744, -1.26178338, -1.17655491, -0.17166029, 0.05637202, 2.04019139), + tolerance = 0.0001 + ) + expect_equal( + Ano(dat2, clim2, ncores = 1), + Ano(dat2, clim2, ncores = 2) + ) + +}) + +