Commit b7b08d68 authored by aho's avatar aho
Browse files

Merge branch 'develop-Reorder' into 'master'

Refine code to improve efficiency

See merge request !48
parents 82c7defc af9866b8
Pipeline #5742 passed with stage
in 8 minutes and 41 seconds
......@@ -283,8 +283,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'),
exp_ori <- exp
obs_ori <- obs
}
exp <- MeanDims(exp, memb_dim, na.rm = TRUE, ncores = ncores)
obs <- MeanDims(obs, memb_dim, na.rm = TRUE, ncores = ncores)
exp <- MeanDims(exp, memb_dim, na.rm = TRUE)
obs <- MeanDims(obs, memb_dim, na.rm = TRUE)
}
if (is.null(avg_dim)) {
......@@ -558,8 +558,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'),
dim = dim(obs))
# ensemble mean before .ACC
drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE, ncores = ncores_input)
drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE, ncores = ncores_input)
drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE)
drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE)
# Reorder
if (is.null(avg_dim)) {
drawexp <- Reorder(drawexp, c(2, 3, 1))
......
......@@ -13,8 +13,7 @@
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return An array with same dimensions as parameter 'data' but with different
#' dimension order. The dimensions in parameter 'clim' are ordered first.
#'@return An array with same dimensions as parameter 'data'.
#'
#'@examples
#'# Load sample data as in Load() example:
......@@ -22,8 +21,6 @@
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'ano_exp <- Reorder(ano_exp, c(1, 2, 4, 3))
#'ano_obs <- Reorder(ano_obs, c(1, 2, 4, 3))
#'\donttest{
#'PlotAno(ano_exp, ano_obs, startDates,
#' toptitle = 'Anomaly', ytitle = c('K', 'K', 'K'),
......@@ -62,38 +59,62 @@
if (any(is.null(names(dim(clim))))| any(nchar(names(dim(clim))) == 0)) {
stop("Parameter 'clim' must have dimension names.")
}
for (i in 1:length(dim(clim))) {
if (!(names(dim(clim))[i] %in% names(dim(data)))) {
stop("Parameter 'data' must have all the dimensions of parameter 'clim'.")
} else {
pos <- names(dim(data))[which(names(dim(clim))[i] == names(dim(data)))]
if ((dim(clim)[i] != dim(data)[pos])) {
stop("Some dimensions of parameter 'clim' have different length from parameter 'data'.")
}
## data and clim
if (!all(names(dim(clim)) %in% names(dim(data)))) {
stop("Parameter 'data' must have all the dimensions of parameter 'clim'.")
} else {
pos <- names(dim(data))[match(names(dim(clim)), names(dim(data)))]
if (any(dim(clim) != dim(data)[pos])) {
stop("Some dimensions of parameter 'clim' have different length from parameter 'data'.")
}
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
if (!is.numeric(ncores)) {
stop("Parameter 'ncores' must be a positive integer.")
} else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
###############################
# Calculate Ano
parallel_compute <- TRUE
if (is.null(ncores)) {
parallel_compute <- FALSE
} else if (ncores == 1) {
parallel_compute <- FALSE
}
if (!parallel_compute) {
target_dims_ind <- match(names(dim(clim)), names(dim(data)))
if (any(target_dims_ind != sort(target_dims_ind))) {
clim <- Reorder(clim, match(sort(target_dims_ind), target_dims_ind))
}
if (length(dim(data)) == length(dim(clim))) {
res <- data - clim
} else {
target_dims_ind <- match(names(dim(clim)), names(dim(data)))
margin_dims_ind <- c(1:length(dim(data)))[-target_dims_ind]
res <- apply(data, margin_dims_ind, .Ano, clim)
res <- array(res, dim = dim(data)[c(target_dims_ind, margin_dims_ind)])
}
} else {
res <- Apply(list(data),
target_dims = names(dim(clim)),
output_dims = names(dim(clim)),
fun = .Ano,
clim = clim,
ncores = ncores)$output1
}
res <- Apply(list(data),
target_dims = names(dim(clim)),
output_dims = names(dim(clim)),
fun = .Ano,
clim = clim,
ncores = ncores)$output1
# Reorder dim back to data
if (any(dim(res) != dim(data))) {
res <- Reorder(res, names(dim(data)))
}
return(invisible(res))
return(invisible(res))
}
.Ano <- function(data, clim) {
ano <- data - clim
return(ano)
data - clim
}
......@@ -172,8 +172,8 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
## dat_dim: [dataset, member]
pos[i] <- which(names(dim(obs)) == dat_dim[i])
}
outrows_exp <- MeanDims(exp, pos, na.rm = FALSE, ncores = ncores) +
MeanDims(obs, pos, na.rm = FALSE, ncores = ncores)
outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) +
MeanDims(obs, pos, na.rm = FALSE)
outrows_obs <- outrows_exp
for (i in 1:length(pos)) {
......
......@@ -209,7 +209,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
n_tot <- length(occ)
}
mean_tot <- MeanDims(data, dims = 3, na.rm = TRUE, ncores = ncores_input)
mean_tot <- MeanDims(data, dims = 3, na.rm = TRUE)
stdv_tot <- apply(data, c(1, 2), sd, na.rm = TRUE)
for (k in 1:K) {
......@@ -231,7 +231,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
if (length(indices) == 1) {
composite[, , k] <- data[, , indices]
} else {
composite[, , k] <- MeanDims(data[, , indices], dims = 3, na.rm = TRUE, ncores = ncores_input)
composite[, , k] <- MeanDims(data[, , indices], dims = 3, na.rm = TRUE)
}
stdv_k <- apply(data[, , indices], c(1, 2), sd, na.rm = TRUE)
......
......@@ -222,14 +222,16 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
# Remove data along comp_dim dim if there is at least one NA between limits
if (!is.null(comp_dim)) {
pos <- which(names(dim(obs)) == comp_dim)
if (is.null(limits)) {
limits <- c(1, dim(obs)[comp_dim])
obs_sub <- obs
} else {
obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2]))
}
pos <- which(names(dim(obs)) == comp_dim)
obs_sub <- Subset(obs, pos, list(limits[1]:limits[2]))
outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE, ncores = ncores))
outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE))
outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim])
obs[which(outrows)] <- NA
rm(obs_sub, outrows)
}
if (is.null(memb_dim)) {
......
......@@ -8,24 +8,17 @@
#' dimensions to average.
#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or
#' not (FALSE).
#'@param ncores A integer indicating the number of cores to use in parallel computation.
#'@return An array with the same dimension as parameter 'data' except the 'dims'
#' dimensions.
#' removed.
#'
#'@keywords datagen
#'@author History:\cr
#'0.1 - 2011-04 (V. Guemas, \email{vguemas@@ic3.cat}) - Original code\cr
#'1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Formatting to R CRAN\cr
#'1.1 - 2015-03 (N. Manubens, \email{nicolau.manubens@@ic3.cat}) - Improved memory usage
#'3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Preserve dimension names
#'@examples
#'a <- array(rnorm(24), dim = c(2, 3, 4))
#'MeanDims(a, 2)
#'MeanDims(a, c(2, 3))
#'@import multiApply
#'@export
MeanDims <- function(data, dims, na.rm = FALSE, ncores = NULL) {
MeanDims <- function(data, dims, na.rm = FALSE) {
# Check inputs
## data
......@@ -61,29 +54,18 @@ MeanDims <- function(data, dims, na.rm = FALSE, ncores = NULL) {
if (!is.logical(na.rm) | length(na.rm) > 1) {
stop("Parameter 'na.rm' must be one logical value.")
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
###############################
# Calculate MeanDims
if (length(dims) == length(dim(data)) || length(dim(data)) == 1) {
res <- mean(data, na.rm = na.rm)
} else if (length(dims) == 1) {
if (length(dims) == length(dim(data))) {
data <- mean(data, na.rm = na.rm)
} else {
if (is.character(dims)) {
dims <- which(names(dim(data)) == dims)
dims <- which(names(dim(data)) %in% dims)
}
pos <- (1:length(dim(data)))[-dims]
res <- apply(data, pos, mean, na.rm = na.rm)
} else {
res <- Apply(list(data), target_dims = list(dims), fun = mean,
na.rm = na.rm, ncores = ncores)$output1
data <- apply(data, pos, mean, na.rm = na.rm)
}
return(res)
return(data)
}
......@@ -58,11 +58,7 @@ Reorder <- function(data, order) {
## If order is character string, find the indices
if (is.character(order)) {
tmp <- rep(0, length(order))
for (i in 1:length(order)) {
tmp[i] <- which(names(dim(data)) == order[i])
}
order <- tmp
order <- match(order, names(dim(data)))
}
## reorder
......
......@@ -115,16 +115,15 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1,
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
if (!is.numeric(ncores)) {
stop("Parameter 'ncores' must be a positive integer.")
} else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
###############################
# Calculate Trend
dim_names <- names(dim(data))
if (conf & pval) {
output_dims <- list(trend = 'stats', conf.lower = 'stats',
conf.upper = 'stats', p.val = 'stats', detrended = time_dim)
......@@ -136,22 +135,22 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1,
} else {
output_dims <- list(trend = 'stats', detrended = time_dim)
}
output <- Apply(list(data),
target_dims = time_dim,
fun = .Trend,
output_dims = output_dims,
time_dim = time_dim, interval = interval,
interval = interval,
polydeg = polydeg, conf = conf,
conf.lev = conf.lev, pval = pval,
ncores = ncores)
return(output)
return(invisible(output))
}
.Trend <- function(x, time_dim = 'ftime', interval = 1, polydeg = 1,
.Trend <- function(x, interval = 1, polydeg = 1,
conf = TRUE, conf.lev = 0.95, pval = TRUE) {
# x: [ftime]
mon <- seq(x) * interval
......@@ -193,7 +192,6 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1,
}
}
if (conf & pval) {
return(list(trend = trend, conf.lower = conf.lower, conf.upper = conf.upper,
p.val = p.val, detrended = detrended))
......
This document records the profiling tests of those functions using apply() and Apply()
depending on 'ncores'. The comparison is among apply(), Apply() with one core, and Apply() with two cores. Two different data sizes are tested. The testing package is "peakRAM".
- Ano()
For small data, apply() is better than Apply() both in time and memory usage. However, if
the data size is larger, apply() requires more memory even if it saves time still. Using
2 cores can save memory usage but time is even longer.
- small data
```r
set.seed(1)
dat1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400, ftime = 10))
set.seed(2)
clim1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400))
pryr::object_size(dat1)
9.6 MB
# (1) apply
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
0.016 9.1 68.6
# (2) Apply, 1 core
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
0.039 9.1 82.4
# (3) Apply, 2 cores
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
0.117 9.1 50.4
```
- large data
```r
set.seed(1)
dat2 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400, ftime = 10, lon = 150))
set.seed(2)
clim2 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400))
pryr::object_size(dat2)
1.44GB
# (1) apply
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
6.368 1373.3 6004
# (2) Apply, 1 core
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
15.211 1373.3 5844.3
# (3) Apply, 2 cores
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
20.193 1373.3 4718.9
```
- Trend
Because the returned value is list, apply() is not suitable for Trend(). For small data,
2 cores is twice faster than 1 core. The peak RAM is around 6-7x of data. For larger data,
1 core is a bit faster than 2 cores. The peak RAM is around 4-5x of data.
- small data
```r
set.seed(1)
dat1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 40, ftime = 100))
pryr::object_size(dat1)
9.6 MB
# (1) Apply, 1 core
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
21.324 9.8 56.4
# (2) Apply, 2 cores
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
11.327 9.8 63.1
# (3) Apply, 4 cores
```
- large data
```r
set.seed(1)
dat2 <- array(rnorm(10000), c(dat = 10, member = 10, sdate = 400, ftime = 1000, lon = 4))
pryr::object_size(dat2)
1.28GB
# (1) Apply, 1 core
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
602.273 1230.4 6004.3
# (2) Apply, 2 cores
Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB
632.638 1229.8 5979.2
```
......@@ -20,8 +20,7 @@ same length.}
computation. The default value is NULL.}
}
\value{
An array with same dimensions as parameter 'data' but with different
dimension order. The dimensions in parameter 'clim' are ordered first.
An array with same dimensions as parameter 'data'.
}
\description{
This function computes anomalies from a multidimensional data array and a
......@@ -33,8 +32,6 @@ example(Load)
clim <- Clim(sampleData$mod, sampleData$obs)
ano_exp <- Ano(sampleData$mod, clim$clim_exp)
ano_obs <- Ano(sampleData$obs, clim$clim_obs)
ano_exp <- Reorder(ano_exp, c(1, 2, 4, 3))
ano_obs <- Reorder(ano_obs, c(1, 2, 4, 3))
\donttest{
PlotAno(ano_exp, ano_obs, startDates,
toptitle = 'Anomaly', ytitle = c('K', 'K', 'K'),
......
......@@ -4,7 +4,7 @@
\alias{MeanDims}
\title{Average an array along multiple dimensions}
\usage{
MeanDims(data, dims, na.rm = FALSE, ncores = NULL)
MeanDims(data, dims, na.rm = FALSE)
}
\arguments{
\item{data}{An array to be averaged.}
......@@ -14,8 +14,6 @@ dimensions to average.}
\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or
not (FALSE).}
\item{ncores}{A integer indicating the number of cores to use in parallel computation.}
}
\value{
An array with the same dimension as parameter 'data' except the 'dims'
......@@ -31,11 +29,3 @@ a <- array(rnorm(24), dim = c(2, 3, 4))
MeanDims(a, 2)
MeanDims(a, c(2, 3))
}
\author{
History:\cr
0.1 - 2011-04 (V. Guemas, \email{vguemas@ic3.cat}) - Original code\cr
1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN\cr
1.1 - 2015-03 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Improved memory usage
3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Preserve dimension names
}
\keyword{datagen}
......@@ -3,11 +3,14 @@ context("s2dv::Ano test")
##############################################
# dat1
set.seed(1)
dat1 <- array(rnorm(72), c(dat = 1,member = 3, sdate = 4, ftime = 6))
dat1 <- array(rnorm(72), c(dat = 1, member = 3, sdate = 4, ftime = 6))
set.seed(2)
clim1 <- array(rnorm(12), c(dat = 1,member = 3, sdate = 4))
clim1 <- array(rnorm(12), c(dat = 1, member = 3, sdate = 4))
#dat2
set.seed(1)
dat2 <- array(rnorm(72), c(dat = 1, sdate = 4, ftime = 6, member = 3))
clim2 <- clim1
##############################################
......@@ -75,5 +78,39 @@ test_that("2. Output checks: dat1", {
c(-0.24416258, -0.08427184, 0.79636122, -0.05306879),
tolerance = 0.0001
)
expect_equal(
Ano(dat1, clim1, ncores = 1),
Ano(dat1, clim1, ncores = 2)
)
})
test_that("3. Output checks: dat2", {
expect_equal(
dim(Ano(dat2, clim2)),
dim(dat2)
)
expect_equal(
mean(Ano(dat2, clim2)),
-0.1434844,
tolerance = 0.0001
)
expect_equal(
min(Ano(dat2, clim2)),
-3.789433,
tolerance = 0.0001
)
expect_equal(
Ano(dat2, clim2)[1, 2, , 3],
c(0.74868744, -1.26178338, -1.17655491, -0.17166029, 0.05637202, 2.04019139),
tolerance = 0.0001
)
expect_equal(
Ano(dat2, clim2, ncores = 1),
Ano(dat2, clim2, ncores = 2)
)
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment