From cef9ae7facad0bbf1b2bec965330e9db7486cb2f Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Nov 2019 18:37:46 +0100 Subject: [PATCH 1/3] Create unit test --- tests/testthat.R | 5 + tests/testthat/test-Compute.R | 356 ++++++++++++++++++++++++++++++++++ 2 files changed, 361 insertions(+) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-Compute.R diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..d424073 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,5 @@ +library(testthat) +library(startR) + +test_check("startR") + diff --git a/tests/testthat/test-Compute.R b/tests/testthat/test-Compute.R new file mode 100644 index 0000000..ae245dc --- /dev/null +++ b/tests/testthat/test-Compute.R @@ -0,0 +1,356 @@ +library(testthat) +library(startR) +context("Generic tests") +test_that("Sanity checks", { + +# expect_error( +# Composite(var = array(1:20, dim = c(2, 5, 2)), c(1, 1, 0)), +# "Temporal dimension of var is not equal to length of occ.") + +# expect_warning( +# Composite(var = array(1:40, dim = c(2, 5, 4)), c(1, 2, 2, 2)), +# "Composite 1 has length 1 and pvalue is NA.") + +#============= 1 =============== +#-----ex2_8_calibration.R------- + +# Define a region +lons.min <- 0 +lons.max <- 10 +lats.min <- 0 +lats.max <- 10 + +# Declaration of data sources +exp <- Start(dat = '/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', + sdate = paste0(2000:2010, '0101'), + ensemble = indices(1:2), + time = indices(1), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = c('sdate')), + retrieve = FALSE) + +obs <- Start(dat = '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', + sdate = paste0(2000:2010, '02'), + time = indices(1), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = c('sdate')), + split_multiselected_dims = TRUE, + merge_across_dims = TRUE, + retrieve = FALSE) + +# Define of the workflow + +# Function +wrap_cal <- function(obs, exp) { + obs <- s2dverification::InsertDim(obs, 1, 1) + names(dim(obs))<- c('member', 'sdate') + exp <- t(exp) + names(dim(exp))<- c('member', 'sdate') + calibrated <- CSTools:::.cal(var_obs = obs, var_exp = exp) + return(calibrated) +} + +step <- Step(wrap_cal, + target_dims = list(obs = c('sdate'), exp = c('sdate', 'ensemble')), + output_dims = c('ensemble', 'sdate')) + +# workflow of operations +wf <- AddStep(list(obs = obs, exp = exp), step) + + +# Execution +#res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2)) +res2 <- Compute(wf, chunks = list(latitude = 1, longitude = 1)) + + +# expect_equal( +# as.vector(dim(res1$output1)), +# as.integer(c(15, 11, 1, 1, 1, 14, 15)) +# ) + expect_equal( + as.vector(dim(res2$output1)), + as.integer(c(2, 11, 1, 1, 1, 14, 15)) + ) + + +# expect_equivalent( +# Composite(var, occ)$composite[, , 2], +# output +# ) + + +#==================== 2 ======================== +#----- ex2_1_timedim.R ------- + + repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + data <- Start(dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + ensemble = indices(1:2), + time = 'all', + latitude = 'all', + longitude = indices(1:2), + return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), + retrieve = FALSE) + + library(multiApply) + fun_spring <- function(x) { + source("/esarchive/scratch/nperez/Season_v2.R") + y <- Season_v2(x, monini = 1, moninf = 3, monsup = 5) + return(y) + } + + step1 <- Step(fun = fun_spring, + target_dims = c('time'), + output_dims = c('time')) + + wf1 <- AddStep(data, step1) + +## locally + res1 <- Compute(wf1, + chunks = list(ensemble = 2, sdate = 2)) + + expect_equal( + summary(res1$output1)[4], + c(Mean=280.2) + ) + +#==================== 3 ======================== +#----- ex2_2_attr.R ------- + repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + data <- Start(dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + ensemble = indices(1:2), + time = indices(1:2), + latitude = 'all', + longitude = indices(1:4), + return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), + retrieve = FALSE) + funp <- function(x) { + lat = attributes(x)$Variables$dat1$latitude + weight = sqrt(cos(lat * pi / 180)) + corrected = Apply(list(x), target_dims = "latitude", + fun = function(x) {x * weight}) + } + + + step2 <- Step(fun = funp, + target_dims = 'latitude', + output_dims = 'latitude', + use_attributes = list(data = "Variables")) + wf2 <- AddStep(data, step2) + +## locally + res2 <- Compute(workflow = wf2, + chunks = list(ensemble = 2, sdate = 2)) + + expect_equal( + round(mean(apply(res2$output1, 1, mean)), digits = 5), + 217.0771 + ) + + + +#==================== 4 ======================== +#----- ex2_3_cdo.R ------- + + repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + data <- Start(dat = repos, + var = 'tas', + sdate = c('20170101', '20180101'), + ensemble = indices(2), + time = indices(3:4), + latitude = 'all', + longitude = 'all', + return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), + retrieve = FALSE) + + fun_deb2 <- function(x) { + lons_data = as.vector(attr(x, 'Variables')$dat1$longitude) + lats_data = as.vector(attr(x, 'Variables')$dat1$latitude) + resgrid = "r360x180" # prlr + r <- s2dverification::CDORemap(x, lons_data, lats_data, resgrid, + 'bil', crop = FALSE, force_remap = TRUE)[[1]] + return(r) + } + + step3 <- Step(fun = fun_deb2, + target_dims = c('latitude','longitude'), + output_dims = c('latitude', 'longitude'), + use_attributes = list(data = "Variables")) + wf3 <- AddStep(data, step3) + +## locally + res3 <- Compute(workflow = wf3, + chunks = list(time = 2, sdate = 2)) + + expect_equal( + dim(res3$output1)[1:2], + c(latitude = 180, longitude = 360) + ) + + +#==================== 5 ======================== +#----- ex2_4_two_func.R ------- + + repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + data <- Start(dat = repos, + var = 'tas', + sdate = c('20170101'), + ensemble = indices(2), + time = indices(1:5), + latitude = values(list(0, 10)), + latitude_reorder = Sort(), + longitude = values(list(0, 10)), + return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), + retrieve = FALSE) + + fun_deb3 <- function(x) { + source("/esarchive/scratch/nperez/Season_v2.R") + lons_data = as.vector(attr(x, 'Variables')$dat1$longitude) + lats_data = as.vector(attr(x, 'Variables')$dat1$latitude) + resgrid = "r360x180" # prlr + y = Season_v2(x, posdim = 'time', monini = 1, moninf = 1, monsup = 3) + r <- s2dverification::CDORemap(y, lons_data, lats_data, resgrid, + 'bil', crop = FALSE, force_remap = TRUE)[[1]] + return(r) + } + + step4 <- Step(fun = fun_deb3, + target_dims = c('latitude','longitude', 'time'), + output_dims = c('latitude', 'longitude', 'time'), + use_attributes = list(data = "Variables")) + wf4 <- AddStep(data, step4) + +## locally + res4 <- Compute(workflow = wf4, + chunks = list(ensemble = 1, sdate = 1)) + + expect_equal( + dim(res3$output1)[1:2], + c(latitude = 180, longitude = 360) + ) + expect_equal( + length(which(!is.na(res4$output1))), + 110 + ) + expect_equal( + mean(res4$output1[which(!is.na(res4$output1))]), + 300.8074, + tolerance = .0001 + ) + + +#==================== 6 ======================== +#------------ ex2_6_ext_param_func.R ------------ + + library(lubridate) + dataset <- "/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc" + firsty <- "1981" + lasty <- "1982" + months <- 10:12 + firstd <- paste0(firsty,"1001") + sdates <- ymd(firstd) + months(0:2) + rep(years(0:(1987-1981)), each = 3) + + wind <- Start(dataset = dataset, + var = "sfcWind", + sdate = format(sdates, "%Y%m"), + time = 'all', + longitude = indices(1:30), + latitude = indices(1:20), + return_vars = list(time = NULL, latitude = NULL, longitude = NULL), + num_procs = 4, retrieve = FALSE, + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude'))) + + MJO <- data.frame(vdate = 1:(30 * 7 + 31 * 14), + phase = c(rep(1:8, 80), 1:4), + amplitude = 10 * rnorm(31 * 14 + 30 * 7)) + + stratify_atomic <- function(field, MJO, season = c("JFM", "OND"), + lag = 0, ampl = 2, relative = TRUE, signif = 0.05) { + nmonths <- dim(field)[3] + field <- aperm(field, c(1, 2, 4, 3)) + dim(field) <- c(31 * nmonths) + if(season == "JFM") { + daysok <- rep(c(rep(TRUE, 31), rep(TRUE, 28), + rep(FALSE, 3), rep(TRUE, 31)), nmonths / 3) + } else if (season == "OND") { + daysok <- rep(c(rep(TRUE, 31), rep(TRUE, 30), + rep(FALSE, 1), rep(TRUE, 31)), nmonths / 3) + } + field <- field[daysok] + dim(field) <- c(days = length(field)) + + if(dim(field)[1] != dim(MJO)[1]) { + stop("MJO indices and wind data have different number of days") + } + + idx <- function(MJO, phase, ampl, lag){ + if(lag == 0) { + return(MJO$phase == phase & MJO$amplitude > ampl) + } + if(lag > 0) { + return(dplyr::lag(MJO$phase == phase & MJO$amplitude > ampl, + lag, default = FALSE)) + } + if(lag < 0) { + return(dplyr::lead(MJO$phase == phase & MJO$amplitude > ampl, + - 1 * lag, default = FALSE)) + } + } + strat <- plyr::laply(1:8, function(i) { + idx2 <- idx(MJO, i, ampl, lag) + if (relative) { + return(mean(field[idx2]) / mean(field) - 1) + } else { + return(mean(field[idx2]) - mean(field)) + }}) + strat.t.test <- plyr::laply(1:8, function(i) { + idx2 <- idx(MJO, i, ampl, lag) + return(t.test(field[idx2], field)$p.value)}) + return(list(strat = strat, t_test = strat.t.test)) + } + + step <- Step(stratify_atomic, + target_dims = list(field = c('dataset', 'var', 'sdate', 'time')), + output_dims = list(strat = c('phase'), t_test = c('phase'))) + workflow <- AddStep(wind, step, MJO = MJO, season = "OND", lag = "0", amp = 0) + + res <- Compute(workflow$strat, + chunks = list(latitude = 2, longitude = 2), + threads_load = 2, + threads_compute = 4) + expect_equal( + dim(res$strat), + c(phase = 8, longitude = 30, latitude = 20) + ) + expect_equal( + dim(res$t_test), + c(phase = 8, longitude = 30, latitude = 20) + ) + + + +#==================== 7 ======================== +#------------ ex2_6_ext_param_func.R ------------ + + + + + + + + +}) -- GitLab From 5c9a031204a573bc7a38f0a7a63b5be96d5e1484 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 7 Nov 2019 12:48:25 +0100 Subject: [PATCH 2/3] Add Start() unit test --- tests/testthat/test-Start.R | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tests/testthat/test-Start.R diff --git a/tests/testthat/test-Start.R b/tests/testthat/test-Start.R new file mode 100644 index 0000000..20f6747 --- /dev/null +++ b/tests/testthat/test-Start.R @@ -0,0 +1,83 @@ +library(testthat) +library(startR) +context("Generic tests") +test_that("Sanity checks", { + +#============= 1 =============== +#-----ex1_1_transform.R------- + +obs_path <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h/$var$_$sdate$.nc' + +obs <- Start(dat = obs_path, + var = 'sfcWind', + sdate = '201811', + time = 'all', + latitude = values(list(0, 10)), + latitude_reorder = Sort(), + longitude = values(list(0, 10)), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(0, 10, 0, 10)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + split_multiselected_dims = TRUE, + merge_across_dims = TRUE, + retrieve = T) + + expect_equal( + dim(obs), + c(dat = 1, var = 1, sdate = 1, time = 1, latitude = 11, longitude = 11) + ) + + expect_equal( + as.vector(summary(obs)), + c(1.090, 1.684, 2.426, 2.728, 3.706, 4.970) + ) + +#============= 2 =============== + +obs <- Start(dat = obs_path, + var = 'sfcWind', + sdate = '201811', + time = 'all', + latitude = indices(1:10), + latitude_reorder = Sort(), + longitude = indices(1:10), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative'), + #crop = c(0, 10, 0, 10)), + transform_vars = c('latitude', 'longitude'), + return_vars = list(time = NULL, + latitude = 'dat', + longitude = 'dat'), + split_multiselected_dims = TRUE, + merge_across_dims = TRUE, + retrieve = T) + expect_equal( + dim(obs), + c(dat = 1, var = 1, sdate = 1, time = 1, latitude = 10, longitude = 10) + ) + expect_equal( + length(which(!is.na(obs[1,1,1,1,,]))), + 6 + ) + expect_equal( + mean(obs[1,1,1,1,,],1, na.rm = T), + 5.047489 + ) + + + + + +}) -- GitLab From 78418d1b506b8082ddd0d48697df0f0c5282ca77 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 27 Nov 2019 14:40:45 +0100 Subject: [PATCH 3/3] Add the tests in develop-test in --- tests/testthat/test-Start.R | 201 +++++++++++++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-Start.R b/tests/testthat/test-Start.R index 20f6747..39eeb6a 100644 --- a/tests/testthat/test-Start.R +++ b/tests/testthat/test-Start.R @@ -1,7 +1,201 @@ library(testthat) library(startR) -context("Generic tests") -test_that("Sanity checks", { +library(easyNCDF) +context("startR::Start tests") + +################################################################################ +################################################################################ +test_that("1. Call with no pattern dimension crashes.", { +################################################################################ + expect_error( + Start(), + "At least one pattern dim must be specified." + ) +################################################################################ + expect_error( + Start(1), + "At least one pattern dim must be specified." + ) +################################################################################ + expect_error( + Start('a'), + "At least one pattern dim must be specified." + ) +################################################################################ + expect_error( + Start('all'), + "At least one pattern dim must be specified." + ) +}) +################################################################################ + + + +################################################################################ +################################################################################ +test_that("2. Call with unexisting file crashes.", { +################################################################################ + expect_error( + Start(dataset = list( + list(path = './non_existing_file.nc', + name = 'Dataset A') + ), + pattern_dims = 'dataset'), + "No data files found for any of the specified datasets." + ) +}) + + + +################################################################################ +################################################################################ + # We need to put as.numeric, because loaded data with Start has to go through + # a big.matrix which is always of type 'numeric', regardless of the type of + # the data. Since the big.matrix is created before reading any data, it is + # not possible to create it with a special type depending on the type of + # the data. +sin_var__mul_val__mul_dim <- array(as.numeric(1:9), c(longitude = 9, latitude = 9)) +sin_var__mul_val__mul_dim__path <- paste0(test_dir, '/sin_var__mul_val__mul_dim.nc') +ArrayToNc(list(temperature = sin_var__mul_val__mul_dim), sin_var__mul_val__mul_dim__path) + +test_that("3. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and no inner selectors specified crashes. One core only.", { +################################################################################ + expect_error( + Start(dataset = list( + list(path = sin_var__mul_val__mul_dim__path, + name = 'Dataset A') + ), + pattern_dims = 'dataset', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE), + regexp = "Unexpected extra dimensions \\(of length > 1\\) in the file.*" + ) +}) + +################################################################################ +################################################################################ +sin_var__sin_val__mul_dim <- array(5, c(longitude = 1, latitude = 1)) +sin_var__sin_val__mul_dim__path <- paste0(test_dir, '/sin_var__sin_val__mul_dim.nc') +ArrayToNc(list(temperature = sin_var__sin_val__mul_dim), sin_var__sin_val__mul_dim__path) + +test_that("4. Call with existing file with single variable, single value and inner dimensions of length = 1 and no inner selectors specified works. One core only.", { +################################################################################ + data <- Start(dataset = list( + list(path = sin_var__sin_val__mul_dim__path, + name = 'Dataset A') + ), + pattern_dims = 'dataset', + metadata_dims = NA, + num_procs = 1) +# ERROR when retrieve = TRUE. data$Data does not exist. +# expect_identical(drop2(data$Data), drop2(sin_var__sin_val__mul_dim)) +}) + + +################################################################################ +################################################################################ +test_that("5. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and all inner selectors specified works. One core only.", { +################################################################################ + data <- Start(dataset = list( + list(path = sin_var__mul_val__mul_dim__path, + name = 'Dataset A') + ), + pattern_dims = 'dataset', + longitude = 'all', + latitude = 'all', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE) + expect_equivalent(drop(data)[,], sin_var__mul_val__mul_dim) + expect_equal(dim(Subset(data, 1, 1, drop = 'selected')), dim(sin_var__mul_val__mul_dim)) +}) + + +################################################################################ +################################################################################ +test_that("6. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and all inner selectors specified works. One core only. Dataset specification without name.", { +################################################################################ + data <- Start(dataset = list( + list(path = sin_var__mul_val__mul_dim__path) + ), + pattern_dims = 'dataset', + longitude = 'all', + latitude = 'all', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE) + expect_equivalent(drop(data)[,], sin_var__mul_val__mul_dim) + expect_equal(dim(Subset(data, 1, 1, drop = 'selected')), dim(sin_var__mul_val__mul_dim)) +}) + + +################################################################################ +################################################################################ +test_that("7. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and all inner selectors specified works. One core only. Dataset specification as single list.", { +################################################################################ + data <- Start(dataset = list(path = sin_var__mul_val__mul_dim__path), + pattern_dims = 'dataset', + longitude = 'all', + latitude = 'all', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE) + expect_equivalent(drop(data)[,], sin_var__mul_val__mul_dim) + expect_equal(dim(Subset(data, 1, 1, drop = 'selected')), dim(sin_var__mul_val__mul_dim)) +}) + +################################################################################ +################################################################################ +test_that("8. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and all inner selectors specified works. One core only. Dataset specification as single path pattern.", { +################################################################################ + data <- Start(dataset = sin_var__mul_val__mul_dim__path, + pattern_dims = 'dataset', + longitude = 'all', + latitude = 'all', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE) + expect_equivalent(drop(data)[,], sin_var__mul_val__mul_dim) + expect_equal(dim(Subset(data, 1, 1, drop = 'selected')), dim(sin_var__mul_val__mul_dim)) +}) + + +################################################################################ +################################################################################ +test_that("9. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and all inner selectors specified works. One core only. Multiple datasets specified as multiple path patterns.", { +################################################################################ + data <- Start(dataset = c(sin_var__mul_val__mul_dim__path, + sin_var__mul_val__mul_dim__path), + pattern_dims = 'dataset', + longitude = 'all', + latitude = 'all', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE) + expect_equivalent(dim(data), + c(dataset = 2, longitude = 9, latitude = 9)) +}) + + +################################################################################ +################################################################################ +test_that("10. Call with existing file with single variable, multiple values and inner dimensions of length > 1, and all inner selectors specified works. One core only. Dataset specified as path pattern. No pattern dim specified.", { +################################################################################ + data <- Start(dataset = sin_var__mul_val__mul_dim__path, + longitude = 'all', + latitude = 'all', + metadata_dims = NA, + num_procs = 1, + retrieve = TRUE) + expect_equivalent(drop(data)[,], sin_var__mul_val__mul_dim) + expect_equal(dim(Subset(data, 1, 1, drop = 'selected')), dim(sin_var__mul_val__mul_dim)) +}) + + +################################################################################ +################################################################################ +test_that("11. Example checks", { #============= 1 =============== #-----ex1_1_transform.R------- @@ -37,7 +231,8 @@ obs <- Start(dat = obs_path, expect_equal( as.vector(summary(obs)), - c(1.090, 1.684, 2.426, 2.728, 3.706, 4.970) + c(1.090, 1.684, 2.426, 2.728, 3.706, 4.970), + tolerance = .01 ) #============= 2 =============== -- GitLab