From 9a9a40f6212c983fda50cea08f51a379d1458152 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 15 Jun 2023 15:53:53 +0200 Subject: [PATCH 01/46] fix broken URL --- inst/doc/usecase.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 47ee89e..80614d0 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -78,7 +78,7 @@ The problem may occur when the dimension number of the splitted selector is more 4. [Use two functions in workflow](inst/doc/usecase/ex2_4_two_func.R) - 5. [RainFARM precipitation downscaling](https://earth.bsc.es/gitlab/es/startR/-/blob/develop-RainFARMCase/inst/doc/usecase/ex2_5_rainFARM.R) + 5. [RainFARM precipitation downscaling](/inst/doc/usecase/ex2_5_rainFARM.R) This example shows how to apply a statistical downscaling function with startR and simultaneously (considering the memory size if unnecessary dimensions are included) saves the data by chunks (e.g., chunking those dimensions which are not required for downscaling) in the esarchive format. It is not recommended to save big outputs. Consider to perform some analysis and then retrieve the result instead of saving data. This is a simplified example of RainFARM for more information visit: https://www.medscope-project.eu/products/data/. Find more explanation of this use case in FAQ [How-to-27](inst/doc/faq.md#27-utilize-chunk-number-in-the-function). -- GitLab From ddf51ba036eec8126b46537eb9010c7c16cdb213 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 15 Jun 2023 23:23:12 +0200 Subject: [PATCH 02/46] return variable metadata when retrieve = F --- R/Start.R | 119 +++- R/zzz.R | 5 +- tests/testthat/test-Start-metadata_dims.R | 542 +++++++++++++++++- .../testthat/test-Start-metadata_reshaping.R | 2 +- 4 files changed, 620 insertions(+), 48 deletions(-) diff --git a/R/Start.R b/R/Start.R index 702b776..92eb16d 100644 --- a/R/Start.R +++ b/R/Start.R @@ -4108,7 +4108,90 @@ Start <- function(..., # dim = indices/selectors, } } } + # Retrieve variable metadata + # Compare array_of_metadata_flags with array_of_files_to_load to know which files to take for metadata + if (!is.null(metadata_dims)) { + array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load)) + metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load)))) + names(metadata_indices_to_load) <- names(dim(array_of_files_to_load)) + metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims))) + array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load, + list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims]))))) + + if (tail(names(dim(array_of_files_to_load)), 1) != found_pattern_dim) { + tmp1 <- s2dv::Reorder(array_of_files_to_load, c(2:length(dim(array_of_files_to_load)), 1)) + tmp2 <- s2dv::Reorder(array_of_metadata_flags, c(2:length(dim(array_of_metadata_flags)), 1)) + files_for_metadata <- tmp1[tmp2] + } else { + files_for_metadata <- array_of_files_to_load[array_of_metadata_flags] + } + # Get variable name + #NOTE: This part probably will fail when one netCDF file has more than one variable. 
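+          # Illustration (hypothetical sizes, not from a real call): with
+          # dim(array_of_files_to_load) = c(dat = 2, var = 2, sdate = 3) and
+          # metadata_dims = c('dat', 'var'), array_of_metadata_flags is TRUE at
+          # every [dat, var] position of sdate = 1 only, so one file per dat/var
+          # combination is opened for metadata, and tmp_var below holds the
+          # variable name expected in each of those files.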
+          if (found_pattern_dim %in% metadata_dims) {  # metadata_dims has "dat"
+            if (any(metadata_dims %in% c('var', 'variable'))) {  # metadata_dims is c('dat', 'var')
+              how_many_vars <- length(dat[[1]][['selectors']]$var[[1]])
+            } else if (length(metadata_dims) > 1) {  # metadata_dims is c('dat', xxx)
+              how_many_vars <- length(dat[[1]][['selectors']][[metadata_dims[which(found_pattern_dim != metadata_dims)]]][[1]])
+            } else {  # metadata_dims is 'dat'
+              how_many_vars <- 1
+            }
+            tmp_var <- matrix(NA, how_many_vars, length(dat))
+            for (i_dat in 1:dim(array_of_metadata_flags)[found_pattern_dim]) {
+              if (any(metadata_dims %in% c('var', 'variable'))) {  # metadata_dims has "var"
+                tmp_var[, i_dat] <- dat[[i_dat]][['selectors']]$var[[1]]
+              } else if (length(metadata_dims) > 1) {  # metadata_dims is c('dat', xxx)
+                tmp_var[, i_dat] <- rep(dat[[i_dat]][['selectors']]$var[[1]][1],
+                                        length(dat[[1]][['selectors']][[metadata_dims[which(found_pattern_dim != metadata_dims)]]][[1]]))
+              } else {  # metadata_dims is 'dat'
+                tmp_var[, i_dat] <- dat[[i_dat]][['selectors']]$var[[1]][1]
+              }
+            }
+
+            # if metadata_dims = c('dat', 'var') and [dat = 2, var = 2], tmp_var has length 4, like c('tas', 'tos', 'tas', 'tos').
+            # if metadata_dims = 'dat' and [dat = 2], tmp_var has length 2 like c('tas', 'tos').
+            tmp_var <- c(tmp_var)
+
+          } else {  # metadata_dims doesn't have "dat"
+            if (any(metadata_dims %in% c('var', 'variable'))) {  # metadata_dims has "var"
+              tmp_var <- dat[[1]][['selectors']]$var[[1]]
+            } else {
+              tmp_var <- rep(dat[[1]][['selectors']]$var[[1]][1], length(dat[[1]][['selectors']][[metadata_dims]][[1]]))
+            }
+            # if metadata_dims = 'var' and [var = 2], tmp_var has length 2 like c('tas', 'tos')
+            # if metadata_dims = 'table' and [table = 2], tmp_var has length 1 like 'tas'
+          }
+
+          loaded_metadata <- vector('list', length = length(files_for_metadata))
+          for (i_file in 1:length(files_for_metadata)) {
+            #NOTE: Do not use ncatt_get() because it only gets the attrs shown with ncdump -h
+            tmp <- file_opener(files_for_metadata[i_file])
+            if (!is.null(tmp)) {  # if file exists
+              loaded_metadata[[i_file]][[1]] <- tmp$var[[tmp_var[i_file]]]
+              names(loaded_metadata[[i_file]]) <- tmp_var[i_file]
+              file_closer(tmp)
+            }
+          }
+          # Find loaded_metadata_files, identical to the "retrieve = T" case.
If dataset_has_files is F, deduct that dataset from counting + ind_loaded_metadata_has_values <- which(!sapply(loaded_metadata, is.null)) # c(1, 2, 4) + if (!all(dataset_has_files)) { # If dataset_has_files has F, deduct that dataset from counting + if (found_pattern_dim %in% metadata_dims) { # metadata_dims has "dat" + dataset_has_files_expand <- rep(dataset_has_files, each = how_many_vars) + i_ind <- 1 + while (i_ind <= length(ind_loaded_metadata_has_values)) { # 3, 4, 8 + if (ind_loaded_metadata_has_values[i_ind] > i_ind) { + ind_loaded_metadata_has_values[i_ind] <- ind_loaded_metadata_has_values[i_ind] - length(which(!dataset_has_files_expand[1:dataset_has_files_expand[i_ind]])) + } + i_ind <- i_ind + 1 + } + } + } + loaded_metadata_files <- as.character(ind_loaded_metadata_has_values) + loaded_metadata <- loaded_metadata[which(!sapply(loaded_metadata, is.null))] + return_metadata <- create_metadata_list(array_of_metadata_flags, metadata_dims, pattern_dims, + loaded_metadata_files, loaded_metadata, dat_names, + dataset_has_files) + } } # Print the warnings from transform if (!is.null(c(warnings1, warnings2, warnings3))) { @@ -4150,27 +4233,28 @@ Start <- function(..., # dim = indices/selectors, file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])] } + # Prepare attr Variables + if (all(sapply(return_metadata, is.null))) { + # We don't have metadata of the variable (e.g., tas). The returned metadata list only + # contains those are specified in argument "return_vars". + Variables_list <- c(list(common = picked_common_vars), picked_vars) + .warning(paste0("Metadata cannot be retrieved. The reason may be the ", + "non-existence of the first file. Use parameter 'metadata_dims'", + " to assign to file dimensions along which to return metadata, ", + "or check the existence of the first file.")) + } else { + # Add the metadata of the variable (e.g., tas) into the list of picked_vars or + # picked_common_vars. + Variables_list <- combine_metadata_picked_vars( + return_metadata, picked_vars, picked_common_vars, + metadata_dims, pattern_dims, length(dat)) + } + if (retrieve) { if (!silent) { .message("Successfully retrieved data.") } - if (all(sapply(return_metadata, is.null))) { - # We don't have metadata of the variable (e.g., tas). The returned metadata list only - # contains those are specified in argument "return_vars". - Variables_list <- c(list(common = picked_common_vars), picked_vars) - .warning(paste0("Metadata cannot be retrieved. The reason may be the ", - "non-existence of the first file. Use parameter 'metadata_dims'", - " to assign to file dimensions along which to return metadata, ", - "or check the existence of the first file.")) - } else { - # Add the metadata of the variable (e.g., tas) into the list of picked_vars or - # picked_common_vars. 
- Variables_list <- combine_metadata_picked_vars( - return_metadata, picked_vars, picked_common_vars, - metadata_dims, pattern_dims, length(dat)) - } - attributes(data_array) <- c(attributes(data_array), list(Variables = Variables_list, Files = array_of_files_to_load, @@ -4200,7 +4284,7 @@ Start <- function(..., # dim = indices/selectors, start_call[['retrieve']] <- TRUE attributes(start_call) <- c(attributes(start_call), list(Dimensions = final_dims_fake, - Variables = c(list(common = picked_common_vars), picked_vars), + Variables = Variables_list, ExpectedFiles = array_of_files_to_load, FileSelectors = file_selectors, PatternDim = found_pattern_dim, @@ -4237,6 +4321,7 @@ Start <- function(..., # dim = indices/selectors, second_round_indices <- work_piece[['second_round_indices']] #print("2") file_to_open <- work_piece[['file_path']] + # Get data and metadata sub_array <- file_data_reader(file_to_open, NULL, work_piece[['file_selectors']], first_round_indices, synonims) diff --git a/R/zzz.R b/R/zzz.R index 22805ff..1e56e29 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1363,7 +1363,10 @@ combine_metadata_picked_vars <- function(return_metadata, picked_vars, picked_co sublist_names <- lapply(return_metadata, names)[[kk]] if (!is.null(sublist_names)) { for (jj in 1:length(sublist_names)) { - picked_vars[[kk]][[sublist_names[jj]]] <- return_metadata[[kk]][[jj]] + if (!is.null(return_metadata[[kk]][[jj]])) { + picked_vars[[kk]] <- c(picked_vars[[kk]], list(return_metadata[[kk]][[jj]])) + names(picked_vars[[kk]])[length(picked_vars[[kk]])] <- names(return_metadata[[kk]][jj]) + } } } } diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index ce30eec..4251c71 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -19,10 +19,6 @@ suppressWarnings( retrieve = T ) ) - expect_equal( - length(attr(data, 'Variables')), - 2 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system5_m1") @@ -45,6 +41,41 @@ suppressWarnings( tolerance = 0.0001 ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system5_m1', path = repos)), + var = 'tas', + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(2:3), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + retrieve = F + ) +) + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('time', 'tas') + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat") + ) + expect_equal( + length(attr(dataF, 'Variables')$common$tas), + 22 + ) + }) @@ -69,10 +100,6 @@ suppressWarnings( retrieve = T ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -107,6 +134,52 @@ suppressWarnings( c(247.2570, 248.5016), tolerance = 0.0001 ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = 'tas', + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(2:3), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 
'dat'), + metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas") + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$tas), + 22 + ) + + }) test_that("3. One data set, two vars", { @@ -127,10 +200,6 @@ suppressWarnings( retrieve = TRUE ) ) - expect_equal( - length(attr(data, 'Variables')), - 2 - ) expect_equal( names(attr(data, 'Variables')), c("common", "dat1") @@ -161,6 +230,43 @@ suppressWarnings( c(250.00110, 25.04345), tolerance = 0.0001 ) + + +suppressWarnings( + dataF <- Start(dat = repos, + var = var, + time = indices(1), + lat = indices(9:10), + lon = indices(10:11), + return_vars = list(lat = NULL, lon = NULL), + metadata_dims = 'var', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "dat1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('lat', 'lon', 'tas', 'clt') + ) + expect_equal( + is.null(attr(dataF, 'Variables')$dat1), + TRUE + ) + expect_equal( + length(attr(dataF, 'Variables')$common$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$common$clt), + 22 + ) + }) test_that("4. Two data sets, two vars", { @@ -184,10 +290,6 @@ suppressWarnings( retrieve = T ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -213,6 +315,14 @@ suppressWarnings( 11 ) expect_equal( + attr(data, 'Variables')$system4_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(data, 'Variables')$system5_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 1296) + ) + expect_equal( data[1, , 1, 1, 1, 2, 2], c(247.227219, 6.370782), tolerance = 0.0001 @@ -223,6 +333,57 @@ suppressWarnings( tolerance = 0.0001 ) +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(1:2), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', + retrieve = F + ) +) + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas") + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$tas), + 22 + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 1296) + ) + 
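+  # (Side note, not part of the test: the 'tas' element checked above is the raw
+  # variable description returned by the NetCDF reader, so other fields could be
+  # inspected the same way, e.g.
+  #   attr(dataF, 'Variables')$system4_m1$tas$units
+  # -- a hypothetical check; for tas the expected value would be "K".)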
#------------------------------------------------------------- suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), @@ -243,10 +404,6 @@ suppressWarnings( ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -289,6 +446,83 @@ suppressWarnings( c(248.781540, 5.794801), tolerance = 0.0001 ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(1:2), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas", "sfcWind") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas", "sfcWind") + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$sfcWind), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$sfcWind), + 22 + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 1296) + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$sfcWind$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$sfcWind$dim[[1]][1:2], + list(name = 'lon', len = 1296) + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$tas$name, + 'tas' + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$sfcWind$name, + 'sfcWind' + ) + }) test_that("5. 
Specify metadata_dims with another file dimension", { @@ -313,10 +547,6 @@ suppressWarnings( ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -337,7 +567,167 @@ suppressWarnings( length(attr(data, 'Variables')$common$tas), 12 ) + expect_equal( + attr(data, 'Variables')$common[[3]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'sdate', + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('time', 'tas', 'tas', 'tas') + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat") + ) + expect_equal( + length(attr(dataF, 'Variables')$common[[2]]), + 22 + ) + expect_equal( + attr(data, 'Variables')$common$time, + attr(dataF, 'Variables')$common$time + ) + expect_equal( + attr(dataF, 'Variables')$common[[3]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) + +#------------------------------------------------------------------ +suppressWarnings( + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = c('dat', 'sdate'), + retrieve = T + ) +) + + expect_equal( + names(attr(data, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(data, 'Variables')$common), + c('time') + ) + expect_equal( + names(attr(data, 'Variables')$system4_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + names(attr(data, 'Variables')$system5_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1[[3]]), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1[[4]]), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system5_m1[[5]]), + 12 + ) + expect_equal( + attr(data, 'Variables')$system4_m1[[3]]$dim[[4]]$units, + "hours since 2015-01-01 00:00:00" + ) + expect_equal( + attr(data, 'Variables')$system5_m1[[4]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = c('dat', 'sdate'), + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 
'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('time') + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1[[3]]), + 22 + ) + expect_equal( + attr(data, 'Variables')$common$time, + attr(dataF, 'Variables')$common$time + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1[[3]]$dim[[4]]$units, + "hours since 2015-01-01 00:00:00" + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1[[4]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) }) @@ -374,6 +764,18 @@ data <- Start(repos = mask_path, length(attr(data, 'Variables')$common$nav_lat), 8 ) +#NOTE: The following code doesn't work, and since this netCDF file doesn't follow the convention (one var per file), we leave this development to the future. +#suppressWarnings( +#dataF <- Start(repos = mask_path, +# var = c('nav_lon', 'nav_lat'), +# t = 'first', +# z = 'first', +# x = 'all', +# y = 'all', +# return_vars = list(var_names = NULL), +# var_var = 'var_names', +# retrieve = F) +#) }) @@ -396,7 +798,7 @@ suppressWarnings( return_vars = list(time = 'sdate', lon = 'dat', lat = 'dat'), - metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + metadata_dims = 'dat', retrieve = T ) ) @@ -409,6 +811,10 @@ suppressWarnings( list(lon = NULL, lat = NULL) ) expect_equal( + names(attr(data, "Variables")$system5_m1), + c('lon', 'lat', 'tas') + ) + expect_equal( length(attr(data, "Variables")$system5_m1$lon), 1296 ) @@ -421,6 +827,49 @@ suppressWarnings( array(c(NA, "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = var, + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = 'all', + lon = 'all', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', + retrieve = F + ) +) + + expect_equal( + attr(dataF, "Variables")$system4_m1, + list(lon = NULL, lat = NULL) + ) + expect_equal( + length(attr(dataF, "Variables")$system5_m1$lon), + 1296 + ) + expect_equal( + names(attr(dataF, "Variables")$system5_m1), + c('lon', 'lat', 'tas') + ) + expect_equal( + length(attr(dataF, "Variables")$system5_m1$tas), + 22 + ) + expect_equal( + attr(dataF, 'ExpectedFiles'), + array(c("/esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + dim = c(dat = 2, var = 1, sdate = 1)) + ) + + }) test_that("8. 
Two data sets, both have files but the first file is missing", {

@@ -457,10 +906,6 @@ data <- Start(dataset = path_list,
     5500
   )
   expect_equal(
-    length(attr(data, "Variables")$MPI_ESM),
-    3
-  )
-  expect_equal(
     length(attr(data, "Variables")$MPI_ESM$lon),
     30
   )
@@ -489,4 +934,43 @@ data <- Start(dataset = path_list,
   )
 
 
+suppressWarnings(
+dataF <- Start(dataset = path_list,
+               var = 'tasmin',
+               member = list(c('r1i1p1f1', 'r2i1p1f2')),
+               sdate = paste0(2018),
+               chunk = list(c('20181101-20281231', '20181101-20181230')),
+               time = indices(1), #'all',
+               lat = values(list(0, 14)),
+               lon = values(list(0, 28)),
+               synonims = list(time = c('fmonth','time'),
+                               lon = c('lon', 'longitude'),
+                               lat = c('lat', 'latitude')),
+               return_vars = list(lat = 'dataset', lon = 'dataset'),
+               lat_reorder = Sort(),
+               num_procs = 1,
+               retrieve = F)
+)
+
+  expect_equal(
+    length(attr(dataF, "Variables")$MPI_ESM$lon),
+    30
+  )
+  expect_equal(
+    names(attr(dataF, "Variables")$MPI_ESM),
+    c('lat', 'lon', 'tasmin')
+  )
+  expect_equal(
+    length(attr(dataF, "Variables")$MPI_ESM$tasmin),
+    22
+  )
+  expect_equal(
+    names(attr(dataF, "Variables")$HadGEM3),
+    c('lat', 'lon')
+  )
+  expect_equal(
+    length(attr(dataF, "Variables")$HadGEM3$lon),
+    34
+  )
+
 })
diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R
index ac332cd..7e9c280 100644
--- a/tests/testthat/test-Start-metadata_reshaping.R
+++ b/tests/testthat/test-Start-metadata_reshaping.R
@@ -651,7 +651,7 @@ data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/',
 )
 expect_equal(
 names(attributes(data)$Variables$common),
-NULL
+"tas"
 )
})
-- GitLab


From b176daf773a33ff8ef1e775fa421214c34b6ee7a Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 4 Jul 2023 18:00:27 +0200
Subject: [PATCH 03/46] Create usecase

---
 inst/doc/usecase.md                          |  3 +
 inst/doc/usecase/ex2_14_margin_dim_indices.R | 86 ++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 inst/doc/usecase/ex2_14_margin_dim_indices.R

diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md
index 80614d0..592a620 100644
--- a/inst/doc/usecase.md
+++ b/inst/doc/usecase.md
@@ -106,6 +106,9 @@ Find more explanation of this use case in FAQ [How-to-27](inst/doc/faq.md#27-uti
 This script shows how to load irregular grid data by Start(), then regrid it by s2dv::CDORemap in the workflow. It is a solution before Start() can deal with irregular regridding directly.
 
+  14. [Get margin dimension indices in startR workflow](inst/doc/usecase/ex2_14_margin_dim_indices.R)
+  This usecase shows you how to know the margin dimension indices in the self-defined function.
+
 3. **Verification workflows**
    1. [Weekly ECV Subseasonal Hindcast Verification](inst/doc/usecase/ex3_1_SubseasonalECVHindcast.md)
    This is a practical case to compute monthly skill scores for the ECMWF/S2S-ENSForhc
subseasonal hindcast using as a reference dataset ERA5. The ECV is air temperature at surface level (tas). Note that since this case is practical, it is heavy and takes much time to finish running.
diff --git a/inst/doc/usecase/ex2_14_margin_dim_indices.R b/inst/doc/usecase/ex2_14_margin_dim_indices.R
new file mode 100644
index 0000000..4b67196
--- /dev/null
+++ b/inst/doc/usecase/ex2_14_margin_dim_indices.R
@@ -0,0 +1,86 @@
+# Author: An-Chi Ho
+# Date: 4th July 2023
+# ------------------------------------------------------------------
+# This usecase shows you how to know the margin dimension indices in the self-defined function.
+# In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices of each chunk, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC.
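+# (For orientation: when run through Compute(), 'chunk_indices' is available
+# inside the function as a named vector holding the position of the current
+# chunk along each chunked dimension, e.g. c(dat = 1, var = 2, sdate = 2,
+# ensemble = 1, time = 1) for the fourth chunk of this example -- values shown
+# here only as an illustration.)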
+#------------------------------------------------------------------ + + library(startR) + + data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc", + var = c('psl', 'tas'), + sdate = paste0(2011:2018, '0501'), + ensemble = 'all', + time = indices(1:3), + lat = values(list(20, 80)), lat_reorder = Sort(), + lon = values(list(-80, 40)), lon_reorder = CircularSort(-180, 180), + synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', lon = NULL, lat = NULL), + retrieve = FALSE) + + + #NOTE: 'chunk_indices', 'chunks', and 'start_call' are the variables from startR:::ByChunks + func <- function(x) { + # x: [lat, lon] + + #----- Get margin dim indices --------- + # code modified from startR Util.R .chunk() + dim_indices <- lapply(names(chunks), + function(x) { + n_indices <- attr(start_call, 'Dimensions')[[x]] + chunk_sizes <- rep(floor(n_indices / chunks[[x]]), chunks[[x]]) + chunks_to_extend <- n_indices - chunk_sizes[1] * chunks[[x]] + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk_indices[x]] + offset <- 0 + if (chunk_indices[x] > 1) { + offset <- sum(chunk_sizes[1:(chunk_indices[x] - 1)]) + } + 1:chunk_sizes[chunk_indices[x]] + offset + }) + names(dim_indices) <- names(chunks) + + # The first chunk [var = 1, sdate = 1] + #str(dim_indices) + #List of 5 + # $ dat : num 1 + # $ var : num 1 + # $ sdate : num [1:4] 1 2 3 4 + # $ ensemble: num [1:25] 1 2 3 4 5 6 7 8 9 10 ... + # $ time : num [1:3] 1 2 3 + + # The fourth chunk [var = 2, sdate = 2] + #str(dim_indices) + #List of 5 + # $ dat : num 1 + # $ var : num 2 + # $ sdate : num [1:4] 5 6 7 8 + # $ ensemble: num [1:25] 1 2 3 4 5 6 7 8 9 10 ... + # $ time : num [1:3] 1 2 3 + + if (dim_indices$var == 2) { # tas + x <- x - 273.15 + } + + res <- ClimProjDiags::WeightedMean(x, lat = c(attr(x, 'Variables')$common$lat), lon = c(attr(x, 'Variables')$common$lon)) + + return(res) + } + + + step <- Step(func, target_dims = c('lat', 'lon'), output_dims = NULL, + use_attributes = list("Variables")) + wf <- AddStep(data, step) + + res <- Compute(wf, chunks = list(var = 2, sdate = 2)) + + +# Check result: +summary(res$output1[1, 1, , , ]) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 101373 101525 101573 101570 101615 101749 +summary(res$output1[1, 2, , , ]) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 13.56 14.27 17.24 16.87 19.06 19.78 -- GitLab From 48202a44886b5366de7b48e8466c6329f99d569c Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 4 Jul 2023 18:03:12 +0200 Subject: [PATCH 04/46] Correct format --- inst/doc/usecase.md | 4 ++-- inst/doc/usecase/ex2_14_margin_dim_indices.R | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 592a620..47f807e 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -106,11 +106,11 @@ Find more explanation of this use case in FAQ [How-to-27](inst/doc/faq.md#27-uti This script shows how to load irregular grid data by Start(), then regrid it by s2dv::CDORemap in the workflow. It is a solution before Start() can deal with irregular regridding directly. - 14. [Get margin dimension indices in startR workflow](inst/doc/usecase/ex2_14_margin_dim_indices.R) + 14. [Get margin dimension indices in startR workflow](inst/doc/usecase/ex2_14_margin_dim_indices.R) This usecase shows you how to know the margin dimension indices in the self-defined function. 3. **Verification workflows** - 1. 
[Weekly ECV Subseasonal Hindcast Verification](inst/doc/usecase/ex3_1_SubseasonalECVHindcast.md)
    This is a practical case to compute monthly skill scores for the ECMWF/S2S-ENSForhc
subseasonal hindcast using as a reference dataset ERA5. The ECV is air temperature at surface level (tas). Note that since this case is practical, it is heavy and takes much time to finish running.
diff --git a/inst/doc/usecase/ex2_14_margin_dim_indices.R b/inst/doc/usecase/ex2_14_margin_dim_indices.R
index 4b67196..747a4e3 100644
--- a/inst/doc/usecase/ex2_14_margin_dim_indices.R
+++ b/inst/doc/usecase/ex2_14_margin_dim_indices.R
@@ -2,7 +2,8 @@
 # Date: 4th July 2023
 # ------------------------------------------------------------------
 # This usecase shows you how to know the margin dimension indices in the self-defined function.
-# In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices of each chunk, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC.
+# In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices
+# of each chunk, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC.
 #------------------------------------------------------------------
 
 library(startR)
-- GitLab


From a973596516a6f079dd43037d2fef29a97d8b8f45 Mon Sep 17 00:00:00 2001
From: aho
Date: Wed, 12 Jul 2023 11:25:11 +0200
Subject: [PATCH 05/46] Refine usecase; provide another easy method to get
 chunk information

---
 inst/doc/faq.md                              |  9 +++-----
 inst/doc/usecase/ex2_14_margin_dim_indices.R | 17 +++++++++++++++
 inst/doc/usecase/ex2_5_rainFARM.R            | 22 ++++++++++++++++++--
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/inst/doc/faq.md b/inst/doc/faq.md
index 1508742..ffe91a5 100644
--- a/inst/doc/faq.md
+++ b/inst/doc/faq.md
@@ -1001,13 +1001,10 @@ See [How-to-21](#21-retrieve-the-complete-data-when-the-dimension-length-varies-
 
 In the self-defined function in startR workflow, the dimensions required for the computations are used as target dimensions, and the rest can be used to chunk the data in pieces.
 There is one situation that some information of one dimension is needed in the function but it is not depended by the computation. In this case, we may be able to chunk through this dimension while using it in the function still. It is a saver if you have a complex case with no margin dimension left (see [How-to-25](#25-what-to-do-if-your-function-has-too-many-target-dimensions).)
-You just need to define a parameter in your function 'nchunks = chunk_indices' and use it in the function.
-The use case [RainFARM precipitation downscaling](https://earth.bsc.es/gitlab/es/startR/-/blob/develop-RainFARMCase/inst/doc/usecase/ex2_5_rainFARM.R) demonstrates an example that the start date dimension is used as chunking dimension,
-but we use its chunk number to know the start date value of each chunk.
-The first part of the function performs downscaling method, which requres longitude and latitude dimensions, so these two dimensions must be the target dimensions in the workflow.
-After that, the results are saved as netCDF file following esarchive convention. We need start date value here to decide the file name.
-As you can see, the sdate dimension is not required for the computation, so it is not necessary to be the target dimension.
We can just use 'chunk_indices' to get the chunk number therefore get the corresponding start date value for the file name.
+We have two examples: (1) [ex2_5_RainFARM precipitation downscaling](inst/doc/usecase/ex2_5_rainFARM.R)
+shows how to get the start date for each chunk using the chunk number; (2) [ex2_14](inst/doc/usecase/ex2_14_margin_dim_indices.R) shows how to distinguish the variable in each chunk, since "variable" is one of the chunking dimensions
+(__NOTE: In this case, it is easier to simply use attributes to find which variable it is. Check the use case for more details.__)
 
 There are many other possible applications of this parameter. Please share with us other uses cases you may create.
diff --git a/inst/doc/usecase/ex2_14_margin_dim_indices.R b/inst/doc/usecase/ex2_14_margin_dim_indices.R
index 747a4e3..4ce1caf 100644
--- a/inst/doc/usecase/ex2_14_margin_dim_indices.R
+++ b/inst/doc/usecase/ex2_14_margin_dim_indices.R
@@ -4,6 +4,10 @@
 # This usecase shows you how to know the margin dimension indices in the self-defined function.
 # In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices
 # of each chunk, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC.
+#
+# [UPDATE_12072023] This case can be made much easier: simply use attributes to
+# identify which variable is in each chunk, because attributes are also
+# chunked along with the data.
 #------------------------------------------------------------------
 
 library(startR)
@@ -19,7 +23,19 @@
                 return_vars = list(time = 'sdate', lon = NULL, lat = NULL),
                 retrieve = FALSE)
 
+#---------------- METHOD 1 (RECOMMENDED) -----------------
+  func <- function(x) {
+    # x: [lat, lon]
+    attrs_names <- names(attr(x, 'Variables')$common)
+    if ('tas' %in% attrs_names) x <- x - 273.15
+
+    res <- ClimProjDiags::WeightedMean(x, lat = c(attr(x, 'Variables')$common$lat), lon = c(attr(x, 'Variables')$common$lon))
+
+    return(res)
+  }
+
+#---------------- METHOD 2 -----------------
 #NOTE: 'chunk_indices', 'chunks', and 'start_call' are the variables from startR:::ByChunks
   func <- function(x) {
     # x: [lat, lon]
diff --git a/inst/doc/usecase/ex2_5_rainFARM.R b/inst/doc/usecase/ex2_5_rainFARM.R
index 8d315a0..8b58901 100644
--- a/inst/doc/usecase/ex2_5_rainFARM.R
+++ b/inst/doc/usecase/ex2_5_rainFARM.R
@@ -1,6 +1,19 @@
 # ------------------------------------------------------------------------------
 # Downscaling precipitation using RainFARM
 # ------------------------------------------------------------------------------
+# This use case demonstrates chunking along the start date dimension while still
+# using the chunk number to recover the start date value of each chunk.
+# The first part of the function performs the downscaling method, which requires
+# longitude and latitude dimensions, so these two dimensions must be the target
+# dimensions in the workflow.
+# After that, the results are saved as netCDF files following the esarchive
+# convention. We need the start date value here to decide the file name.
+# As you can see, the sdate dimension is not required for the computation, so it
+# is not necessary to be the target dimension. We can just use 'chunk_indices' to
+# get the chunk number and therefore the corresponding start date value for the
+# file name.
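+# A minimal sketch of that idea (assuming, as described in FAQ How-to-27, that
+# the function defines a parameter 'nchunks = chunk_indices'; 'startdates' is
+# the extra step parameter passed through AddStep() below):
+#   sdate_chunk <- startdates[nchunks['sdate']]  # start date of current chunk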
+# ------------------------------------------------------------------------------ # Note 1: The data could be first transformed with QuantileMapping from CSTools # Note 2: Extra parameters could be used to downscale the data: weights, slope... # See more information in: @@ -74,15 +87,20 @@ step <- Step(Chunk_RF, use_libraries = c('CSTools', 'ncdf4'), use_attributes = list(data = "Variables")) -workflow <- AddStep(data, step, nf = 4, - destination = "/esarchive/scratch/nperez/git/Flor/cstools/test_RF_start/", +workflow <- AddStep(list(data = data), step, nf = 4, + destination = "./test_RF_start/", startdates = as.Date(sdates, format = "%Y%m%d")) + +#========= OPTION 1: Compute locally ============ res <- Compute(workflow, chunks = list(sdate = 4), threads_load = 2, threads_compute = 4) + +#========= OPTION 2: Compute ON NORD3 ============ + #-----------modify according to your personal info--------- queue_host = 'nord3' # your own host name for nord3v2 temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' -- GitLab From 791377e8508f48134032b57a0cde9a4ccb0c43cd Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 27 Jul 2023 17:19:25 +0200 Subject: [PATCH 06/46] Autosubmit as workflow manager --- R/ByChunks.R | 20 +- R/ByChunks_autosubmit.R | 741 ++++++++++++++++++ R/Collect.R | 72 +- R/Compute.R | 49 +- R/Utils.R | 141 ++++ inst/chunking/Autosubmit/autosubmit.yml | 16 + inst/chunking/Autosubmit/expdef.yml | 33 + inst/chunking/Autosubmit/jobs.yml | 9 + .../Autosubmit/load_process_save_chunk_AS.R | 132 ++++ inst/chunking/Autosubmit/platforms.yml | 14 + inst/chunking/Autosubmit/startR_autosubmit.sh | 25 + 11 files changed, 1231 insertions(+), 21 deletions(-) create mode 100644 R/ByChunks_autosubmit.R create mode 100644 inst/chunking/Autosubmit/autosubmit.yml create mode 100644 inst/chunking/Autosubmit/expdef.yml create mode 100644 inst/chunking/Autosubmit/jobs.yml create mode 100644 inst/chunking/Autosubmit/load_process_save_chunk_AS.R create mode 100644 inst/chunking/Autosubmit/platforms.yml create mode 100644 inst/chunking/Autosubmit/startR_autosubmit.sh diff --git a/R/ByChunks.R b/R/ByChunks.R index 37a554c..547b6ea 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -181,15 +181,23 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', if (is.null(names(cluster))) { stop("Parameter 'cluster' must be a named list.") } - if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', - 'temp_dir', 'lib_dir', 'init_commands', - 'r_module', 'CDO_module', - 'ecflow_module', 'node_memory', - 'cores_per_job', 'job_wallclock', 'max_jobs', + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup')))) { + 'polling_period', 'special_setup', 'expid', 'hpc_user')))) { + stop("Found invalid component names in parameter 'cluster'.") } + # Remove ecFlow components + redundant_components <- c('autosubmit_module', 'expid', 'hpc_user') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + warning("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager.") + cluster[[tmp]] <- NULL + } default_cluster[names(cluster)] <- cluster } localhost_name <- NULL diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R new 
file mode 100644
index 0000000..a246676
--- /dev/null
+++ b/R/ByChunks_autosubmit.R
@@ -0,0 +1,741 @@
+#'Execute the operation by chunks
+#'
+#'This is an internal function used in Compute(), executing the operation by
+#'the chunks specified in Compute(). It also returns the configuration details
+#'and profiling information.
+#'
+#'@param step_fun A function with the class 'startR_step_fun' containing the
+#'  details of the operation.
+#'@param cube_headers A list with the class 'startR_cube' returned by Start().
+#'  It contains the details of the data to be operated on.
+#'@param \dots Additional parameters for the inputs of 'step_fun'.
+#'@param chunks A named list of dimensions along which to split the data and
+#'  the number of chunks to make for each. The chunked dimensions can only be
+#'  those not required as target dimensions in function Step(). The default
+#'  value is 'auto', which lists all the non-target dimensions and each one has
+#'  one chunk.
+#'@param threads_load An integer indicating the number of execution threads to
+#'  use for the data retrieval stage. The default value is 1.
+#'@param threads_compute An integer indicating the number of execution threads
+#'  to use for the computation. The default value is 1.
+#'@param cluster A list of components that define the configuration of the
+#'  machine to be run on. The components vary across the different machines.
+#'  Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more
+#'  details and examples.
+#'  Only needed when the computation is not run locally. The default value is
+#'  NULL.
+#'@param autosubmit_suite_dir A character string indicating the path to a folder
+#'  in the local workstation where to store temporary files generated for the
+#'  automatic management of the workflow. Only needed when the execution is run
+#'  remotely. The default value is NULL.
+#'@param autosubmit_server A character string indicating the login node of the
+#'  Autosubmit machine, 'bscesautosubmit01' or 'bscesautosubmit02'. Only needed
+#'  when the execution is run remotely. The default value is NULL.
+#'@param silent A logical value deciding whether to print the computation
+#'  progress (FALSE) on the R session or not (TRUE). It only works when the
+#'  execution runs locally or the parameter 'wait' is TRUE. The default value
+#'  is FALSE.
+#'@param debug A logical value deciding whether to return detailed messages on
+#'  the progress and operations in a Compute() call (TRUE) or not (FALSE).
+#'  Automatically changed to FALSE if parameter 'silent' is TRUE. The default
+#'  value is FALSE.
+#'@param wait A logical value deciding whether the R session waits for the
+#'  Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an
+#'  object with all the information of the startR execution that can be stored
+#'  in your disk. After that, the R session can be closed and the results can
+#'  be collected later with the Collect() function. The default value is TRUE.
+#'
+#'@return A list of data arrays for the output returned by the last step in the
+#'  specified workflow. The configuration details and profiling information are
+#'  attached as attributes to the returned list of arrays.
+#'
+#'@examples
+#' # ByChunks() is internally used in Compute(), not intended to be used by
+#' # users. The example just illustrates the inputs of ByChunks().
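+#' # (For this Autosubmit variant, the 'cluster' list is also expected to carry
+#' # components such as 'expid' and 'hpc_user'; see the checks further below in
+#' # this function. The call pattern itself mirrors ByChunks().)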
+#' # data_path <- system.file('extdata', package = 'startR') +#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' # sdates <- c('200011', '200012') +#' # data <- Start(dat = list(list(path = path_obs)), +#' # var = 'tos', +#' # sdate = sdates, +#' # time = 'all', +#' # latitude = 'all', +#' # longitude = 'all', +#' # return_vars = list(latitude = 'dat', +#' # longitude = 'dat', +#' # time = 'sdate'), +#' # retrieve = FALSE) +#' # fun <- function(x) { +#' # lat = attributes(x)$Variables$dat1$latitude +#' # weight = sqrt(cos(lat * pi / 180)) +#' # corrected = Apply(list(x), target_dims = "latitude", +#' # fun = function(x) {x * weight}) +#' # } +#' # step <- Step(fun = fun, +#' # target_dims = 'latitude', +#' # output_dims = 'latitude', +#' # use_libraries = c('multiApply'), +#' # use_attributes = list(data = "Variables")) +#' #ByChunks(step, data) +#' +#'@import multiApply +#'@importFrom methods is +#'@noRd +ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { + + # Build object to store profiling timings + t_begin_total <- Sys.time() + t_begin_bychunks_setup <- t_begin_total + timings <- list(nchunks = NULL, + concurrent_chunks = NULL, + cores_per_job = NULL, + threads_load = NULL, + threads_compute = NULL, + bychunks_setup = NULL, + transfer = NULL, + queue = NULL, + job_setup = NULL, + load = NULL, + compute = NULL, + transfer_back = NULL, + merge = NULL, + total = NULL) + + MergeArrays <- .MergeArrays + + # Check input headers + if (is(cube_headers, 'startR_cube')) { + cube_headers <- list(cube_headers) + } + if (!all(sapply(lapply(cube_headers, class), + function(x) 'startR_cube' %in% x))) { + stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", + "as returned by Start().") + } + + # Check step_fun + if (!is.function(step_fun)) { + stop("Parameter 'step_fun' must be a function.") + } + + # Check cores + if (!is.numeric(threads_load)) { + stop("Parameter 'threads_load' must be a numeric value.") + } + threads_load <- round(threads_load) + if (!is.numeric(threads_compute)) { + stop("Parameter 'threads_compute' must be a numeric value.") + } + threads_compute <- round(threads_compute) + timings[['threads_load']] <- threads_load + timings[['threads_compute']] <- threads_compute + + on_cluster <- !is.null(cluster) + + # Check autosubmit_suite_dir + suite_id <- cluster[['expid']] + + #NOTE: + #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ + #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ + #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ + #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + + autosubmit_suite_dir_suite <- '' + if (on_cluster) { + if (is.null(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") + } + if (!is.character(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be a character string.") + } +#----------NEW----------- + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + if (!dir.exists(autosubmit_suite_dir_suite)) { + dir.create(autosubmit_suite_dir_suite, recursive = TRUE) + } +#--------NEW_END---------- + if (!dir.exists(autosubmit_suite_dir_suite)) { + stop("Could not find or create 
the directory in parameter 'autosubmit_suite_dir'.") + } + if (!is.null(autosubmit_server)) { + if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { + stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") + } + } else { + autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) + } + } + +#----------NEW----------- + # Check cluster + default_cluster <- list(queue_host = NULL, +# queue_type = 'slurm', + data_dir = NULL, +# temp_dir = NULL, + lib_dir = NULL, + init_commands = list(''), + r_module = 'R', + CDO_module = NULL, + autosubmit_module = 'autosubmit', + node_memory = NULL, # not used + cores_per_job = NULL, + job_wallclock = '01:00:00', + max_jobs = 6, + extra_queue_params = list(''), +# bidirectional = TRUE, + polling_period = 10, + special_setup = 'none', + expid = NULL, + hpc_user = NULL) + if (on_cluster) { + if (!is.list(cluster)) { + stop("Parameter 'cluster' must be a named list.") + } + if (is.null(names(cluster))) { + stop("Parameter 'cluster' must be a named list.") + } + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', + 'extra_queue_params', 'bidirectional', + 'polling_period', 'special_setup', 'expid', 'hpc_user')))) { + stop("Found invalid component names in parameter 'cluster'.") + } + # Remove ecFlow components + redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + warning("Cluster component ", paste(tmp, collapse = ','), " not used when Autosubmit is the workflow manager.") + cluster[[tmp]] <- NULL + } + default_cluster[names(cluster)] <- cluster + } + cluster <- default_cluster + is_data_dir_shared <- FALSE + + # Cluster compoment check + if (on_cluster) { + # queue_host + support_hpcs <- c('local', 'nord3') # names in platforms.yml + if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) { + stop("Component 'queue_host' in parameter 'cluster' must be one of the follows: ", paste(support_hpcs, collapse = ','), '.') + } + # data_dir + if (is.null(cluster[['data_dir']])) { + is_data_dir_shared <- TRUE + } else { + if (!is.character(cluster[['data_dir']])) { + stop("The component 'data_dir' of the parameter 'cluster' must be a character string.") + } + remote_data_dir <- cluster[['data_dir']] + } + # lib_dir + if (!is.null(cluster[['lib_dir']])) { + if (!is.character(cluster[['lib_dir']])) { + stop("The component 'lib_dir', of the parameter 'cluster' must be NULL or ", + "a character string.") + } + } + # init_commands + if (!is.list(cluster[['init_commands']]) || + !all(sapply(cluster[['init_commands']], is.character))) { + stop("The component 'init_commands' of the parameter 'cluster' must be a list of ", + "character strings.") + } + # r_module + if (!is.character(cluster[['r_module']])) { + stop("The component 'r_module' of the parameter 'cluster' must be a character string.") + } + if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) { + stop("The component 'r_module' of the parameter 'cluster' must have at least one character ", + "and contain no blank spaces.") + } + # CDO_module + if (!is.null(cluster[['CDO_module']])) { + if 
(!is.character(cluster[['CDO_module']])) {
+        stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.")
+      }
+      if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) {
+        warning("The component 'CDO_module' of the parameter 'cluster' must have ",
+                "at least one character and contain no blank spaces.")
+      }
+      cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']])
+    }
+    # autosubmit_module
+    if (!is.character(cluster[['autosubmit_module']])) {
+      stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.")
+    }
+    # cores_per_job
+    if (is.null(cluster[['cores_per_job']])) {
+      cluster[['cores_per_job']] <- threads_compute
+    }
+    if (!is.numeric(cluster[['cores_per_job']])) {
+      stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.")
+    }
+    cluster[['cores_per_job']] <- round(cluster[['cores_per_job']])
+    if (cluster[['cores_per_job']] > threads_compute) {
+      .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].")
+    }
+    # job_wallclock
+    tmp <- strsplit(cluster[['job_wallclock']], ':')[[1]]
+    if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) {
+      stop("The component 'job_wallclock' should be in the format HH:MM or HH:MM:SS.")
+    }
+    # max_jobs
+    if (!is.numeric(cluster[['max_jobs']])) {
+      stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.")
+    }
+    cluster[['max_jobs']] <- round(cluster[['max_jobs']])
+    # extra_queue_params
+    if (!is.list(cluster[['extra_queue_params']]) ||
+        !all(sapply(cluster[['extra_queue_params']], is.character))) {
+      stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ",
+           "character strings.")
+    }
+    # polling_period
+    if (!is.numeric(cluster[['polling_period']])) {
+      stop("The component 'polling_period' of the parameter 'cluster' must be numeric.")
+    }
+    cluster[['polling_period']] <- round(cluster[['polling_period']])
+    # special_setup
+    if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) {
+      stop("The value provided for the component 'special_setup' of the parameter ",
+           "'cluster' is not recognized.")
+    }
+    # expid
+    if (!is.character(cluster[['expid']]) | length(cluster[['expid']]) != 1) {
+      stop("The component 'expid' of the parameter 'cluster' must be a character string.")
+    }
+    # hpc_user
+    if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) {
+      stop("The component 'hpc_user' of the parameter 'cluster' must be a character string.")
+    }
+
+  }
+
+  if (on_cluster) {
+    remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", cluster[['expid']], 'proj')
+    remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', cluster[['expid']], '/')
+  }
+
+  # Check silent
+  if (!is.logical(silent)) {
+    stop("Parameter 'silent' must be logical.")
+  }
+
+  # Check debug
+  if (!is.logical(debug)) {
+    stop("Parameter 'debug' must be logical.")
+  }
+  if (silent) {
+    debug <- FALSE
+  }
+
+  # Check wait
+  if (!is.logical(wait)) {
+    stop("Parameter 'wait' must be logical.")
+  }
+
+  # Work out chunked dimensions and target dimensions
+  all_dims <- lapply(cube_headers, attr, 'Dimensions')
+  all_dims_merged <- NULL
+  for (i in all_dims) {
+    if (is.null(all_dims_merged)) {
+      all_dims_merged <- i
+    } else {
+      all_dims_merged <- .MergeArrayDims(all_dims_merged, i)[[3]]
+    }
+  }
+  all_dimnames <- names(all_dims_merged)
+
+  target_dims_indices <- which(all_dimnames %in%
unlist(attr(step_fun, 'TargetDims'))) + target_dims <- NULL + if (length(target_dims_indices) > 0) { + target_dims <- all_dimnames[target_dims_indices] + } + + chunked_dims <- all_dimnames + if (length(target_dims_indices) > 0) { + chunked_dims <- chunked_dims[-target_dims_indices] + } + if (length(chunked_dims) < 1) { + stop("Not possible to process input by chunks. All input dimensions are ", + "target dimensions.") + } + + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs expected by the function 'step_fun'.") + } + # Check all input headers have matching dimensions + cube_index <- 1 + for (cube_header in cube_headers) { + + # Check if all the margin dims are consistent among datasets + if (!all(chunked_dims %in% names(attr(cube_header, "Dimensions")))) { + trouble_dim_name <- chunked_dims[which(!chunked_dims %in% + names(attr(cube_header, "Dimensions")))] + stop(paste0("Found margin dimension, ", toString(trouble_dim_name), + ", is not in input data ", cube_index, ".")) + } + + # Only check margin dimensions (i.e., chunked_dims) + if (!all(attr(cube_header, 'Dimensions')[chunked_dims] == all_dims_merged[names(attr(cube_header, 'Dimensions'))][chunked_dims])) { + stop("All provided 'cube_headers' must have matching dimension lengths ", + "with each other.") + } + if (!all(attr(step_fun, 'TargetDims')[[cube_index]] %in% names(attr(cube_header, 'Dimensions')))) { + stop("All provided 'cube_headers' must contain at least the target dimensions ", + "expected by 'step_fun'.") + } + cube_index <- cube_index + 1 + # work out expected result dimensions + } + + # Check chunks + default_chunks <- as.list(rep(1, length(chunked_dims))) + names(default_chunks) <- chunked_dims + if (length(chunks) == 1 && chunks == 'auto') { + chunks <- default_chunks + } + if (!is.list(chunks)) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (is.null(names(chunks))) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (any(!(names(chunks) %in% chunked_dims))) { + stop("All names in parameter 'chunks' must be one of the non-target dimensions ", + "present in the cubes in 'cube_headers'. The target dimensions are ", + paste(paste0("'", target_dims, "'"), collapse = ', '), ". The non-target ", + "dimensions (margins) are ", paste(paste0("'", chunked_dims, "'"), collapse = ', '), ".") + } + if (any(!(((unlist(chunks) %% 1) == 0) | (unlist(chunks) == 'all')))) { + stop("All values in parameter 'chunks' must take a numeric value or 'all'.") + } + if (any(unlist(chunks) < 1)) { + stop("All values in parameter 'chunks' must be >= 1.") + } + for (chunk_spec in 1:length(chunks)) { + if (chunks[[chunk_spec]] > all_dims_merged[names(chunks)[chunk_spec]]) { + stop("Too many chunks requested for the dimension ", names(chunks)[chunk_spec], + ". 
Maximum allowed is ", all_dims_merged[names(chunks)[chunk_spec]]) + } + } + default_chunks[names(chunks)] <- chunks + #NOTE: chunks here has all the margin dims, not only the chunked ones + chunks <- default_chunks + timings[['nchunks']] <- prod(unlist(chunks)) + + # Check step_fun + if (!is(step_fun, 'startR_step_fun')) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + # Replace 'all's + chunks_all <- which(unlist(chunks) == 'all') + if (length(chunks_all) > 0) { + chunks[chunks_all] <- all_dims[names(chunks)[chunks_all]] + } + + if (on_cluster) { + # Copy load_process_save_chunk_AS.R into local folder +#TODO: Change the following line to read from package + chunk_script <- file("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/load_process_save_chunk_AS.R") +# chunk_script <- file(system.file('chunking/autosubmit/load_process_save_chunk_AS.R', +# package = 'startR')) + chunk_script_lines <- readLines(chunk_script) + close(chunk_script) + chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', + paste(deparse(cluster[['lib_dir']]), collapse = '\n')), + chunk_script_lines) + #TODO: Change out_dir to somewhere else like expid/outputs/ + chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', + paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) + chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), + chunk_script_lines) + deparsed_calls <- paste0('start_calls <- list(') + extra_path <- '' + if (cluster[['special_setup']] == 'marenostrum4') { + extra_path <- '/gpfs/archive/bsc32/' + } + for (cube_header in 1:length(cube_headers)) { + pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') + bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] + bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + if (!is_data_dir_shared) { + cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', + extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) + for (file_n in 1:length(bk_expected_files)) { + attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', + extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) + } + } + deparsed_calls <- paste0(deparsed_calls, '\nquote(', + paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), + ')') + cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim + attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files + if (cube_header < length(cube_headers)) { + deparsed_calls <- paste0(deparsed_calls, ', ') + } + } + deparsed_calls <- paste0(deparsed_calls, '\n)') + chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) + chunk_script_lines <- gsub('^start_calls_attrs <- *', paste0('start_calls_attrs <- ', paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^param_dimnames <- *', paste0('param_dimnames <- ', paste(deparse(chunked_dims), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), + chunk_script_lines) + chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), + chunk_script_lines) + 
chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), + chunk_script_lines) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_AS.R')) + + # Write and copy startR_autosubmit.sh into local folder + write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) + + # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ + #TODO: remove source() and put function under R/ or just below ByChunks_autosubmit() + source("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/write_autosubmit_conf.R") + write_autosubmit_conf(chunks, cluster, autosubmit_suite_dir) + + } # if on_cluster + + + # Iterate through chunks + chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) + arrays_of_results <- vector('list', length(attr(step_fun, 'OutputDims'))) + names(arrays_of_results) <- names(attr(step_fun, 'OutputDims')) + for (component in 1:length(arrays_of_results)) { + arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) + dim(arrays_of_results[[component]]) <- (unlist(chunks)) + } + if (!on_cluster) { + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + timings[['transfer']] <- 0 + timings[['queue']] <- 0 + timings[['job_setup']] <- 0 + timings[['transfer_back']] <- 0 + if (!silent) { + .message(paste0("Processing chunks... ", + "remaining time estimate soon...")) + } + time_before_first_chunk <- Sys.time() + time_after_first_chunk <- NULL + } + found_first_result <- FALSE + for (i in 1:length(chunk_array)) { + chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] + names(chunk_indices) <- names(dim(chunk_array)) + + if (!on_cluster) { + if (!silent) { + .message(paste("Loading chunk", i, + "out of", length(chunk_array), "...")) + } + data <- vector('list', length(cube_headers)) + t_begin_load <- Sys.time() + for (input in 1:length(data)) { + start_call <- cube_headers[[input]] + dims_to_alter <- which(names(attr(start_call, 'Dimensions')) %in% names(chunks)) + names_dims_to_alter <- names(attr(start_call, 'Dimensions'))[dims_to_alter] + # If any dimension comes from split dimensions + split_dims <- attr(start_call, 'SplitDims') + + if (length(split_dims) != 0){ + + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] + start_call[[names(split_dims)[k]]] <- .chunk(chunks_indices_split_dims, chunks_split_dims, + eval(start_call[[names(split_dims)[k]]])) + dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) + if (length(dims_to_alter_to_remove) > 0) { + dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] + names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] + } + } + } + } + + if (length(dims_to_alter) > 0) { + for (call_dim in names(attr(start_call, 'Dimensions'))[dims_to_alter]) { + start_call[[call_dim]] <- .chunk(chunk_indices[call_dim], chunks[[call_dim]], + 
eval(start_call[[call_dim]])) + } + } + start_call[['silent']] <- !debug + if (!('num_procs' %in% names(start_call))) { + start_call[['num_procs']] <- threads_load + } + data[[input]] <- eval(start_call) + } + t_end_load <- Sys.time() + timings[['load']] <- c(timings[['load']], + as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))) + if (!silent) { + .message(paste("Processing...")) + } + #TODO: Find a better way to assign the names of data. When multiple steps for Compute is available, this way may fail. + names(data) <- names(cube_headers) + t_begin_compute <- Sys.time() + result <- multiApply::Apply(data, + target_dims = attr(step_fun, 'TargetDims'), + fun = step_fun, ..., + output_dims = attr(step_fun, 'OutputDims'), + use_attributes = attr(step_fun, 'UseAttributes'), + ncores = threads_compute) + if (!found_first_result) { + names(arrays_of_results) <- names(result) + found_first_result <- TRUE + } + for (component in 1:length(result)) { + arrays_of_results[[component]][[i]] <- result[[component]] + } + rm(data) + gc() + t_end_compute <- Sys.time() + timings[['compute']] <- c(timings[['compute']], + as.numeric(difftime(t_end_compute, + t_begin_compute, units = 'secs'))) + } + + # Time estimate + if (!on_cluster) { + if (is.null(time_after_first_chunk)) { + time_after_first_chunk <- Sys.time() + if (!silent) { + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + (length(chunk_array) - 1) + units(estimate) <- 'mins' + .message( + paste0("Remaining time estimate (at ", format(time_after_first_chunk), ") ", + "(neglecting merge time): ", format(estimate)) + ) + } + } + } + } + + + if (on_cluster) { + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- cluster[['max_jobs']] + + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { + #NOTE: Not consider this part yet + t_begin_transfer <- Sys.time() + .message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! 
$(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) + } else { + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + .message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + .message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() + + as_module <- cluster[['autosubmit_module']] + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit create ", suite_id, " -np; ", + "autosubmit refresh ", suite_id, "; ") + if (wait) { + sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + } else { + sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? + } + + if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { + #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here + system(sys_commands) + + } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + # ssh from WS to AS VM to run exp + as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') + sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') + system(sys_commands) + + } else { + stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + } + + timings[['total']] <- t_begin_total + startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', + suite_id = suite_id, chunks = chunks, + num_outputs = length(arrays_of_results), + autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, + timings = timings) + class(startr_exec) <- 'startR_exec' + + if (wait) { + result <- Collect(startr_exec, wait = TRUE, remove = T) + .message("Computation ended successfully.") + return(result) + + } else { + # if wait = F, return startr_exec and merge chunks in Collect(). + return(startr_exec) + } + } +} + diff --git a/R/Collect.R b/R/Collect.R index 4c80b03..73e06b9 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -72,10 +72,34 @@ #' #'@export Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { + + # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", - "'startR_exec', as returned by Collect(..., wait = FALSE).") + "'startR_exec', as returned by Compute(..., wait = FALSE).") } + if (!tolower(startr_exec$workflow_manager) %in% c('ecflow', 'autosubmit')) { + stop("Cannot identify the workflow manager. 
Check the value of 'startr_exec$workflow_manager', which should be 'ecFlow' or 'Autosubmit'.") + } + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + if (!is.logical(remove)) { + stop("Parameter 'remove' must be logical.") + } + + if (tolower(startr_exec$workflow_manager) == 'ecflow') { + res <- Collect_ecflow(startr_exec, wait = wait, remove = remove) + } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { + res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove) + } + + return(res) +} + + +Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { + if (Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") @@ -346,3 +370,49 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { attr(result, 'startR_compute_profiling') <- timings result } + + + +Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { + + suite_id <- startr_exec[['suite_id']] + chunks <- startr_exec[['chunks']] + num_outputs <- startr_exec[['num_outputs']] + autosubmit_suite_dir <- startr_exec[['autosubmit_suite_dir']] + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') + remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + + + done <- FALSE + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) + + while (!done) { # If wait, try until it is done + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + + } else if (!wait) { + stop("Computation in progress...") + } else { + Sys.sleep(cluster[['polling_period']]) + message("Computation in progress, ", sum_received_chunks, " of ", num_outputs, " chunks are done...\n", + "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) +# Sys.sleep(min(sqrt(attempt), 5)) + } + + } # while !done + + result <- .MergeChunks(remote_autosubmit_suite_dir, suite_id, remove = remove) + if (remove) { + .warning("ATTENTION: The source chunks will be removed from the ", + "system. 
Store the result after Collect() ends if needed.") + } + + # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + + return(result) +} diff --git a/R/Compute.R b/R/Compute.R index 1450b01..c9f7426 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -84,11 +84,11 @@ #' #'@importFrom methods is #'@export -Compute <- function(workflow, chunks = 'auto', - threads_load = 1, threads_compute = 1, - cluster = NULL, ecflow_suite_dir = NULL, - ecflow_server = NULL, silent = FALSE, debug = FALSE, - wait = TRUE) { +Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', + threads_load = 1, threads_compute = 1, + cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { # Check workflow if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) { stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", @@ -144,16 +144,37 @@ Compute <- function(workflow, chunks = 'auto', if (!all(sapply(workflow$inputs, class) == 'startR_cube')) { stop("Workflows with only one step supported by now.") } + # Run ByChunks with the combined operation - res <- ByChunks(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - ecflow_suite_dir = ecflow_suite_dir, - ecflow_server = ecflow_server, - silent = silent, debug = debug, wait = wait) + if (!is.null(cluster)) { + if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) { + stop("Parameter 'workflow_manager' can only be 'ecFlow' or 'Autosubmit'.") + } + } + if (tolower(workflow_manager) == 'autosubmit') { + res <- ByChunks_autosubmit(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = autosubmit_server, + silent = silent, debug = debug, wait = wait) + + } else { + # ecFlow or run locally + res <- ByChunks(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) + } + # TODO: carry out remaining steps locally, using multiApply # Return results res diff --git a/R/Utils.R b/R/Utils.R index 3d4d864..5bf8988 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -869,3 +869,144 @@ val <- withCallingHandlers(expr, warning = wHandler) list(value = val, warnings = myWarnings) } + +# This function writes startR_autosubmit.sh to local startR_autosubmit folder, under expID/ +write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { + # "chunks" should be the argument "chunks" in Compute() plus the redundant margin dims, + # e.g., list(dat = 1, var = 1, sdate = 1, time = 1, lat = 2, lon = 3) + + # Loop through chunks to create load script for each + for (n_chunk in 0:(prod(unlist(chunks)) - 1)) { + + # Create chunk args + chunk_names <- names(chunks) + chunk_args <- matrix(NA, 2, length(chunks)) + chunk_args[1, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '%') + chunk_args[2, ] <- 
paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '_N%')
+ chunk_args <- paste0('(', paste(c(chunk_args), collapse = ' '), ')')
+
+ #TODO: Change to the following line getting .sh template from package
+ # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh',
+ # package = 'startR'))
+ bash_script_template <- file("/home/Earth/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh")
+ bash_script_lines <- readLines(bash_script_template)
+ close(bash_script_template)
+
+ # Rewrite chunk_args=
+ bash_script_lines <- gsub('^chunk_args=*', paste0('chunk_args=', chunk_args),
+ bash_script_lines)
+ # Include init commands
+ bash_script_lines <- gsub('^include_init_commands',
+ paste0(paste0(cluster[['init_commands']], collapse = '\n'), '\n'),
+
+ bash_script_lines)
+ # Rewrite include_module_load
+ bash_script_lines <- gsub('^include_module_load',
+ paste0('module load ', cluster[['r_module']]),
+ bash_script_lines)
+
+ # Save modified .sh file under local$PROJECT_PATH in expdef.yml
+ #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit()
+ #NOTE: the file will be copied to proj/ by "autosubmit create"
+ dest_dir <- file.path(autosubmit_suite_dir, paste0("/STARTR_CHUNKING_", cluster$expid))
+
+ if (!file.exists(dest_dir)) {
+ dir.create(dest_dir, recursive = TRUE)
+ }
+ writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh'))
+ }
+}
+
+# This function generates the .yml files under autosubmit conf/
+write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) {
+ #TODO: Remove this
+ library(configr)
+ # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3)
+ # "cluster" is the argument "cluster" in Compute(), to set machine configuration
+ # "autosubmit_suite_dir" should be the local folder that has R script, like ecflow_suite_dir in Compute()
+
+ # Get config template files from package
+ #TODO: Change to package path
+# template_dir <- system.file('chunking/Autosubmit/', package = 'startR')
+ template_dir <- "/home/Earth/aho/startR/inst/chunking/Autosubmit/"
+ config_files <- list.files(template_dir, pattern = "*\\.yml")
+
+ for (i_file in config_files) {
+
+ conf <- configr::read.config(file.path(template_dir, i_file))
+ conf_type <- strsplit(i_file, split = "[.]")[[1]][1]
+
+############################################################
+ if (conf_type == "autosubmit") {
+
+ #Q: Should it be the total amount of chunks?
+ conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks))) # total amount of chunks
+ conf$config$TOTALJOBS <- as.integer(cluster$max_jobs)
+
+############################################################
+ } else if (conf_type == "expdef") {
+ conf$default$EXPID <- cluster$expid
+ conf$default$HPCARCH <- cluster$queue_host
+ # PROJECT_PATH should be where submit.sh and load....R stored --> local startR_autosubmit folder, under expID/
+ conf$local$PROJECT_PATH <- file.path(autosubmit_suite_dir, paste0("STARTR_CHUNKING_", cluster$expid))
+
+############################################################
+ } else if (conf_type == "jobs") {
+
+ chunks_vec <- lapply(lapply(chunks, seq, 1), rev) # list(lat = 1:2, lon = 1:3)
+ chunk_df <- expand.grid(chunks_vec)
+ nchunks <- nrow(chunk_df)
+ chunk_name <- paste0("CHUNK_", 0:(nchunks - 1))
+
+ # Fill in common configurations
+ jobs <- conf$JOBS
+ # wallclock from '01:00:00' to '01:00'
+ jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5)
+ jobs[[1]]$PLATFORM <- cluster$queue_host
+ #Q: Is it cores_per_job? 
+ jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) + jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) + jobs[[1]][names(chunks)] <- "" + + # Create chunks and fill in info for each chunk + if (nchunks > 1) { + jobs <- c(jobs, rep(jobs, nchunks - 1)) + names(jobs) <- chunk_name + } + for (i_chunk in 1:nchunks) { + jobs[[i_chunk]][names(chunks)] <- chunk_df[i_chunk, ] + jobs[[i_chunk]]$FILE <- paste0('startR_autosubmit_', i_chunk - 1, '.sh') + } + + conf$JOBS <- jobs + +############################################################ + } else if (conf_type == "platforms") { + if (tolower(cluster$queue_host) != "local") { + conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) + if (!is.null(cluster$extra_queue_params)) { + tmp <- unlist(cluster$extra_queue_params) + for (ii in 1:length(tmp)) { + tmp[ii] <- paste0('\"', tmp[ii], '\"') + } + conf$Platforms[[cluster$queue_host]]$CUSTOM_DIRECTIVES <- paste0('[ ', paste(tmp, collapse = ','), ' ]') + } + } + +############################################################ + } else { + stop("File ", i_file, " is not considered in this function.") + } + +############################################################ + # Output directory + dest_dir <- paste0("/esarchive/autosubmit/", cluster$expid, "/conf/") + dest_file <- paste0(conf_type, "_", cluster$expid, ".yml") + + # Write config file inside autosubmit dir + write.config(conf, paste0(dest_dir, dest_file), write.type = "yaml") + Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) + + } # for loop each file +} diff --git a/inst/chunking/Autosubmit/autosubmit.yml b/inst/chunking/Autosubmit/autosubmit.yml new file mode 100644 index 0000000..8b129a0 --- /dev/null +++ b/inst/chunking/Autosubmit/autosubmit.yml @@ -0,0 +1,16 @@ +config: + AUTOSUBMIT_VERSION: 4.0.0b0 + MAXWAITINGJOBS: # Should it be the total amount of chunk? + TOTALJOBS: + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +#wrappers: +# wrapper_sim: +# TYPE: "vertical" +# JOBS_IN_WRAPPER: "SIM" +mail: + NOTIFICATIONS: False + TO: +storage: + TYPE: "pkl" + COPY_REMOTE_LOGS: True diff --git a/inst/chunking/Autosubmit/expdef.yml b/inst/chunking/Autosubmit/expdef.yml new file mode 100644 index 0000000..a97141c --- /dev/null +++ b/inst/chunking/Autosubmit/expdef.yml @@ -0,0 +1,33 @@ +default: + EXPID: #a659 + HPCARCH: #nord3v2, local +experiment: + DATELIST: 20220401 + MEMBERS: "fc0" + CHUNKSIZEUNIT: month + CHUNKSIZE: 4 + NUMCHUNKS: 2 + CHUNKINI: '' + CALENDAR: standard +project: + PROJECT_TYPE: local + PROJECT_DESTINATION: '' +git: + PROJECT_ORIGIN: '' #https://xxx + PROJECT_BRANCH: '' #master + PROJECT_COMMIT: '' + PROJECT_SUBMODULES: '' + FETCH_SINGLE_BRANCH: True +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +local: + PROJECT_PATH: #'/esarchive/scratch/aho/tmp/startR_as/my_project' +project_files: + FILE_PROJECT_CONF: '' + FILE_JOBS_CONF: '' + JOB_SCRIPTS_TYPE: '' +rerun: + RERUN: FALSE + RERUN_JOBLIST: '' +#Q: Are these all needed and correct? 
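The templates above are filled in by write_autosubmit_conf() at Compute() time. As a usage reference, here is a minimal sketch of a user-side call that exercises this Autosubmit path; every concrete value below (the expid 'a659', the 'nord3' host, the module names, the user and the suite directory) is a hypothetical placeholder, not something fixed by these patches:

  # Assumes 'wf' is a startR workflow built with Start(), Step() and AddStep()
  res <- Compute(wf,
                 chunks = list(lat = 2, lon = 3),
                 workflow_manager = 'Autosubmit',
                 cluster = list(queue_host = 'nord3',        # platform key in platforms.yml (hypothetical)
                                expid = 'a659',              # existing Autosubmit experiment ID (hypothetical)
                                hpc_user = 'bsc32xxx',       # hypothetical HPC user
                                r_module = 'R/4.1.2',        # hypothetical module names
                                autosubmit_module = 'autosubmit/4.0.0b0',
                                cores_per_job = 4,
                                job_wallclock = '01:00:00',
                                max_jobs = 6,
                                polling_period = 10),
                 autosubmit_suite_dir = '/home/Earth/<user>/startR_local_autosubmit/',
                 wait = TRUE)

For chunks = list(lat = 2, lon = 3), write_autosubmit_conf() expands the jobs template into CHUNK_0 ... CHUNK_5, each pointing to its own startR_autosubmit_<n>.sh.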
diff --git a/inst/chunking/Autosubmit/jobs.yml b/inst/chunking/Autosubmit/jobs.yml new file mode 100644 index 0000000..3ff4d0b --- /dev/null +++ b/inst/chunking/Autosubmit/jobs.yml @@ -0,0 +1,9 @@ +JOBS: + CHUNK_0: + PLATFORM: #LOCAL + RUNNING: once + WALLCLOCK: #00:05 + THREADS: + FILE: startR_autosubmit.sh #templates/sleep_5.sh +# DIM: +# DIM_N: diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_AS.R b/inst/chunking/Autosubmit/load_process_save_chunk_AS.R new file mode 100644 index 0000000..f7211d5 --- /dev/null +++ b/inst/chunking/Autosubmit/load_process_save_chunk_AS.R @@ -0,0 +1,132 @@ +lib_dir <- +if (!is.null(lib_dir)) { + if (!dir.exists(lib_dir)) { + stop("The specified 'lib_dir' does not exist.") + } + .libPaths(new = lib_dir) +} +library(startR) + +out_dir <- + +debug <- +start_calls <- +start_calls_attrs <- +param_dimnames <- +fun <- +params <- +threads_load <- +threads_compute <- + +task_path <- commandArgs(TRUE)[2] + +args <- as.integer(commandArgs(TRUE)[-c(1, 2)]) + +total_specified_dims <- length(args) / 2 +chunk_indices <- args[((1:total_specified_dims) - 1) * 2 + 1] +names(chunk_indices) <- param_dimnames +chunks <- as.list(args[((1:total_specified_dims) - 1) * 2 + 2]) +names(chunks) <- param_dimnames + +t_begin_load <- Sys.time() +data <- vector('list', length(start_calls)) +for (input in 1:length(data)) { + start_call <- start_calls[[input]] + call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) + dims_to_alter <- which(call_dims %in% param_dimnames) + names_dims_to_alter <- call_dims[dims_to_alter] + # If any dimension comes from split dimensions + split_dims <- start_calls_attrs[[input]][['SplitDims']] + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] + start_call[[names(split_dims)[k]]] <- startR:::.chunk(chunks_indices_split_dims, chunks_split_dims, + eval(start_call[[names(split_dims)[k]]])) + dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) + if (length(dims_to_alter_to_remove) > 0) { + dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] + names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] + } + } + } + if (length(dims_to_alter) > 0) { + for (call_dim in names_dims_to_alter) { + start_call[[call_dim]] <- startR:::.chunk(chunk_indices[call_dim], chunks[[call_dim]], + eval(start_call[[call_dim]])) + } + } + if (!('num_procs' %in% names(start_call))) { + start_call[['num_procs']] <- threads_load + } + # Creates a name for the temporal file using the chunks numbers: + ## ecFlow should be like "_4737920362_1_1_1_1_1_1_" + ## autosubmit should be like "a659_1_1_1_1_1_1" + + nameMemoryObject <- paste0(task_path, '_', paste(chunk_indices, collapse='_')) #task_path is EXPID actually + + start_call[['ObjectBigmemory']] <- nameMemoryObject + data[[input]] <- tryCatch(eval(start_call), + # Handler when an error occurs: + error = function(e) { + message(paste("The data cannot be loaded.")) + message("See the original error message:") + message(e) + message("\n Current files in /dev/shm:") + noreturn <- 
lapply(list.files("/dev/shm"), function (x) { + info <- file.info(paste0("/dev/shm/", x)) + message(paste("file:", rownames(info), + "size:", info$size, + "uname:", info$uname))}) + message(getwd()) + file.remove(nameMemoryObject) + file.remove(paste0(nameMemoryObject, ".desc")) + message(paste("Files", nameMemoryObject, "has been removed.")) + stop("The job has failed while loading data. See original error reported above.") + }) + warning(attributes(data[[input]])$ObjectBigmemory) +} +t_end_load <- Sys.time() +t_load <- as.numeric(difftime(t_end_load, t_begin_load, units = 'secs')) + +t_begin_compute <- Sys.time() +if (!is.null(attr(fun, 'UseLibraries'))) { + for (i in seq_along(attr(fun, 'UseLibraries'))) { + require(attr(fun, 'UseLibraries')[i], character.only = TRUE) + } +} +chunk_indices_apply <- setNames(as.integer(chunk_indices), names(chunk_indices)) +chunk_indices_apply <- chunk_indices_apply[names(chunks)[which(chunks > 1)]] +Apply <- multiApply::Apply +res <- do.call("Apply", + c( + list(data, + target_dims = attr(fun, 'TargetDims'), + fun = fun, + output_dims = attr(fun, 'OutputDims'), + use_attributes = attr(fun, 'UseAttributes'), + extra_info = list(chunk_indices = chunk_indices_apply), + ncores = threads_compute), + params + ) + ) +rm(data) +gc() + +for (component in names(res)) { + filename <- paste0(component, '__') + for (i in 1:total_specified_dims) { + filename <- paste0(filename, param_dimnames[i], '_', chunk_indices[i], '__') + } + # Saving in a temporary file, then renaming. This way, the polling mechanism + # won't transfer back results before the save is completed. + saveRDS(res[[component]], file = paste0(out_dir, '/', filename, '.Rds.tmp')) + file.rename(paste0(out_dir, '/', filename, '.Rds.tmp'), + paste0(out_dir, '/', filename, '.Rds')) +} +rm(res) +gc() diff --git a/inst/chunking/Autosubmit/platforms.yml b/inst/chunking/Autosubmit/platforms.yml new file mode 100644 index 0000000..f8d8f70 --- /dev/null +++ b/inst/chunking/Autosubmit/platforms.yml @@ -0,0 +1,14 @@ +Platforms: + nord3: + TYPE: SLURM + HOST: nord4.bsc.es #Q: Should we have more login nodes? + PROJECT: bsc32 + ADD_PROJECT_TO_HOST: "false" + USER: #bsc32734 + PROCESSORS_PER_NODE: #16 + SERIAL_QUEUE: debug + QUEUE: bsc_es + SCRATCH_DIR: /gpfs/scratch + CUSTOM_DIRECTIVES: # "['#SBATCH --exclusive']" "['#SBATCH --constraint=medmem']" +# MAX_WALLCLOCK: '48:00' +#Q: ARE THESE SETTING CORRECT? diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh new file mode 100644 index 0000000..99347ea --- /dev/null +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +######## AUTOSUBMIT INPUTS ####### +proj_dir=%PROJDIR% +task_path=%DEFAULT.EXPID% +chunknum=%JOBNAME% #e.g., a68h_CHUNK_0 +chunknum="${chunknum:5}" #e.g., CHUNK_0 +################################## + +# Modified by write_bash.R +# e.g., chunk_args=(%JOBS."${chunknum}".dat% %JOBS."${chunknum}".dat_N% %JOBS."${chunknum}".var% %JOBS."${chunknum}".var_N% %JOBS."${chunknum}".sdate% %JOBS."${chunknum}".sdate_N%) +chunk_args= + +include_init_commands +include_module_load + +#Should move to the path that has load_process_save_chunk_AS.R +cd ${proj_dir} +#cd /esarchive/autosubmit/%EXPID%/proj/STARTR_CHUNKING_${task_path} + +#Q: Which path under /autosubmit/ can save random things? 
+ +#e.g., Rscript load_process_save_chunk.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 +Rscript load_process_save_chunk_AS.R --args ${task_path} ${chunk_args[@]} + -- GitLab From 79c6b357d9c387c48c1f425bae1e18df26c5e9d1 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 27 Jul 2023 17:30:03 +0200 Subject: [PATCH 07/46] Change testing phase path to /esarchive/scratch/ --- R/ByChunks_autosubmit.R | 6 ++---- R/Utils.R | 4 ++-- inst/chunking/Autosubmit/expdef.yml | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index a246676..82773e6 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -450,8 +450,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (on_cluster) { # Copy load_process_save_chunk_AS.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/load_process_save_chunk_AS.R") -# chunk_script <- file(system.file('chunking/autosubmit/load_process_save_chunk_AS.R', + chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_AS.R") +# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_AS.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) @@ -509,8 +509,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - #TODO: remove source() and put function under R/ or just below ByChunks_autosubmit() - source("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/write_autosubmit_conf.R") write_autosubmit_conf(chunks, cluster, autosubmit_suite_dir) } # if on_cluster diff --git a/R/Utils.R b/R/Utils.R index 5bf8988..fe49148 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -888,7 +888,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to the following line getting .sh template from package # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', # package = 'startR')) - bash_script_template <- file("/home/Earth/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file("/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -928,7 +928,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { # Get config template files from package #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/home/Earth/aho/startR/inst/chunking/Autosubmit/" + template_dir <- "/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/" config_files <- list.files(template_dir, pattern = "*\\.yml") for (i_file in config_files) { diff --git a/inst/chunking/Autosubmit/expdef.yml b/inst/chunking/Autosubmit/expdef.yml index a97141c..624040d 100644 --- a/inst/chunking/Autosubmit/expdef.yml +++ b/inst/chunking/Autosubmit/expdef.yml @@ -22,7 +22,7 @@ svn: PROJECT_URL: '' PROJECT_REVISION: '' local: - PROJECT_PATH: #'/esarchive/scratch/aho/tmp/startR_as/my_project' + PROJECT_PATH: #'/home/Earth/aho/startR_local_autosubmit/' project_files: FILE_PROJECT_CONF: '' FILE_JOBS_CONF: '' -- GitLab From 3679ac082e94ce5f618561aea2b6059a9eb10276 Mon 
Sep 17 00:00:00 2001 From: aho Date: Thu, 27 Jul 2023 17:35:00 +0200 Subject: [PATCH 08/46] Correct path --- R/Utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Utils.R b/R/Utils.R index fe49148..31f4ed7 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -888,7 +888,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to the following line getting .sh template from package # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', # package = 'startR')) - bash_script_template <- file("/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -928,7 +928,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { # Get config template files from package #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/" + template_dir <- "/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/" config_files <- list.files(template_dir, pattern = "*\\.yml") for (i_file in config_files) { -- GitLab From 860a1fce225a3869d7d359f7ab369fbadc5d930b Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 28 Jul 2023 15:21:22 +0200 Subject: [PATCH 09/46] Reorganize chunking folder; rename ByChunks() and load_process_save_chunk.R --- R/ByChunks_autosubmit.R | 8 +-- R/{ByChunks.R => ByChunks_ecflow.R} | 61 +++++++++++++------ R/Compute.R | 18 +++--- ...R => load_process_save_chunk_autosubmit.R} | 0 inst/chunking/Autosubmit/startR_autosubmit.sh | 6 +- inst/chunking/{ => ecFlow}/Chunk.ecf | 2 +- inst/chunking/{ => ecFlow}/clean_devshm.sh | 0 inst/chunking/{ => ecFlow}/head.h | 0 .../load_process_save_chunk_ecflow.R} | 0 inst/chunking/{ => ecFlow}/lsf.h | 0 inst/chunking/{ => ecFlow}/pbs.h | 0 inst/chunking/{ => ecFlow}/slurm.h | 0 inst/chunking/{ => ecFlow}/tail.h | 0 13 files changed, 58 insertions(+), 37 deletions(-) rename R/{ByChunks.R => ByChunks_ecflow.R} (93%) rename inst/chunking/Autosubmit/{load_process_save_chunk_AS.R => load_process_save_chunk_autosubmit.R} (100%) rename inst/chunking/{ => ecFlow}/Chunk.ecf (82%) rename inst/chunking/{ => ecFlow}/clean_devshm.sh (100%) rename inst/chunking/{ => ecFlow}/head.h (100%) rename inst/chunking/{load_process_save_chunk.R => ecFlow/load_process_save_chunk_ecflow.R} (100%) rename inst/chunking/{ => ecFlow}/lsf.h (100%) rename inst/chunking/{ => ecFlow}/pbs.h (100%) rename inst/chunking/{ => ecFlow}/slurm.h (100%) rename inst/chunking/{ => ecFlow}/tail.h (100%) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 82773e6..1d0a84b 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -448,10 +448,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } if (on_cluster) { - # Copy load_process_save_chunk_AS.R into local folder + # Copy load_process_save_chunk_autosubmit.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_AS.R") -# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_AS.R', + chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") +# chunk_script <- 
file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) @@ -503,7 +503,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', chunk_script_lines) chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), chunk_script_lines) - writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_AS.R')) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) # Write and copy startR_autosubmit.sh into local folder write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) diff --git a/R/ByChunks.R b/R/ByChunks_ecflow.R similarity index 93% rename from R/ByChunks.R rename to R/ByChunks_ecflow.R index 547b6ea..5dc306d 100644 --- a/R/ByChunks.R +++ b/R/ByChunks_ecflow.R @@ -82,13 +82,13 @@ #'@import multiApply #'@importFrom methods is #'@noRd -ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', - threads_load = 2, threads_compute = 1, - cluster = NULL, - ecflow_suite_dir = NULL, - ecflow_server = NULL, - silent = FALSE, debug = FALSE, - wait = TRUE) { +ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + ecflow_suite_dir = NULL, + ecflow_server = NULL, + silent = FALSE, debug = FALSE, + wait = TRUE) { # Build object to store profiling timings t_begin_total <- Sys.time() t_begin_bychunks_setup <- t_begin_total @@ -436,9 +436,11 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', ". Make sure passwordless ", "access is properly set in both directions.")) - # Copy load_process_save_chunk.R into shared folder - chunk_script <- file(system.file('chunking/load_process_save_chunk.R', - package = 'startR')) + # Copy load_process_save_chunk_ecflow.R into shared folder + #TODO: Change to package file + chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R") +# chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', +# package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', @@ -488,11 +490,13 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', chunk_script_lines) chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), chunk_script_lines) - writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk.R')) + writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk_ecflow.R')) # Copy Chunk.ecf into shared folder - chunk_ecf_script <- file(system.file('chunking/Chunk.ecf', - package = 'startR')) + #TODO: Change to package file + chunk_ecf_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/Chunk.ecf") +# chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', +# package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) close(chunk_ecf_script) if (cluster[['queue_type']] == 'host') { @@ -530,8 +534,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # } else { # transfer_back_line <- '' # } - chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk.R --args \\$task_path insert_indices', - paste0('Rscript load_process_save_chunk.R 
--args $task_path ', paste(ecf_vars, collapse = ' ')), + chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk_ecflow.R --args \\$task_path insert_indices', + paste0('Rscript load_process_save_chunk_ecflow.R --args $task_path ', paste(ecf_vars, collapse = ' ')), chunk_ecf_script_lines) #chunk_ecf_script_lines <- gsub('^include_transfer_back_and_rm', transfer_back_line, chunk_ecf_script_lines) writeLines(chunk_ecf_script_lines, paste0(ecflow_suite_dir_suite, '/Chunk.ecf')) @@ -557,7 +561,9 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy queue header into shared folder #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) - chunk_queue_header <- file(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR')) + #TODO: Change to package file + chunk_queue_header <- file(paste0("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/", cluster[['queue_type']], '.h')) +# chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) chunk_queue_header_lines <- gsub('^include_extra_queue_params', @@ -566,10 +572,13 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) # Copy headers - file.copy(system.file('chunking/head.h', package = 'startR'), - ecflow_suite_dir_suite) - file.copy(system.file('chunking/tail.h', package = 'startR'), - ecflow_suite_dir_suite) + #TODO: Change to package file + file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) + file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) +# file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), +# ecflow_suite_dir_suite) +# file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), +# ecflow_suite_dir_suite) #file.copy(system.file('chunking/clean_devshm.sh', package = 'startR'), # ecflow_suite_dir_suite) } @@ -1008,3 +1017,15 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', } #TODO: check result dimensions match expected dimensions } + +ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + ecflow_suite_dir = NULL, + ecflow_server = NULL, + silent = FALSE, debug = FALSE, + wait = TRUE) { + + stop(.Deprecated("ByChunks_ecflow")) +} + diff --git a/R/Compute.R b/R/Compute.R index c9f7426..81f83e6 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -164,15 +164,15 @@ Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', } else { # ecFlow or run locally - res <- ByChunks(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - ecflow_suite_dir = ecflow_suite_dir, - ecflow_server = ecflow_server, - silent = silent, debug = debug, wait = wait) + res <- ByChunks_ecflow(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) } # TODO: carry out remaining steps locally, using multiApply diff 
--git a/inst/chunking/Autosubmit/load_process_save_chunk_AS.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R similarity index 100% rename from inst/chunking/Autosubmit/load_process_save_chunk_AS.R rename to inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh index 99347ea..d0c16d5 100644 --- a/inst/chunking/Autosubmit/startR_autosubmit.sh +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -14,12 +14,12 @@ chunk_args= include_init_commands include_module_load -#Should move to the path that has load_process_save_chunk_AS.R +#Should move to the path that has load_process_save_chunk_autosubmit.R cd ${proj_dir} #cd /esarchive/autosubmit/%EXPID%/proj/STARTR_CHUNKING_${task_path} #Q: Which path under /autosubmit/ can save random things? -#e.g., Rscript load_process_save_chunk.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 -Rscript load_process_save_chunk_AS.R --args ${task_path} ${chunk_args[@]} +#e.g., Rscript load_process_save_chunk_autosubmit.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 +Rscript load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} diff --git a/inst/chunking/Chunk.ecf b/inst/chunking/ecFlow/Chunk.ecf similarity index 82% rename from inst/chunking/Chunk.ecf rename to inst/chunking/ecFlow/Chunk.ecf index 60bd051..5a265fb 100644 --- a/inst/chunking/Chunk.ecf +++ b/inst/chunking/ecFlow/Chunk.ecf @@ -12,7 +12,7 @@ set -vx cd %REMOTE_ECF_HOME% task_path=%REMOTE_ECF_HOME%/%ECF_NAME% -Rscript load_process_save_chunk.R --args $task_path insert_indices +Rscript load_process_save_chunk_ecflow.R --args $task_path insert_indices #include_transfer_back_and_rm #clean temporal folder diff --git a/inst/chunking/clean_devshm.sh b/inst/chunking/ecFlow/clean_devshm.sh similarity index 100% rename from inst/chunking/clean_devshm.sh rename to inst/chunking/ecFlow/clean_devshm.sh diff --git a/inst/chunking/head.h b/inst/chunking/ecFlow/head.h similarity index 100% rename from inst/chunking/head.h rename to inst/chunking/ecFlow/head.h diff --git a/inst/chunking/load_process_save_chunk.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R similarity index 100% rename from inst/chunking/load_process_save_chunk.R rename to inst/chunking/ecFlow/load_process_save_chunk_ecflow.R diff --git a/inst/chunking/lsf.h b/inst/chunking/ecFlow/lsf.h similarity index 100% rename from inst/chunking/lsf.h rename to inst/chunking/ecFlow/lsf.h diff --git a/inst/chunking/pbs.h b/inst/chunking/ecFlow/pbs.h similarity index 100% rename from inst/chunking/pbs.h rename to inst/chunking/ecFlow/pbs.h diff --git a/inst/chunking/slurm.h b/inst/chunking/ecFlow/slurm.h similarity index 100% rename from inst/chunking/slurm.h rename to inst/chunking/ecFlow/slurm.h diff --git a/inst/chunking/tail.h b/inst/chunking/ecFlow/tail.h similarity index 100% rename from inst/chunking/tail.h rename to inst/chunking/ecFlow/tail.h -- GitLab From aed5a4f51ec9a466dfe3e037d0b093576968c352 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 28 Jul 2023 16:44:45 +0200 Subject: [PATCH 10/46] minor bugfix --- R/Collect.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 73e06b9..70b52db 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -394,8 +394,8 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { } else if (!wait) { stop("Computation in progress...") } else { - Sys.sleep(cluster[['polling_period']]) - message("Computation in 
progress, ", sum_received_chunks, " of ", num_outputs, " chunks are done...\n", + Sys.sleep(startr_exec$cluster[['polling_period']]) + message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n", "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) # Sys.sleep(min(sqrt(attempt), 5)) } -- GitLab From 1bc09e11789895c2e2d8d669696f993e965a7e1c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 31 Jul 2023 13:28:17 +0200 Subject: [PATCH 11/46] Add run_dir --- R/ByChunks_autosubmit.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 1d0a84b..f276284 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -192,7 +192,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', polling_period = 10, special_setup = 'none', expid = NULL, - hpc_user = NULL) + hpc_user = NULL, + run_dir = NULL) if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -206,7 +207,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'ecflow_module', 'node_memory', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user')))) { + 'polling_period', 'special_setup', 'expid', 'hpc_user', + 'run_dir')))) { stop("Found invalid component names in parameter 'cluster'.") } # Remove ecFlow components @@ -697,6 +699,9 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', sys_commands <- paste0("module load ", as_module, "; ", "autosubmit create ", suite_id, " -np; ", "autosubmit refresh ", suite_id, "; ") + if (!is.null(cluster$run_dir)) { + sys_commands <- paste0("cd ", cluster$run_dir, "; ", sys_commands) + } if (wait) { sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) } else { -- GitLab From 9786b0c45d461d2fe4160aeaaf9834a57c0f94fb Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 31 Jul 2023 14:33:02 +0200 Subject: [PATCH 12/46] Cannot have run_dir; autosubmit run under proj/ (PROJECT_PATH) automatically --- R/ByChunks_autosubmit.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index f276284..e0417e9 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -192,8 +192,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', polling_period = 10, special_setup = 'none', expid = NULL, - hpc_user = NULL, - run_dir = NULL) + hpc_user = NULL) if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -207,8 +206,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'ecflow_module', 'node_memory', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user', - 'run_dir')))) { + 'polling_period', 'special_setup', 'expid', 'hpc_user' + )))) { stop("Found invalid component names in parameter 'cluster'.") } # Remove ecFlow components @@ -699,9 +698,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', sys_commands <- paste0("module load ", as_module, "; ", "autosubmit create ", suite_id, " -np; ", "autosubmit refresh ", suite_id, "; ") - if (!is.null(cluster$run_dir)) { - sys_commands <- paste0("cd ", cluster$run_dir, "; ", sys_commands) - } if (wait) { 
sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) } else { -- GitLab From 6fe80d79012c58d977f9a2da0a4524bbc8ee72f0 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 3 Aug 2023 16:09:28 +0200 Subject: [PATCH 13/46] Use yaml instead of configr --- R/Utils.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/Utils.R b/R/Utils.R index 31f4ed7..69a8e03 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -929,11 +929,11 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') template_dir <- "/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/" - config_files <- list.files(template_dir, pattern = "*\\.yml") + config_files <- list.files(template_dir, pattern = "*\\.yml$") for (i_file in config_files) { - conf <- configr::read.config(file.path(template_dir, i_file)) + conf <- yaml::read_yaml(file.path(template_dir, i_file)) conf_type <- strsplit(i_file, split = "[.]")[[1]][1] ############################################################ @@ -1005,7 +1005,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { dest_file <- paste0(conf_type, "_", cluster$expid, ".yml") # Write config file inside autosubmit dir - write.config(conf, paste0(dest_dir, dest_file), write.type = "yaml") + yaml::write_yaml(conf, paste0(dest_dir, dest_file)) Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) } # for loop each file -- GitLab From 8e5b55bee2487a2d335c2530d7753229ff3b68a5 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 3 Aug 2023 16:10:26 +0200 Subject: [PATCH 14/46] Add yaml to Suggest --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2d569fe..173baca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,8 @@ Imports: Suggests: stats, utils, - testthat + testthat, + yaml License: GPL-3 URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues -- GitLab From 2e9c6d70e9049676c7b946b61cbd08d9c3227923 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 3 Aug 2023 19:37:08 +0200 Subject: [PATCH 15/46] Add check to see if jobs failed. If yes, stop the function --- R/ByChunks_autosubmit.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index e0417e9..5d0bfc4 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -718,6 +718,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") } + # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. + failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) + if (substr(failed_file_size, 1, 1) != 0) stop("Some Autosubmit jobs failed. 
Check GUI and logs.") + timings[['total']] <- t_begin_total startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', suite_id = suite_id, chunks = chunks, -- GitLab From 25eeb54de91abd089d026969660fc105a226403a Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 4 Aug 2023 10:00:59 +0200 Subject: [PATCH 16/46] Change threads_load default to 1 (be consistent with documentation) --- R/ByChunks_autosubmit.R | 2 +- R/ByChunks_ecflow.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 5d0bfc4..094d77b 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -83,7 +83,7 @@ #'@importFrom methods is #'@noRd ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', - threads_load = 2, threads_compute = 1, + threads_load = 1, threads_compute = 1, cluster = NULL, autosubmit_suite_dir = NULL, autosubmit_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE) { diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index 5dc306d..74f0932 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -83,7 +83,7 @@ #'@importFrom methods is #'@noRd ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', - threads_load = 2, threads_compute = 1, + threads_load = 1, threads_compute = 1, cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, -- GitLab From 64f9d736c8bfed30e573ca9e3df59dd7e27f3e2f Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 13:13:31 +0200 Subject: [PATCH 17/46] Remove bigmemory files after failure; remove folder after job finished; add 'run_dir' --- R/ByChunks_autosubmit.R | 33 +++++++++++++++---- R/Collect.R | 30 +++++++++++++---- R/Utils.R | 13 +++++++- inst/chunking/Autosubmit/startR_autosubmit.sh | 10 +++--- 4 files changed, 67 insertions(+), 19 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 094d77b..70e750e 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -154,12 +154,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (!is.character(autosubmit_suite_dir)) { stop("Parameter 'autosubmit_suite_dir' must be a character string.") } -#----------NEW----------- autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') if (!dir.exists(autosubmit_suite_dir_suite)) { dir.create(autosubmit_suite_dir_suite, recursive = TRUE) } -#--------NEW_END---------- if (!dir.exists(autosubmit_suite_dir_suite)) { stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") } @@ -172,7 +170,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } } -#----------NEW----------- # Check cluster default_cluster <- list(queue_host = NULL, # queue_type = 'slurm', @@ -192,7 +189,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', polling_period = 10, special_setup = 'none', expid = NULL, - hpc_user = NULL) + hpc_user = NULL, +#------------NEW----------- + run_dir = NULL) +#---------NEW_END-------------- if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -206,7 +206,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'ecflow_module', 'node_memory', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user' + 'polling_period', 'special_setup', 'expid', 'hpc_user', +#------------NEW----------- + 'run_dir' 
+#---------NEW_END-------------- )))) { stop("Found invalid component names in parameter 'cluster'.") } @@ -720,7 +723,25 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) - if (substr(failed_file_size, 1, 1) != 0) stop("Some Autosubmit jobs failed. Check GUI and logs.") +#------------NEW---------------- + if (substr(failed_file_size, 1, 1) != 0) { + # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(cluster[['run_dir']])) { + file.remove( + file.path(run_dir, + list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } + + stop("Some Autosubmit jobs failed. Check GUI and logs.") + } +#-----------NEW_END---------- timings[['total']] <- t_begin_total startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', diff --git a/R/Collect.R b/R/Collect.R index 70b52db..73eaa07 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -19,8 +19,10 @@ #' merged array. The default value is TRUE. #'@param remove A logical value deciding whether to remove of all data results #' received from the HPC (and stored under 'ecflow_suite_dir', the parameter in -#' Compute()) after being collected. To preserve the data and Collect() it as -#' many times as desired, set remove to FALSE. The default value is TRUE. +#' Compute()) after being collected, as well as the local job folder under +#' 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and +#' Collect() it as many times as desired, set remove to FALSE. The default +#' value is TRUE. #'@return A list of merged data array. #' #'@examples @@ -382,7 +384,7 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - + run_dir <- startr_exec[['run_dir']] done <- FALSE sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) @@ -406,13 +408,27 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { if (remove) { .warning("ATTENTION: The source chunks will be removed from the ", "system. 
Store the result after Collect() ends if needed.") +#--------NEW---------- + unlink(paste0(autosubmit_suite_dir_suite), + recursive = TRUE) +#---------NEW_END-------------- } # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) - file.remove( - file.path(remote_autosubmit_suite_dir_suite, - list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) - ) +#-----------NEW------------ + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(run_dir)) { + file.remove( + file.path(run_dir, + list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } +#---------NEW_END---------- return(result) } diff --git a/R/Utils.R b/R/Utils.R index 69a8e03..37fb6f2 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -904,7 +904,18 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { bash_script_lines <- gsub('^include_module_load', paste0('module load ', cluster[['r_module']]), bash_script_lines) - +#----------NEW--------- + # Rewrite cd run_dir + # If run_dir is not specified, the script will run under ${proj_dir} + if (!is.null(cluster[['run_dir']])) { + bash_script_lines <- gsub('^cd_run_dir', + paste0('cd ', cluster[['run_dir']]), + bash_script_lines) + } else { + bash_script_lines <- gsub('^cd_run_dir', '#cd_run_dir', + bash_script_lines) + } +#---------NEW_END---------- # Save modified .sh file under local$PROJECT_PATH in expdef.yml #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit() #NOTE: the file will be copied to proj/ by "autosubmit create" diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh index d0c16d5..63d9e61 100644 --- a/inst/chunking/Autosubmit/startR_autosubmit.sh +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -14,12 +14,12 @@ chunk_args= include_init_commands include_module_load -#Should move to the path that has load_process_save_chunk_autosubmit.R -cd ${proj_dir} -#cd /esarchive/autosubmit/%EXPID%/proj/STARTR_CHUNKING_${task_path} +##Should move to the path that has load_process_save_chunk_autosubmit.R +#cd ${proj_dir} -#Q: Which path under /autosubmit/ can save random things? 
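+# NOTE: cd_run_dir below is a placeholder; write_autosubmit_bash() in R/Utils.R
+# rewrites it to "cd <run_dir>" when cluster[['run_dir']] is set, and comments it out otherwise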
+# move to run_dir +cd_run_dir #e.g., Rscript load_process_save_chunk_autosubmit.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 -Rscript load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} +Rscript ${proj_dir}/load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} -- GitLab From 15c67ede2b64d50283ca4c6afc856480bae2f722 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 16:02:30 +0200 Subject: [PATCH 18/46] Correct run_dir --- R/Collect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Collect.R b/R/Collect.R index 73eaa07..fc89ed5 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -384,7 +384,7 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - run_dir <- startr_exec[['run_dir']] + run_dir <- startr_exec$cluster[['run_dir']] done <- FALSE sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) -- GitLab From 75cb16ecf8dd3f1c2ac79dd94a2be4e7cf182dee Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 18:08:31 +0200 Subject: [PATCH 19/46] Add names to data inputs so Apply won't return warnings later --- .../Autosubmit/load_process_save_chunk_autosubmit.R | 6 ++++++ inst/chunking/ecFlow/load_process_save_chunk_ecflow.R | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R index f7211d5..1f8b6f9 100644 --- a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R +++ b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R @@ -30,6 +30,12 @@ names(chunks) <- param_dimnames t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) +#---------NEW-------------- +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} +#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) diff --git a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R index b7b73a9..ee3aa04 100644 --- a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R +++ b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R @@ -37,6 +37,12 @@ t_job_setup <- as.numeric(difftime(t_end_job_setup, t_begin_job_setup, units = ' t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) +#---------NEW-------------- +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} +#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) -- GitLab From ee215fbb1a9878c8defdcb9dea5a2e9e75e6648d Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 18:15:52 +0200 Subject: [PATCH 20/46] Change temp dir --- R/ByChunks_autosubmit.R | 2 +- R/ByChunks_ecflow.R | 10 +++++----- R/Utils.R | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 70e750e..0c6be79 100644 --- a/R/ByChunks_autosubmit.R +++ 
b/R/ByChunks_autosubmit.R @@ -454,7 +454,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (on_cluster) { # Copy load_process_save_chunk_autosubmit.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") + chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") # chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index 74f0932..d251015 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -438,7 +438,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy load_process_save_chunk_ecflow.R into shared folder #TODO: Change to package file - chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R") + chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/load_process_save_chunk_ecflow.R") # chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) @@ -494,7 +494,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy Chunk.ecf into shared folder #TODO: Change to package file - chunk_ecf_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/Chunk.ecf") + chunk_ecf_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/Chunk.ecf") # chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', # package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) @@ -562,7 +562,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) #TODO: Change to package file - chunk_queue_header <- file(paste0("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/", cluster[['queue_type']], '.h')) + chunk_queue_header <- file(paste0("/esarchive/scratch/aho/tmp/chunking/ecFlow/", cluster[['queue_type']], '.h')) # chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) @@ -573,8 +573,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy headers #TODO: Change to package file - file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) - file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) + file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) + file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) # file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), # ecflow_suite_dir_suite) # file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), diff --git a/R/Utils.R b/R/Utils.R index 37fb6f2..2d9cc5e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -888,7 +888,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to the following line getting .sh template from package # bash_script <- 
file(system.file('chunking/Autosubmit/startR_autosubmit.sh', # package = 'startR')) - bash_script_template <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/startR_autosubmit.sh") bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -939,7 +939,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { # Get config template files from package #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/" + template_dir <- "/esarchive/scratch/aho/tmp/chunking/Autosubmit/" config_files <- list.files(template_dir, pattern = "*\\.yml$") for (i_file in config_files) { -- GitLab From 748bf10a1ad837ea81dff50a63a9eaa266b9e877 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 18:18:39 +0200 Subject: [PATCH 21/46] rename write_autosubmit_conf to avoid conflict with SUNSET --- R/ByChunks_autosubmit.R | 2 +- R/Utils.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 0c6be79..4850f82 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -513,7 +513,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_conf(chunks, cluster, autosubmit_suite_dir) + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) } # if on_cluster diff --git a/R/Utils.R b/R/Utils.R index 2d9cc5e..87f36eb 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -929,7 +929,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { } # This function generates the .yml files under autosubmit conf/ -write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { +write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Remove this library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) -- GitLab From 01899116775967babdd5d8f2dffc547829b86f7c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 9 Aug 2023 09:42:41 +0200 Subject: [PATCH 22/46] Correct run_dir for file cleaning --- R/ByChunks_autosubmit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 4850f82..fb3e2b7 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -729,8 +729,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ if (!is.null(cluster[['run_dir']])) { file.remove( - file.path(run_dir, - list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + file.path(cluster[['run_dir']], + list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) ) } else { file.remove( -- GitLab From ca103df1de1a64d52324bdf50417e001bcb0cb2c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 9 Aug 2023 13:21:02 +0200 Subject: [PATCH 23/46] Create temp dir if autosubmit_suite_dir is not provided. Move to proj_dir if run_dir is not specified. 
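
When 'autosubmit_suite_dir' is NULL, ByChunks_autosubmit() no longer stops.
It creates ./startR_autosubmit_temp/ under the current working directory and
warns the user. A minimal sketch of the added fallback, using the same names
as the diff below:

    if (is.null(autosubmit_suite_dir)) {
      autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp")
      if (!dir.exists(autosubmit_suite_dir)) {
        dir.create("startR_autosubmit_temp", recursive = FALSE)
      }
    }

Likewise, write_autosubmit_bash() now rewrites the cd_run_dir placeholder as
'cd ${proj_dir}' when cluster[['run_dir']] is not set, so jobs always run
under the Autosubmit project directory.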
--- R/ByChunks_autosubmit.R | 19 +++++++++++-------- R/ByChunks_ecflow.R | 6 +++--- R/Collect.R | 4 ---- R/Utils.R | 5 ++--- .../load_process_save_chunk_autosubmit.R | 2 -- .../ecFlow/load_process_save_chunk_ecflow.R | 2 -- 6 files changed, 16 insertions(+), 22 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index fb3e2b7..4b45198 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -137,7 +137,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', on_cluster <- !is.null(cluster) - # Check autosubmit_suite_dir suite_id <- cluster[['expid']] #NOTE: @@ -148,8 +147,17 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', autosubmit_suite_dir_suite <- '' if (on_cluster) { + # autosubmit_suite_dir if (is.null(autosubmit_suite_dir)) { - stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") +# stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") + # Create a tmp folder as autosubmit_suite_dir + autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") + if (!dir.exists(autosubmit_suite_dir)) { + dir.create("startR_autosubmit_temp", recursive = FALSE) + } + .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", + "folder under current directory: ", autosubmit_suite_dir, "/. Make sure ", + "that Autosubmit machine can find this path.")) } if (!is.character(autosubmit_suite_dir)) { stop("Parameter 'autosubmit_suite_dir' must be a character string.") @@ -161,6 +169,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (!dir.exists(autosubmit_suite_dir_suite)) { stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") } + # autosubmit_server if (!is.null(autosubmit_server)) { if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") @@ -190,9 +199,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', special_setup = 'none', expid = NULL, hpc_user = NULL, -#------------NEW----------- run_dir = NULL) -#---------NEW_END-------------- if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -207,9 +214,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', 'polling_period', 'special_setup', 'expid', 'hpc_user', -#------------NEW----------- 'run_dir' -#---------NEW_END-------------- )))) { stop("Found invalid component names in parameter 'cluster'.") } @@ -723,7 +728,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) -#------------NEW---------------- if (substr(failed_file_size, 1, 1) != 0) { # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ @@ -741,7 +745,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("Some Autosubmit jobs failed. 
Check GUI and logs.") } -#-----------NEW_END---------- timings[['total']] <- t_begin_total startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index d251015..793052c 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -195,7 +195,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', redundant_components <- c('autosubmit_module', 'expid', 'hpc_user') if (any(redundant_components %in% names(cluster))) { tmp <- redundant_components[which(redundant_components %in% names(cluster))] - warning("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager.") + .warning(paste0("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager.")) cluster[[tmp]] <- NULL } default_cluster[names(cluster)] <- cluster @@ -266,8 +266,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") } if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { - warning("The component 'CDO_module' of parameter 'cluster' must have ", - " than 1 and only the first element will be used.") + .warning(paste0("The component 'CDO_module' of parameter 'cluster' must have ", + " than 1 and only the first element will be used.")) } cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) } diff --git a/R/Collect.R b/R/Collect.R index fc89ed5..b00ae6c 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -408,14 +408,11 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { if (remove) { .warning("ATTENTION: The source chunks will be removed from the ", "system. 
Store the result after Collect() ends if needed.") -#--------NEW---------- unlink(paste0(autosubmit_suite_dir_suite), recursive = TRUE) -#---------NEW_END-------------- } # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) -#-----------NEW------------ # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ if (!is.null(run_dir)) { file.remove( @@ -428,7 +425,6 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) ) } -#---------NEW_END---------- return(result) } diff --git a/R/Utils.R b/R/Utils.R index 87f36eb..b0478cf 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -904,7 +904,6 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { bash_script_lines <- gsub('^include_module_load', paste0('module load ', cluster[['r_module']]), bash_script_lines) -#----------NEW--------- # Rewrite cd run_dir # If run_dir is not specified, the script will run under ${proj_dir} if (!is.null(cluster[['run_dir']])) { @@ -912,10 +911,10 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { paste0('cd ', cluster[['run_dir']]), bash_script_lines) } else { - bash_script_lines <- gsub('^cd_run_dir', '#cd_run_dir', + bash_script_lines <- gsub('^cd_run_dir', 'cd ${proj_dir}', bash_script_lines) } -#---------NEW_END---------- + # Save modified .sh file under local$PROJECT_PATH in expdef.yml #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit() #NOTE: the file will be copied to proj/ by "autosubmit create" diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R index 1f8b6f9..8762eeb 100644 --- a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R +++ b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R @@ -30,12 +30,10 @@ names(chunks) <- param_dimnames t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) -#---------NEW-------------- # Add data names if data input has names if (!is.null(names(start_calls_attrs))) { names(data) <- names(start_calls_attrs) } -#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) diff --git a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R index ee3aa04..1bc5d6d 100644 --- a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R +++ b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R @@ -37,12 +37,10 @@ t_job_setup <- as.numeric(difftime(t_end_job_setup, t_begin_job_setup, units = ' t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) -#---------NEW-------------- # Add data names if data input has names if (!is.null(names(start_calls_attrs))) { names(data) <- names(start_calls_attrs) } -#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) -- GitLab From d079d2172aef4cd5970fdbbfa717b59d4a28d178 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 10 Aug 2023 17:43:18 +0200 Subject: [PATCH 24/46] Create exp if expid is not provided. Clean the code. 
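
When cluster[['expid']] is NULL, a new Autosubmit experiment is created on the
fly and its id is parsed from the command output. A minimal sketch of the
logic this patch adds, using the same names as the diff below:

    as_module <- cluster[['autosubmit_module']]
    text <- system(paste0("module load ", as_module, "; ",
                          "autosubmit expid -H local -d 'startR computation'"),
                   intern = TRUE)
    cluster[['expid']] <- strsplit(text[grep("The new experiment", text)],
                                   "\"")[[1]][2]

If 'expid' is given instead, it must already exist under /esarchive/autosubmit/.
The sanity checks are also flattened, since ByChunks_autosubmit() is only
reached when a cluster is used.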
--- R/ByChunks_autosubmit.R | 831 ++++++++++++++++++---------------------- R/Compute.R | 34 +- R/Utils.R | 2 +- 3 files changed, 384 insertions(+), 483 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 4b45198..8789963 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -88,6 +88,12 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', autosubmit_suite_dir = NULL, autosubmit_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE) { + #NOTE: + #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ + #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ + #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ + #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + # Build object to store profiling timings t_begin_total <- Sys.time() t_begin_bychunks_setup <- t_begin_total @@ -108,7 +114,14 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', MergeArrays <- .MergeArrays - # Check input headers + # Sanity checks + ## step_fun + if (!is(step_fun, 'startR_step_fun')) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + ## cube_headers if (is(cube_headers, 'startR_cube')) { cube_headers <- list(cube_headers) } @@ -117,13 +130,12 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", "as returned by Start().") } - - # Check step_fun - if (!is.function(step_fun)) { - stop("Parameter 'step_fun' must be a function.") + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs expected by the function 'step_fun'.") } - # Check cores + ## threads_load and threads_compute if (!is.numeric(threads_load)) { stop("Parameter 'threads_load' must be a numeric value.") } @@ -134,52 +146,50 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', threads_compute <- round(threads_compute) timings[['threads_load']] <- threads_load timings[['threads_compute']] <- threads_compute - - on_cluster <- !is.null(cluster) - - suite_id <- cluster[['expid']] - - #NOTE: - #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ - #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ - #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ - #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + + ## autosubmit_suite_dir + if (is.null(autosubmit_suite_dir)) { + # Create a tmp folder as autosubmit_suite_dir + autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") + if (!dir.exists(autosubmit_suite_dir)) { + dir.create("startR_autosubmit_temp", recursive = FALSE) + } + .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", + "folder under current directory: ", autosubmit_suite_dir, "/. 
Make sure ", + "that Autosubmit machine can find this path.")) + } + if (!is.character(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be a character string.") + } - autosubmit_suite_dir_suite <- '' - if (on_cluster) { - # autosubmit_suite_dir - if (is.null(autosubmit_suite_dir)) { -# stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") - # Create a tmp folder as autosubmit_suite_dir - autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") - if (!dir.exists(autosubmit_suite_dir)) { - dir.create("startR_autosubmit_temp", recursive = FALSE) - } - .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", - "folder under current directory: ", autosubmit_suite_dir, "/. Make sure ", - "that Autosubmit machine can find this path.")) - } - if (!is.character(autosubmit_suite_dir)) { - stop("Parameter 'autosubmit_suite_dir' must be a character string.") - } - autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - if (!dir.exists(autosubmit_suite_dir_suite)) { - dir.create(autosubmit_suite_dir_suite, recursive = TRUE) - } - if (!dir.exists(autosubmit_suite_dir_suite)) { - stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") - } - # autosubmit_server - if (!is.null(autosubmit_server)) { - if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { - stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") - } - } else { - autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) + ## autosubmit_server + if (!is.null(autosubmit_server)) { + if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { + stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") } + } else { + autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) } - # Check cluster + ## silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + ## debug + if (!is.logical(debug)) { + stop("Parameter 'debug' must be logical.") + } + if (silent) { + debug <- FALSE + } + + ## wait + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + + ## cluster default_cluster <- list(queue_host = NULL, # queue_type = 'slurm', data_dir = NULL, @@ -200,158 +210,167 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', expid = NULL, hpc_user = NULL, run_dir = NULL) - if (on_cluster) { - if (!is.list(cluster)) { - stop("Parameter 'cluster' must be a named list.") - } - if (is.null(names(cluster))) { - stop("Parameter 'cluster' must be a named list.") - } - if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', - 'temp_dir', 'lib_dir', 'init_commands', - 'r_module', 'CDO_module', 'autosubmit_module', - 'ecflow_module', 'node_memory', - 'cores_per_job', 'job_wallclock', 'max_jobs', - 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user', - 'run_dir' - )))) { - stop("Found invalid component names in parameter 'cluster'.") - } - # Remove ecFlow components - redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') - if (any(redundant_components %in% names(cluster))) { - tmp <- redundant_components[which(redundant_components %in% names(cluster))] - warning("Cluster component ", paste(tmp, collapse = ','), " not used when 
Autosubmit is the workflow manager.") - cluster[[tmp]] <- NULL - } - default_cluster[names(cluster)] <- cluster + if (!is.list(cluster) || is.null(names(cluster))) { + stop("Parameter 'cluster' must be a named list.") + } + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', + 'extra_queue_params', 'bidirectional', + 'polling_period', 'special_setup', 'expid', 'hpc_user', + 'run_dir' + )))) { + stop("Found invalid component names in parameter 'cluster'.") } + # Remove ecFlow components + redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + .warning(paste0("Cluster component ", paste(tmp, collapse = ','), + " not used when Autosubmit is the workflow manager.")) + cluster[[tmp]] <- NULL + } + default_cluster[names(cluster)] <- cluster cluster <- default_cluster - is_data_dir_shared <- FALSE - # Cluster compoment check - if (on_cluster) { - # queue_host - support_hpcs <- c('local', 'nord3') # names in platforms.yml - if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) { - stop("Component 'queue_host' in parameter 'cluster' must be one of the follows: ", paste(support_hpcs, collapse = ','), '.') - } - # data_dir - if (is.null(cluster[['data_dir']])) { - is_data_dir_shared <- TRUE - } else { - if (!is.character(cluster[['data_dir']])) { - stop("The component 'data_dir' of the parameter 'cluster' must be a character string.") - } - remote_data_dir <- cluster[['data_dir']] - } - # lib_dir - if (!is.null(cluster[['lib_dir']])) { - if (!is.character(cluster[['lib_dir']])) { - stop("The component 'lib_dir', of the parameter 'cluster' must be NULL or ", - "a character string.") - } - } - # init_commands - if (!is.list(cluster[['init_commands']]) || - !all(sapply(cluster[['init_commands']], is.character))) { - stop("The component 'init_commands' of the parameter 'cluster' must be a list of ", - "character strings.") - } - # r_module - if (!is.character(cluster[['r_module']])) { - stop("The component 'r_module' of the parameter 'cluster' must be a character string.") - } - if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) { - stop("The component 'r_module' of the parameter 'cluster' must have at least one character ", - "and contain no blank spaces.") - } - # CDO_module - if (!is.null(cluster[['CDO_module']])) { - if (!is.character(cluster[['CDO_module']])) { - stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") - } - if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { - warning("The component 'CDO_module' of parameter 'cluster' must have ", - " than 1 and only the first element will be used.") - } - cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) - } - # autosubmit_module - if (!is.character(cluster[['autosubmit_module']])) { - stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") - } - # cores_per_job - if (is.null(cluster[['cores_per_job']])) { - cluster[['cores_per_job']] <- threads_compute - } - if (!is.numeric(cluster[['cores_per_job']])) { - stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") - } - 
 cluster[['cores_per_job']] <- round(cluster[['cores_per_job']])
-    if (cluster[['cores_per_job']] > threads_compute) {
-      .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].")
-    }
-    # job_wallclock
-    tmp <- strsplit( '01:00:00', ':')[[1]]
-    if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) {
-      stop("The compoment 'job_wallclock' should be the format of HH:MM or HH:MM:SS.")
-    }
-    # max_jobs
-    if (!is.numeric(cluster[['max_jobs']])) {
-      stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.")
+  ### queue_host
+  support_hpcs <- c('local', 'nord3') # names in platforms.yml
+  if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) {
+    stop("Cluster component 'queue_host' must be one of the following: ",
+         paste(support_hpcs, collapse = ','), '.')
+  }
+
+  ### data_dir
+  is_data_dir_shared <- FALSE
+  if (is.null(cluster[['data_dir']])) {
+    is_data_dir_shared <- TRUE
+  } else {
+    if (!is.character(cluster[['data_dir']])) {
+      stop("The component 'data_dir' of the parameter 'cluster' must be a character string.")
     }
-    cluster[['max_jobs']] <- round(cluster[['max_jobs']])
-    # extra_queue_params
-    if (!is.list(cluster[['extra_queue_params']]) ||
-        !all(sapply(cluster[['extra_queue_params']], is.character))) {
-      stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ",
-           "character strings.")
+    remote_data_dir <- cluster[['data_dir']]
+  }
+  ### lib_dir
+  if (!is.null(cluster[['lib_dir']])) {
+    if (!is.character(cluster[['lib_dir']])) {
+      stop("The component 'lib_dir' of the parameter 'cluster' must be NULL or ",
+           "a character string.")
    }
-    # polling_period
-    if (!is.numeric(cluster[['polling_period']])) {
-      stop("The component 'polling_period' of the parameter 'cluster' must be numeric.")
+  }
+  ### init_commands
+  if (!is.list(cluster[['init_commands']]) ||
+      !all(sapply(cluster[['init_commands']], is.character))) {
+    stop("The component 'init_commands' of the parameter 'cluster' must be a list of ",
+         "character strings.")
+  }
+  ### r_module
+  if (!is.character(cluster[['r_module']])) {
+    stop("The component 'r_module' of the parameter 'cluster' must be a character string.")
+  }
+  if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) {
+    stop("The component 'r_module' of the parameter 'cluster' must have at least one character ",
+         "and contain no blank spaces.")
+  }
+  ### CDO_module
+  if (!is.null(cluster[['CDO_module']])) {
+    if (!is.character(cluster[['CDO_module']])) {
+      stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.")
    }
-    cluster[['polling_period']] <- round(cluster[['polling_period']])
-    # special_setup
-    if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) {
-      stop("The value provided for the component 'special_setup' of the parameter ",
-           "'cluster' is not recognized.")
+    if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) {
+      warning("The component 'CDO_module' of parameter 'cluster' must have ",
+              "at least one character and contain no blank spaces; only the first element will be used.")
    }
-    # expid
+    cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']])
+  }
+  ### autosubmit_module
+  if (!is.character(cluster[['autosubmit_module']])) {
+    stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.")
+  }
+  ### cores_per_job
+  if (is.null(cluster[['cores_per_job']])) {
+    cluster[['cores_per_job']] <- threads_compute
+  }
+  if 
(!is.numeric(cluster[['cores_per_job']])) {
+    stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['cores_per_job']] <- round(cluster[['cores_per_job']])
+  if (cluster[['cores_per_job']] > threads_compute) {
+    .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].")
+  }
+  ### job_wallclock
+  tmp <- strsplit(cluster[['job_wallclock']], ':')[[1]]
+  if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) {
+    stop("The component 'job_wallclock' must be in the format HH:MM or HH:MM:SS.")
+  }
+  ### max_jobs
+  if (!is.numeric(cluster[['max_jobs']])) {
+    stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['max_jobs']] <- round(cluster[['max_jobs']])
+  ### extra_queue_params
+  if (!is.list(cluster[['extra_queue_params']]) ||
+      !all(sapply(cluster[['extra_queue_params']], is.character))) {
+    stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ",
+         "character strings.")
+  }
+  ### polling_period
+  if (!is.numeric(cluster[['polling_period']])) {
+    stop("The component 'polling_period' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['polling_period']] <- round(cluster[['polling_period']])
+  ### special_setup
+  if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) {
+    stop("The value provided for the component 'special_setup' of the parameter ",
+         "'cluster' is not recognized.")
+  }
+  ### expid
+  as_module <- cluster[['autosubmit_module']]
+  if (is.null(cluster[['expid']])) {
+    text <- system(
+      paste0("module load ", as_module, "; ",
+             "autosubmit expid -H local -d 'startR computation'"),
+      intern = T)
+    cluster[['expid']] <- strsplit(
+      text[grep("The new experiment", text)],
+      "\"")[[1]][2]
+    message(paste0("ATTENTION: The new experiment '", cluster[['expid']],
+                   "' has been created. 
Please note it down.")) + } else { if (!is.character(cluster[['expid']]) | length(cluster[['expid']]) != 1) { stop("The component 'expid' of the parameter 'cluster' must be a character string.") } - # hpc_user - if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) { - stop("The component 'hpc_user' of the parameter 'cluster' must be a character string.") + if (!dir.exists(file.path("/esarchive/autosubmit", cluster[['expid']]))) { + stop("Cluster component 'expid' is not found under /esarchive/autosubmit/.") } - } + suite_id <- cluster[['expid']] - if (on_cluster) { - remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", cluster[['expid']], 'proj') - remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', cluster[['expid']], '/') + ### hpc_user + if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) { + stop("Cluster component 'hpc_user' must be a character string.") } - - # Check silent - if (!is.logical(silent)) { - stop("Parameter 'silent' must be logical.") + ### run_dir + if (!is.null(cluster$run_dir)) { + if (!dir.exists(cluster$run_dir)) { + stop("Cluster component 'run_dir' ", cluster$run_dir," is not found.") + } } - - # Check debug - if (!is.logical(debug)) { - stop("Parameter 'debug' must be logical.") + +#============================================== + + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + if (!dir.exists(autosubmit_suite_dir_suite)) { + dir.create(autosubmit_suite_dir_suite, recursive = TRUE) } - if (silent) { - debug <- FALSE - } - - # Check wait - if (!is.logical(wait)) { - stop("Parameter 'wait' must be logical.") + if (!dir.exists(autosubmit_suite_dir_suite)) { + stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") } + + remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') + remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + # Work out chunked dimensions and target dimensions all_dims <- lapply(cube_headers, attr, 'Dimensions') @@ -380,10 +399,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', "target dimensions.") } - if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { - stop("Number of inputs in parameter 'cube_headers' must be equal to the ", - "number of inputs expected by the function 'step_fun'.") - } # Check all input headers have matching dimensions cube_index <- 1 for (cube_header in cube_headers) { @@ -444,84 +459,78 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', chunks <- default_chunks timings[['nchunks']] <- prod(unlist(chunks)) - # Check step_fun - if (!is(step_fun, 'startR_step_fun')) { - stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", - "by the function Step.") - } - # Replace 'all's chunks_all <- which(unlist(chunks) == 'all') if (length(chunks_all) > 0) { chunks[chunks_all] <- all_dims[names(chunks)[chunks_all]] } - if (on_cluster) { - # Copy load_process_save_chunk_autosubmit.R into local folder + # Copy load_process_save_chunk_autosubmit.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") -# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', -# 
package = 'startR')) - chunk_script_lines <- readLines(chunk_script) - close(chunk_script) - chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', - paste(deparse(cluster[['lib_dir']]), collapse = '\n')), - chunk_script_lines) - #TODO: Change out_dir to somewhere else like expid/outputs/ - chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', - paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) - chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), - chunk_script_lines) - deparsed_calls <- paste0('start_calls <- list(') - extra_path <- '' - if (cluster[['special_setup']] == 'marenostrum4') { - extra_path <- '/gpfs/archive/bsc32/' - } - for (cube_header in 1:length(cube_headers)) { - pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') - bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] - bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') - if (!is_data_dir_shared) { - cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', - extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) - for (file_n in 1:length(bk_expected_files)) { - attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', - extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) - } - } - deparsed_calls <- paste0(deparsed_calls, '\nquote(', - paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), - ')') - cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim - attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files - if (cube_header < length(cube_headers)) { - deparsed_calls <- paste0(deparsed_calls, ', ') + chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") +# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', +# package = 'startR')) + chunk_script_lines <- readLines(chunk_script) + close(chunk_script) + chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', + paste(deparse(cluster[['lib_dir']]), collapse = '\n')), + chunk_script_lines) + #TODO: Change out_dir to somewhere else like expid/outputs/ + chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', + paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) + chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), + chunk_script_lines) + deparsed_calls <- paste0('start_calls <- list(') + extra_path <- '' + if (cluster[['special_setup']] == 'marenostrum4') { + extra_path <- '/gpfs/archive/bsc32/' + } + for (cube_header in 1:length(cube_headers)) { + pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') + bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] + bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + if (!is_data_dir_shared) { + cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', + extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) + for (file_n in 1:length(bk_expected_files)) { + attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', + extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) } } - deparsed_calls <- paste0(deparsed_calls, '\n)') - chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) - chunk_script_lines <- gsub('^start_calls_attrs <- *', 
paste0('start_calls_attrs <- ', paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), - chunk_script_lines) - chunk_script_lines <- gsub('^param_dimnames <- *', paste0('param_dimnames <- ', paste(deparse(chunked_dims), collapse = '\n')), - chunk_script_lines) - chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), - chunk_script_lines) - chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), - chunk_script_lines) - chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), - chunk_script_lines) - chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), - chunk_script_lines) - writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) + deparsed_calls <- paste0(deparsed_calls, '\nquote(', + paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), + ')') + cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim + attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files + if (cube_header < length(cube_headers)) { + deparsed_calls <- paste0(deparsed_calls, ', ') + } + } + deparsed_calls <- paste0(deparsed_calls, '\n)') + chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) + chunk_script_lines <- gsub('^start_calls_attrs <- *', + paste0('start_calls_attrs <- ', + paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^param_dimnames <- *', + paste0('param_dimnames <- ', + paste(deparse(chunked_dims), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), + chunk_script_lines) + chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), + chunk_script_lines) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) - # Write and copy startR_autosubmit.sh into local folder - write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) - - # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) + # Write and copy startR_autosubmit.sh into local folder + write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) - } # if on_cluster - + # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) # Iterate through chunks chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) @@ -531,238 +540,124 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) dim(arrays_of_results[[component]]) <- (unlist(chunks)) } - if (!on_cluster) { - t_end_bychunks_setup <- Sys.time() - timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, - t_begin_bychunks_setup, units = 'secs')) - timings[['transfer']] <- 0 - timings[['queue']] <- 0 - timings[['job_setup']] <- 0 - timings[['transfer_back']] 
<- 0 - if (!silent) { - .message(paste0("Processing chunks... ", - "remaining time estimate soon...")) - } - time_before_first_chunk <- Sys.time() - time_after_first_chunk <- NULL - } found_first_result <- FALSE for (i in 1:length(chunk_array)) { chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] names(chunk_indices) <- names(dim(chunk_array)) - - if (!on_cluster) { - if (!silent) { - .message(paste("Loading chunk", i, - "out of", length(chunk_array), "...")) - } - data <- vector('list', length(cube_headers)) - t_begin_load <- Sys.time() - for (input in 1:length(data)) { - start_call <- cube_headers[[input]] - dims_to_alter <- which(names(attr(start_call, 'Dimensions')) %in% names(chunks)) - names_dims_to_alter <- names(attr(start_call, 'Dimensions'))[dims_to_alter] - # If any dimension comes from split dimensions - split_dims <- attr(start_call, 'SplitDims') - - if (length(split_dims) != 0){ - - for (k in 1:length(split_dims)) { - if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { - chunks_split_dims <- rep(1, length(split_dims[[k]])) - names(chunks_split_dims) <- names(split_dims[[k]]) - chunks_indices_split_dims <- chunks_split_dims - split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) - chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) - chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] - start_call[[names(split_dims)[k]]] <- .chunk(chunks_indices_split_dims, chunks_split_dims, - eval(start_call[[names(split_dims)[k]]])) - dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) - if (length(dims_to_alter_to_remove) > 0) { - dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] - names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] - } - } - } - } - - if (length(dims_to_alter) > 0) { - for (call_dim in names(attr(start_call, 'Dimensions'))[dims_to_alter]) { - start_call[[call_dim]] <- .chunk(chunk_indices[call_dim], chunks[[call_dim]], - eval(start_call[[call_dim]])) - } - } - start_call[['silent']] <- !debug - if (!('num_procs' %in% names(start_call))) { - start_call[['num_procs']] <- threads_load - } - data[[input]] <- eval(start_call) - } - t_end_load <- Sys.time() - timings[['load']] <- c(timings[['load']], - as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))) - if (!silent) { - .message(paste("Processing...")) - } - #TODO: Find a better way to assign the names of data. When multiple steps for Compute is available, this way may fail. 
- names(data) <- names(cube_headers) - t_begin_compute <- Sys.time() - result <- multiApply::Apply(data, - target_dims = attr(step_fun, 'TargetDims'), - fun = step_fun, ..., - output_dims = attr(step_fun, 'OutputDims'), - use_attributes = attr(step_fun, 'UseAttributes'), - ncores = threads_compute) - if (!found_first_result) { - names(arrays_of_results) <- names(result) - found_first_result <- TRUE - } - for (component in 1:length(result)) { - arrays_of_results[[component]][[i]] <- result[[component]] - } - rm(data) - gc() - t_end_compute <- Sys.time() - timings[['compute']] <- c(timings[['compute']], - as.numeric(difftime(t_end_compute, - t_begin_compute, units = 'secs'))) - } - - # Time estimate - if (!on_cluster) { - if (is.null(time_after_first_chunk)) { - time_after_first_chunk <- Sys.time() - if (!silent) { - estimate <- (time_after_first_chunk - - time_before_first_chunk) * - (length(chunk_array) - 1) - units(estimate) <- 'mins' - .message( - paste0("Remaining time estimate (at ", format(time_after_first_chunk), ") ", - "(neglecting merge time): ", format(estimate)) - ) - } - } - } } - if (on_cluster) { - timings[['cores_per_job']] <- cluster[['cores_per_job']] - timings[['concurrent_chunks']] <- cluster[['max_jobs']] + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- cluster[['max_jobs']] - t_end_bychunks_setup <- Sys.time() - timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, - t_begin_bychunks_setup, units = 'secs')) - if (!is_data_dir_shared) { + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { #NOTE: Not consider this part yet - t_begin_transfer <- Sys.time() - .message("Sending involved files to the cluster file system...") - files_to_send <- NULL - #files_to_check <- NULL - for (cube_header in 1:length(cube_headers)) { - expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') - #files_to_check <- c(files_to_check, expected_files) - #if (cluster[['special_setup']] == 'marenostrum4') { - # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) - #} - files_to_send <- c(files_to_send, expected_files) - } - #which_files_exist <- sapply(files_to_check, file.exists) - which_files_exist <- sapply(files_to_send, file.exists) - files_to_send <- files_to_send[which_files_exist] - if (cluster[['special_setup']] == 'marenostrum4') { - file_spec <- paste(paste0("/gpfs/archive/bsc32/", - files_to_send), collapse = ' ') - system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, - ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, - ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', - " ; sleep 1 ; ", - "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", - "do sleep 2 ; done", - " ; sleep 1 ; ", - 'while [[ ! 
$(grep "total size is" dtrsync_*.out | ', - "wc -l) -ge 1 ]] ; ", - "do sleep 5 ; done", "'")) - } else { - file_spec <- paste(files_to_send, collapse = ' :') - system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', - remote_data_dir, '"')) - system(paste0("rsync -Rrav '", file_spec, "' '", - cluster[['queue_host']], ":", remote_data_dir, "/'")) - } - .message("Files sent successfully.") - t_end_transfer <- Sys.time() - timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) - } else { - timings[['transfer']] <- 0 - } - if (!silent) { - .message(paste0("Processing chunks... ")) - } - time_begin_first_chunk <- Sys.time() - - as_module <- cluster[['autosubmit_module']] - sys_commands <- paste0("module load ", as_module, "; ", - "autosubmit create ", suite_id, " -np; ", - "autosubmit refresh ", suite_id, "; ") - if (wait) { - sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + t_begin_transfer <- Sys.time() + .message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! $(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) } else { - sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? - } + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + .message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + .message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit create ", suite_id, " -np; ", + "autosubmit refresh ", suite_id, "; ") + if (wait) { + sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + } else { + sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? 
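+    # wait = FALSE detaches the Autosubmit run; the returned startr_exec object
+    # is passed to Collect() later to gather and merge the chunk results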
+ } + if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { + #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here + system(sys_commands) - if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { - #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here - system(sys_commands) + } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + # ssh from WS to AS VM to run exp + as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') + sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') + system(sys_commands) - } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { - # ssh from WS to AS VM to run exp - as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') - sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') - system(sys_commands) + } else { + stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + } + # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. + failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) + if (substr(failed_file_size, 1, 1) != 0) { + # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(cluster[['run_dir']])) { + file.remove( + file.path(cluster[['run_dir']], + list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) + ) } else { - stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) } - # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. - failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) - if (substr(failed_file_size, 1, 1) != 0) { - # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist - # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ - if (!is.null(cluster[['run_dir']])) { - file.remove( - file.path(cluster[['run_dir']], - list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) - ) - } else { - file.remove( - file.path(remote_autosubmit_suite_dir_suite, - list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) - ) - } + stop("Some Autosubmit jobs failed. Check GUI and logs.") + } - stop("Some Autosubmit jobs failed. 
Check GUI and logs.") - } + timings[['total']] <- t_begin_total + startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', + suite_id = suite_id, chunks = chunks, + num_outputs = length(arrays_of_results), + autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, + timings = timings) + class(startr_exec) <- 'startR_exec' - timings[['total']] <- t_begin_total - startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', - suite_id = suite_id, chunks = chunks, - num_outputs = length(arrays_of_results), - autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, - timings = timings) - class(startr_exec) <- 'startR_exec' + if (wait) { + result <- Collect(startr_exec, wait = TRUE, remove = T) + .message("Computation ended successfully.") + return(result) - if (wait) { - result <- Collect(startr_exec, wait = TRUE, remove = T) - .message("Computation ended successfully.") - return(result) + } else { + # if wait = F, return startr_exec and merge chunks in Collect(). + return(startr_exec) + } - } else { - # if wait = F, return startr_exec and merge chunks in Collect(). - return(startr_exec) - } - } } - diff --git a/R/Compute.R b/R/Compute.R index 81f83e6..981f28b 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -28,6 +28,7 @@ #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. +#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'. #'@param ecflow_suite_dir A character string indicating the path to a folder in #' the local workstation where to store temporary files generated for the #' automatic management of the workflow. 
Only needed when the execution is run
@@ -145,24 +146,18 @@ Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow',
     stop("Workflows with only one step supported by now.")
   }
 
-  # Run ByChunks with the combined operation
+  # Run ByChunks with the chosen operation
   if (!is.null(cluster)) {
-    if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) {
+    if (is.null(workflow_manager)) {
+      stop("Specify parameter 'workflow_manager' as 'ecFlow' or 'Autosubmit'.")
+    } else if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) {
       stop("Parameter 'workflow_manager' can only be 'ecFlow' or 'Autosubmit'.")
     }
+  } else {  # run locally
+    workflow_manager <- 'ecflow'
   }
 
-  if (tolower(workflow_manager) == 'autosubmit') {
-    res <- ByChunks_autosubmit(step_fun = operation,
-                               cube_headers = workflow$inputs,
-                               chunks = chunks,
-                               threads_load = threads_load,
-                               threads_compute = threads_compute,
-                               cluster = cluster,
-                               autosubmit_suite_dir = autosubmit_suite_dir,
-                               autosubmit_server = autosubmit_server,
-                               silent = silent, debug = debug, wait = wait)
-  } else {
+  if (tolower(workflow_manager) == 'ecflow') {
     # ecFlow or run locally
     res <- ByChunks_ecflow(step_fun = operation,
                            cube_headers = workflow$inputs,
@@ -173,7 +168,18 @@ Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow',
                            ecflow_suite_dir = ecflow_suite_dir,
                            ecflow_server = ecflow_server,
                            silent = silent, debug = debug, wait = wait)
-  }
+  } else {
+    res <- ByChunks_autosubmit(step_fun = operation,
+                               cube_headers = workflow$inputs,
+                               chunks = chunks,
+                               threads_load = threads_load,
+                               threads_compute = threads_compute,
+                               cluster = cluster,
+                               autosubmit_suite_dir = autosubmit_suite_dir,
+                               autosubmit_server = autosubmit_server,
+                               silent = silent, debug = debug, wait = wait)
+
+  }
 
   # TODO: carry out remaining steps locally, using multiApply
 
   # Return results
diff --git a/R/Utils.R b/R/Utils.R
index b0478cf..c1d738f 100644
--- a/R/Utils.R
+++ b/R/Utils.R
@@ -921,7 +921,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) {
   dest_dir <- file.path(autosubmit_suite_dir, paste0("/STARTR_CHUNKING_", cluster$expid))
 
   if (!file.exists(dest_dir)) {
-    dir.create(savefile_path, recursive = TRUE)
+    dir.create(dest_dir, recursive = TRUE)
   }
   writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh'))
 }
-- 
GitLab
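A minimal sketch of the dispatch reworked in the patch above. It assumes a one-step workflow `wf` already built with AddStep(); the cluster list is deliberately truncated and the paths are hypothetical:

```r
# cluster = NULL: runs locally; workflow_manager is reset to 'ecflow'
# internally, so ByChunks_ecflow() is used.
res_local <- Compute(wf, chunks = list(latitude = 2))

# cluster set + workflow_manager = 'Autosubmit' (the check is
# case-insensitive): dispatches through ByChunks_autosubmit().
res_remote <- Compute(wf, chunks = list(latitude = 2),
                      workflow_manager = 'Autosubmit',
                      cluster = list(queue_host = 'nord3'),  # truncated; see the practical guide
                      autosubmit_suite_dir = '/home/Earth/<user>/startR_as/',
                      autosubmit_server = 'bscesautosubmit01')
```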
From f80593d5c1b24acc074f7f2c8e82881408b7f098 Mon Sep 17 00:00:00 2001
From: aho
Date: Fri, 11 Aug 2023 12:23:30 +0200
Subject: [PATCH 25/46] Create issue template

---
 .gitlab/issue_templates/Default.md | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 .gitlab/issue_templates/Default.md

diff --git a/.gitlab/issue_templates/Default.md b/.gitlab/issue_templates/Default.md
new file mode 100644
index 0000000..6d47b71
--- /dev/null
+++ b/.gitlab/issue_templates/Default.md
@@ -0,0 +1,26 @@
+(This is a template to report problems or suggest a new development. Please fill in the relevant information and remove the rest.)
+
+Hi @aho,
+
+#### Summary
+(Bug: Summarize the bug and briefly explain the expected and the current behavior.)
+(New development: Summarize the development needed.)
+
+#### Example
+(Bug: Provide a **minimal reproducible example** and the error message. See [How To Build A Minimal Reproducible Example](https://docs.google.com/document/d/1zRlmsRwFDJctDB94x6HGf6ezu3HFHhEjaBu0hVcrwTI/edit#heading=h.skblym4acpw5))
+(New development: Provide an example script or a useful piece of code if applicable.)
+
+```r
+#Example:
+exp <- Start(...)
+```
+> Error in Start: &%$("!*
+
+#### Module and Package Version
+(Which R version are you using? e.g., R/4.1.2)
+(What other modules and their versions are required to reproduce this issue? e.g., PROJ/4.8.0-foss-2015a)
+(Which R package versions are you using? Check with sessionInfo(). e.g., startR_2.2.3)
+(Which machine are you using? WS, Nord3, hub, others...)
+
+#### Other Relevant Information
+(Additional information.)
-- 
GitLab

From 44cd30232d28d7232bd54f23b5048ba8e72c3f1a Mon Sep 17 00:00:00 2001
From: aho
Date: Fri, 11 Aug 2023 15:27:47 +0200
Subject: [PATCH 26/46] Update document

---
 R/ByChunks_autosubmit.R | 34 +++++++++++++++++-----------------
 R/ByChunks_ecflow.R     | 13 +++++++------
 R/Collect.R             | 29 ++++++++++++++---------------
 3 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R
index 8789963..af411d9 100644
--- a/R/ByChunks_autosubmit.R
+++ b/R/ByChunks_autosubmit.R
@@ -2,7 +2,8 @@
 #'
 #'This is an internal function used in Compute(), executing the operation by
 #'the chunks specified in Compute(). It also returns the configuration details
-#'and profiling information.
+#'and profiling information. It is used when the workflow manager is
+#'Autosubmit.
 #'
 #'@param step_fun A function with the class 'startR_step_fun' containing the
 #'  details of operation.
@@ -19,19 +20,17 @@
 #'@param threads_compute An integer indicating the number of execution threads
 #'  to use for the computation. The default value is 1.
 #'@param cluster A list of components that define the configuration of the
-#'  machine to be run on. The comoponents vary from the different machines.
-#'  Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more
-#'  details and examples.
-#'  Only needed when the computation is not run locally. The default value is
-#'  NULL.
-#'@param ecflow_suite_dir A character string indicating the path to a folder in
-#'  the local workstation where to store temporary files generated for the
-#'  automatic management of the workflow. Only needed when the execution is run
-#'  remotely. The default value is NULL.
-#'@param ecflow_server A named vector indicating the host and port of the
-#'  EC-Flow server. The vector form should be
-#'  \code{c(host = 'hostname', port = port_number)}. Only needed when the
-#'  execution is run remotely. The default value is NULL.
+#'  machine to be run on. The components vary across machines. Check the
+#'  \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}
+#'  for more details and examples.
+#'@param autosubmit_suite_dir A character string indicating the path to a folder
+#'  where to store temporary files generated for the automatic management of the
+#'  workflow manager. This path should be available on both the local workstation
+#'  and the Autosubmit machine. The default value is NULL, and a temporary folder
+#'  will be created.
+#'@param autosubmit_server A character vector indicating the login node of the
+#'  Autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02".
+#'  The default value is NULL, and the node will be randomly chosen.
 #'@param silent A logical value deciding whether to print the computation
 #'  progress (FALSE) on the R session or not (TRUE). It only works when the
 #'  execution runs locally or the parameter 'wait' is TRUE. The default value
@@ -51,8 +50,9 @@
 #'  attached as attributes to the returned list of arrays.
 #'
 #'@examples
-#' # ByChunks() is internally used in Compute(), not intended to be used by
-#' # users. The example just illustrates the inputs of ByChunks().
+#' # ByChunks_autosubmit() is internally used in Compute(), not intended to be
+#' # used by users. The example just illustrates the inputs of
+#' # ByChunks_autosubmit().
 #' # data_path <- system.file('extdata', package = 'startR')
 #' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
 #' # sdates <- c('200011', '200012')
@@ -77,7 +77,7 @@
 #' #                   output_dims = 'latitude',
 #' #                   use_libraries = c('multiApply'),
 #' #                   use_attributes = list(data = "Variables"))
-#' #ByChunks(step, data)
+#' #ByChunks_autosubmit(step, data)
 #'
 #'@import multiApply
 #'@importFrom methods is
diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R
index 793052c..cb5c95b 100644
--- a/R/ByChunks_ecflow.R
+++ b/R/ByChunks_ecflow.R
@@ -19,9 +19,10 @@
 #'@param threads_compute An integer indicating the number of execution threads
 #'  to use for the computation. The default value is 1.
 #'@param cluster A list of components that define the configuration of the
-#'  machine to be run on. The comoponents vary from the different machines.
-#'  Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more
-#'  details and examples.
+#'  machine to be run on. The components vary across machines.
+#'  Check the
+#'  \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}
+#'  for more examples.
 #'  Only needed when the computation is not run locally. The default value is
 #'  NULL.
 #'@param ecflow_suite_dir A character string indicating the path to a folder in
@@ -51,8 +52,8 @@
 #'  attached as attributes to the returned list of arrays.
 #'
 #'@examples
-#' # ByChunks() is internally used in Compute(), not intended to be used by
-#' # users. The example just illustrates the inputs of ByChunks().
+#' # ByChunks_ecflow() is internally used in Compute(), not intended to be used
+#' # by users. The example just illustrates the inputs of ByChunks_ecflow().
 #' # data_path <- system.file('extdata', package = 'startR')
 #' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
 #' # sdates <- c('200011', '200012')
@@ -77,7 +78,7 @@
 #' #                   output_dims = 'latitude',
 #' #                   use_libraries = c('multiApply'),
 #' #                   use_attributes = list(data = "Variables"))
-#' #ByChunks(step, data)
+#' #ByChunks_ecflow(step, data)
 #'
 #'@import multiApply
 #'@importFrom methods is
diff --git a/R/Collect.R b/R/Collect.R
index b00ae6c..6d752f5 100644
--- a/R/Collect.R
+++ b/R/Collect.R
@@ -1,11 +1,11 @@
 #'Collect and merge the computation results
 #'
 #'The final step of the startR workflow after the data operation. It is used when
-#'the parameter 'wait' of Compute() is FALSE, and the functionality includes
-#'updating the job status shown on the EC-Flow GUI and collecting all the chunks
-#'of results as one data array when the execution is done. See more details on
-#'\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}.
-#'
+#'the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the
+#'results as one data array when the execution is done. See more details in the
+#'\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}.
+#'Collect() calls Collect_ecflow() or Collect_autosubmit() according to the
+#'chosen workflow manager.
 #'@param startr_exec An R object returned by Compute() when the parameter 'wait'
 #'  of Compute() is FALSE.
It can be directly from a Compute() call or read from
#'  the RDS file.
@@ -13,16 +13,15 @@
 #'  Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a
 #'  blocking call, in which Collect() will retrieve information from the HPC,
 #'  including signals and outputs, each polling_period seconds. The status
-#'  can be monitored on the EC-Flow GUI. Collect() will not return until the
-#'  results of all chunks have been received. If FALSE, Collect() will crash with
-#'  an error if the execution has not finished yet, otherwise it will return the
-#'  merged array. The default value is TRUE.
-#'@param remove A logical value deciding whether to remove of all data results
-#'  received from the HPC (and stored under 'ecflow_suite_dir', the parameter in
-#'  Compute()) after being collected, as well as the local job folder under
-#'  'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and
-#'  Collect() it as many times as desired, set remove to FALSE. The default
-#'  value is TRUE.
+#'  can be monitored on the workflow manager GUI. Collect() will not return
+#'  until the results of all the chunks have been received. If FALSE, Collect()
+#'  returns an error if the execution has not finished, otherwise it will return
+#'  the merged array. The default value is TRUE.
+#'@param remove A logical value deciding whether to remove all chunk results
+#'  received from the HPC after the data have been collected, as well as the local
+#'  job folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the
+#'  data and Collect() them as many times as desired, set remove to FALSE. The
+#'  default value is TRUE.
 #'@return A list of merged data array.
 #'
 #'@examples
-- 
GitLab

From 8da2a993678b54e1649c6dc043533599ed153282 Mon Sep 17 00:00:00 2001
From: aho
Date: Fri, 11 Aug 2023 16:26:09 +0200
Subject: [PATCH 27/46] Update documentation for Autosubmit

---
 inst/doc/practical_guide.md | 67 +++++++++++++++++++++++++++++++++++--
 man/CDORemapper.Rd          |  2 +-
 man/Collect.Rd              | 26 +++++++-------
 man/Compute.Rd              |  5 +++
 4 files changed, 84 insertions(+), 16 deletions(-)

diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md
index 70b29a6..2ad00b5 100644
--- a/inst/doc/practical_guide.md
+++ b/inst/doc/practical_guide.md
@@ -14,7 +14,8 @@ If you would like to start using startR rightaway on the BSC infrastructure, you
     2. [**Step() and AddStep()**](#4-2-step-and-addstep)
     3. [**Compute()**](#4-3-compute)
         1. [**Compute() locally**](#4-3-1-compute-locally)
-        2. [**Compute() on HPCs**](#4-3-2-compute-on-hpcs)
+        2. [**Compute() on HPCs with ecFlow**](#4-3-2-compute-on-hpcs-with-ecflow)
+        3. [**Compute() on HPCs with Autosubmit**](#4-3-2-compute-on-hpcs-with-autosubmit)
     4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui)
 5. [**Additional information**](#5-additional-information)
     1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores)
@@ -566,9 +567,12 @@ res <- Compute(wf,
 *   max: 8.03660178184509
 ```
 
-#### 4-3-2. Compute() on HPCs
+#### 4-3-2. Compute() on HPCs with ecFlow
 
-In order to run the computation on a HPC, you will need to make sure the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported).
+We can use a workflow manager (ecFlow or Autosubmit) to dispatch computation jobs on an HPC.
+To use Autosubmit, check the next section.
+You will need to make sure that the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide.
+If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported).
 
 You will need to add two parameters to your `Compute()` call: `cluster` and `ecflow_suite_dir`.
@@ -689,6 +693,63 @@ As mentioned above in the definition of the `cluster` parameters, it is strongly
 
 You can find the `cluster` configuration for other HPCs at the end of this guide [Compute() cluster templates](#compute-cluster-templates)
 
+#### 4-3-3. Compute() on HPCs with Autosubmit
+
+To use Autosubmit as workflow manager, add the following parameters to your `Compute()` call:
+ `cluster`, `autosubmit_suite_dir`, and `autosubmit_server`.
+
+`autosubmit_suite_dir`is the path where to store temporary files generated for
+Autosubmit to establish the workflow. It should be accessible from both the workstation and the Autosubmit machine.
+
+`autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01' or 'bscesautosubmit02'.
+
+The parameter `cluster` expects a list of components that provide the configuration of the Autosubmit machine. For now, the supported platforms are 'local' (run on the Autosubmit machine) and 'nord3' (Autosubmit submits jobs to Nord3).
+You can see one example of cluster configuration below.
+
+```r
+  res <- Compute(wf, chunks = list(sdate = 2),
+                 threads_compute = 4, threads_load = 2,
+                 cluster = list(
+                   queue_host = 'nord3',
+                   expid = <expid>,
+                   hpc_user = "bsc32xxx",
+                   r_module = "R/4.1.2-foss-2019b",
+                   CDO_module = "CDO/1.9.8-foss-2019b",
+                   autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
+                   cores_per_job = 4,
+                   job_wallclock = '01:00:00',
+                   max_jobs = 4
+                 ),
+                 workflow_manager = 'autosubmit',
+                 autosubmit_suite_dir = "/home/Earth/<user>/startR_local_autosubmit/",
+                 autosubmit_server = 'bscesautosubmit01',
+                 wait = TRUE
+  )
+```
+
+The cluster components and options are explained next:
+
+- `queue_host`: Must match the platform name in the Autosubmit configuration file `platforms.yml`, or 'local'. The provided platforms are: 'nord3'.
+- `expid`: The Autosubmit experiment to run the computation. You can create the experiment beforehand, or let startR create one for you by not specifying this component.
+As good practice, note down the expid if it is automatically created by startR, and re-use or delete it afterwards.
+- `hpc_user`: Your user ID on the HPC (e.g., "bsc32xxx"). It is required if "queue_host" is not 'local'.
+- `data_dir`: The path to the data repository if the data is not shared.
+- `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`.
This parameter is optional; only required when the libraries are not installed in the R module.
+- `init_commands`: The initial commands in the bash script before the R script runs. For example, the modules required by the computation can be loaded here.
+- `r_module`: name of the UNIX environment module to be used for R. If not specified, `module load R` will be used.
+- `CDO_module`: name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `transform` is required in Start().
+- `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used.
+- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread.
+- `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`.
+- `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system.
+- `extra_queue_params`: list of character strings with additional queue headers for the jobs
+to be submitted to the HPC. For example, to constrain jobs to the medmem node ('#SBATCH --constraint=medmem') or to request exclusive mode ('#SBATCH --exclusive').
+- `polling_period`: when the connection is unidirectional, the workstation will ask the HPC login node for results each `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning.
+- `special_setup`: name of the machine if the computation requires a special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4').
+
+After the `Compute()` call is executed, you can monitor the status on [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/).
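+
+For example, a non-blocking run can be launched through Autosubmit and collected
+later with Collect(). This is only a sketch: the cluster list is shortened and
+the paths are hypothetical.
+
+```r
+res <- Compute(wf, chunks = list(sdate = 2),
+               workflow_manager = 'autosubmit',
+               cluster = list(queue_host = 'nord3',
+                              hpc_user = 'bsc32xxx',
+                              job_wallclock = '01:00:00',
+                              max_jobs = 4),
+               autosubmit_suite_dir = "/home/Earth/<user>/startR_local_autosubmit/",
+               autosubmit_server = 'bscesautosubmit01',
+               wait = FALSE)
+saveRDS(res, file = 'startr_exec.rds')  # the R session can be closed now
+# Later, possibly in a new session:
+result <- Collect(readRDS('startr_exec.rds'), wait = TRUE)
+```
+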
 ### 4-4. Collect() and the EC-Flow GUI
 
 Usually, in use cases where large data inputs are involved, it is convenient to add the parameter `wait = FALSE` to your `Compute()` call. With this parameter, `Compute()` will immediately return an object with information about your startR execution. You will be able to store this object onto disk. After doing that, you will not need to worry in case your workstation turns off in the middle of the computation. You will be able to close your R session, and collect the results later on with the `Collect()` function.
diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd
index 024ce32..5ced7cd 100644
--- a/man/CDORemapper.Rd
+++ b/man/CDORemapper.Rd
@@ -51,7 +51,7 @@ perform the interpolation, hence CDO is required to be installed.
 data_path <- system.file('extdata', package = 'startR')
 path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
 sdates <- c('200011')
- \donttest{
+ \dontrun{
 data <- Start(dat = list(list(path = path_obs)),
               var = 'tos',
               sdate = sdates,
diff --git a/man/Collect.Rd b/man/Collect.Rd
index 97b529b..d90caca 100644
--- a/man/Collect.Rd
+++ b/man/Collect.Rd
@@ -15,25 +15,27 @@ the RDS file.}
 Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a
 blocking call, in which Collect() will retrieve information from the HPC,
 including signals and outputs, each polling_period seconds. The status
-can be monitored on the EC-Flow GUI. Collect() will not return until the
-results of all chunks have been received. If FALSE, Collect() will crash with
-an error if the execution has not finished yet, otherwise it will return the
-merged array. The default value is TRUE.}
+can be monitored on the workflow manager GUI. Collect() will not return
+until the results of all the chunks have been received. If FALSE, Collect()
+returns an error if the execution has not finished, otherwise it will return
+the merged array. The default value is TRUE.}
 
-\item{remove}{A logical value deciding whether to remove of all data results
-received from the HPC (and stored under 'ecflow_suite_dir', the parameter in
-Compute()) after being collected. To preserve the data and Collect() it as
-many times as desired, set remove to FALSE. The default value is TRUE.}
+\item{remove}{A logical value deciding whether to remove all chunk results
+received from the HPC after the data have been collected, as well as the local
+job folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the
+data and Collect() them as many times as desired, set remove to FALSE. The
+default value is TRUE.}
 }
 \value{
 A list of merged data array.
 }
 \description{
 The final step of the startR workflow after the data operation. It is used when
-the parameter 'wait' of Compute() is FALSE, and the functionality includes
-updating the job status shown on the EC-Flow GUI and collecting all the chunks
-of results as one data array when the execution is done. See more details on
-\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}.
+the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the
+results as one data array when the execution is done. See more details in the
+\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}.
+Collect() calls Collect_ecflow() or Collect_autosubmit() according to the
+chosen workflow manager.
 }
 \examples{
 data_path <- system.file('extdata', package = 'startR')
diff --git a/man/Compute.Rd b/man/Compute.Rd
index 5b03abd..f6ad867 100644
--- a/man/Compute.Rd
+++ b/man/Compute.Rd
@@ -7,11 +7,14 @@
 Compute(
   workflow,
   chunks = "auto",
+  workflow_manager = "ecFlow",
   threads_load = 1,
   threads_compute = 1,
   cluster = NULL,
   ecflow_suite_dir = NULL,
   ecflow_server = NULL,
+  autosubmit_suite_dir = NULL,
+  autosubmit_server = NULL,
   silent = FALSE,
   debug = FALSE,
   wait = TRUE
@@ -28,6 +31,8 @@ those not required as the target dimension in function Step(). The default
 value is 'auto', which lists all the non-target dimensions and each one has
 one chunk.}
 
+\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'.}
+
 \item{threads_load}{An integer indicating the number of execution threads to
 use for the data retrieval stage. The default value is 1.}
 
-- 
GitLab

From 76cd9e1c8c3f9d38d0cb31c7f0b67e1b732074cc Mon Sep 17 00:00:00 2001
From: aho
Date: Fri, 11 Aug 2023 16:40:04 +0200
Subject: [PATCH 28/46] fix link

---
 inst/doc/practical_guide.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md
index 2ad00b5..dead996 100644
--- a/inst/doc/practical_guide.md
+++ b/inst/doc/practical_guide.md
@@ -15,7 +15,7 @@ If you would like to start using startR rightaway on the BSC infrastructure, you
     3. [**Compute()**](#4-3-compute)
         1. [**Compute() locally**](#4-3-1-compute-locally)
         2. [**Compute() on HPCs with ecFlow**](#4-3-2-compute-on-hpcs-with-ecflow)
-        3. [**Compute() on HPCs with Autosubmit**](#4-3-2-compute-on-hpcs-with-autosubmit)
+        3. [**Compute() on HPCs with Autosubmit**](#4-3-3-compute-on-hpcs-with-autosubmit)
     4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui)
 5. [**Additional information**](#5-additional-information)
     1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores)
@@ -695,10 +695,10 @@ You can find the `cluster` configuration for other HPCs at the end of this guide
 
 #### 4-3-3. Compute() on HPCs with Autosubmit
 
-To use Autosubmit as workflow manager, add the following parameters to your `Compute()` call:
+To use Autosubmit as workflow manager, add the following parameters to your Compute() call:
  `cluster`, `autosubmit_suite_dir`, and `autosubmit_server`.
 
-`autosubmit_suite_dir`is the path where to store temporary files generated for
+`autosubmit_suite_dir` is the path where to store temporary files generated for
 Autosubmit to establish the workflow. It should be accessible from both the workstation and the Autosubmit machine.
 
 `autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01' or 'bscesautosubmit02'.
@@ -747,7 +747,7 @@ to be submitted to the HPC. For example, to constrain jobs to the medmem node ('#SBAT
 - `polling_period`: when the connection is unidirectional, the workstation will ask the HPC login node for results each `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning.
 - `special_setup`: name of the machine if the computation requires a special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4').
 
-After the `Compute()` call is executed, you can monitor the status on [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/).
+After the Compute() call is executed, you can monitor the status on [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/).
-- 
GitLab

From 6b4dbeef1c8b48ffb220d8452628acfa60f649a3 Mon Sep 17 00:00:00 2001
From: aho
Date: Thu, 17 Aug 2023 11:41:44 +0200
Subject: [PATCH 29/46] Constrain max waiting jobs to 366

---
 R/Utils.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/Utils.R b/R/Utils.R
index c1d738f..c693db4 100644
--- a/R/Utils.R
+++ b/R/Utils.R
@@ -951,6 +951,8 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) {
 
   #Q: Should it be the total amount of chunk?
   conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks)))  # total amount of chunk
+  #NOTE: Nord3 max.
amount of queued jobs is 366 + if (conf$config$MAXWAITINGJOBS > 366) conf$config$MAXWAITINGJOBS <- 366 conf$config$TOTALJOBS <- as.integer(cluster$max_jobs) ############################################################ -- GitLab From ef114bc0c5da472a58d8160e066571af55a408d9 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 17 Aug 2023 18:02:46 +0200 Subject: [PATCH 30/46] Use threads_compute instead of cores_per_job --- R/ByChunks_autosubmit.R | 25 ++++++++++++++----------- R/Utils.R | 7 +++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index af411d9..133f467 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -289,15 +289,18 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") } ### cores_per_job - if (is.null(cluster[['cores_per_job']])) { - cluster[['cores_per_job']] <- threads_compute - } - if (!is.numeric(cluster[['cores_per_job']])) { - stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") - } - cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) - if (cluster[['cores_per_job']] > threads_compute) { - .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") +# if (is.null(cluster[['cores_per_job']])) { +# cluster[['cores_per_job']] <- threads_compute +# } +# if (!is.numeric(cluster[['cores_per_job']])) { +# stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") +# } +# cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) +# if (cluster[['cores_per_job']] > threads_compute) { +# .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") +# } + if (!is.null(cluster[['cores_per_job']])) { + .warning("The component 'cores_per_job' in cluster list is not used. 
Please specify the cores by parameter 'threads_compute'.") } ### job_wallclock tmp <- strsplit( '01:00:00', ':')[[1]] @@ -530,7 +533,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) + write_autosubmit_confs(chunks, threads_compute, cluster, autosubmit_suite_dir) # Iterate through chunks chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) @@ -547,7 +550,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } - timings[['cores_per_job']] <- cluster[['cores_per_job']] +# timings[['cores_per_job']] <- cluster[['cores_per_job']] timings[['concurrent_chunks']] <- cluster[['max_jobs']] t_end_bychunks_setup <- Sys.time() diff --git a/R/Utils.R b/R/Utils.R index c693db4..f496333 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -928,7 +928,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { } # This function generates the .yml files under autosubmit conf/ -write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { +write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_suite_dir) { #TODO: Remove this library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) @@ -975,8 +975,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { # wallclock from '01:00:00' to '01:00' jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) jobs[[1]]$PLATFORM <- cluster$queue_host - #Q: Is it cores_per_job? - jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) + jobs[[1]]$THREADS <- as.integer(threads_compute) jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) jobs[[1]][names(chunks)] <- "" @@ -996,7 +995,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { } else if (conf_type == "platforms") { if (tolower(cluster$queue_host) != "local") { conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user - conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(threads_compute) if (!is.null(cluster$extra_queue_params)) { tmp <- unlist(cluster$extra_queue_params) for (ii in 1:length(tmp)) { -- GitLab From 2d08116a6295a5bcbd51e97d8dc9c7e0aace3320 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Aug 2023 19:10:19 +0200 Subject: [PATCH 31/46] Fix unit tests for deleted files --- tests/testthat/test-Compute-chunk_split_dim.R | 10 +- ...st-Start-implicit_dependency_by_selector.R | 136 +++++++++--------- tests/testthat/test-Start-multiple-sdates.R | 4 +- tests/testthat/test-Start-split-merge.R | 6 +- 4 files changed, 78 insertions(+), 78 deletions(-) diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 09da160..0e82126 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -116,13 +116,13 @@ c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2) test_that("2. 
The files are repeated", { -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') sdates.seq <- c("20161222","20161229", "20170105","20170112") suppressWarnings( hcst <- Start(dat = ecmwf_path_hc, - var = 'sfcWind', + var = 'tas', sdate = sdates.seq, syear = indices(1:2), #'all', time = 'all', @@ -207,15 +207,15 @@ aperm(res3, c(7, 2, 3, 4, 1, 5, 6)) ) expect_equal( dim(res1), -c(time = 47, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) +c(time = 46, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) ) expect_equal( dim(res2), -c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 47) +c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 46) ) expect_equal( dim(res3), -c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 47) +c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46) ) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 10e5545..fd82aa7 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -8,74 +8,74 @@ #--------------------------------------------------- context("Start() implicit dependency by selector dimension") - -test_that("1. region with different index between files", { - -path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', - 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', - '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') - -# two sdates have different index for Nino3. -region <- array('Nino3', dim = c(sdate = 2, region = 1)) - -suppressWarnings( -data <- Start(dat = path, - var = 'tosmean', - sdate = c('1993', '2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = region, - time = 'all', - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = 'sdate'), - retrieve = T) -) -suppressWarnings( -data1 <- Start(dat = path, - var = 'tosmean', - sdate = c('1993'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) -suppressWarnings( -data2 <- Start(dat = path, - var = 'tosmean', - sdate = c('2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) - -expect_equal( -dim(data), -c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) -) -expect_equal( -data[1, 1, 1, 1, ], -data1[1, 1, 1, 1, ] -) -expect_equal( -data[1, 1, 2, 1, ], -data2[1, 1, 1, 1, ] -) - - -}) +#NOTE: The files don't exist anymore. +#test_that("1. 
region with different index between files", { +# +#path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', +# 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', +# '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') +# +## two sdates have different index for Nino3. +#region <- array('Nino3', dim = c(sdate = 2, region = 1)) +# +#suppressWarnings( +#data <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993', '2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = region, +# time = 'all', +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = 'sdate'), +# retrieve = T) +#) +#suppressWarnings( +#data1 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +#suppressWarnings( +#data2 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +# +#expect_equal( +#dim(data), +#c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) +#) +#expect_equal( +#data[1, 1, 1, 1, ], +#data1[1, 1, 1, 1, ] +#) +#expect_equal( +#data[1, 1, 2, 1, ], +#data2[1, 1, 1, 1, ] +#) +# +# +#}) test_that("2. time depends on sdate", { diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index d0c4bd3..d4ab2b2 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -7,10 +7,10 @@ context("Start() multiple sdate with split + merge dim") # It might happen when reading experimental data with many start dates, # and the corresponding observations are required to have the same data structure. -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') -var_name <- 'sfcWind' +var_name <- 'tas' var100_name <- 'windagl100' sdates.seq <- c("20161222","20161229","20170105","20170112") diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 8793296..7133adc 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,7 +1,7 @@ context("Start() split + merge dim and value check") -var_name <- 'sfcWind' -path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc' +var_name <- 'tas' +path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' suppressWarnings( hcst <- Start(dat = path.exp, @@ -24,7 +24,7 @@ file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7)))) path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' - +var_name <- "sfcWind" test_that("1. 
split + merge + narm", {
 suppressWarnings(
-- 
GitLab

From 2576319b8b1920a04c5fcba2829d80b8f487ff13 Mon Sep 17 00:00:00 2001
From: aho
Date: Wed, 23 Aug 2023 13:08:19 +0200
Subject: [PATCH 32/46] Allow selectors to be integer

---
 R/SelectorChecker.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R
index 7b69a8b..92e1d1b 100644
--- a/R/SelectorChecker.R
+++ b/R/SelectorChecker.R
@@ -50,7 +50,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE,
       crescent_selectors <- TRUE
       if (all(sapply(selectors, function(x) {
-        any(c('numeric', "POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(x))
+        any(c("numeric", "integer", "POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(x))
       }))) {
         if (selectors[[2]] < selectors[[1]]) {
           crescent_selectors <- FALSE
-- 
GitLab
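A standalone sketch of the class check this patch widens. Before the change, plain integer selectors (class "integer", e.g. 1L or the output of seq_len()) matched none of the listed classes, so the ascending/descending check on the selector endpoints was silently skipped:

```r
selectors <- list(1L, 12L)  # integer endpoints
all(sapply(selectors, function(x) {
  any(c("numeric", "integer", "POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(x))
}))
# TRUE with this patch, so selectors[[2]] < selectors[[1]] is now evaluated
# and crescent_selectors is set correctly for integer selectors too.
```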
From d9e74f738e47df6cb1111764c64b0e279ac6444e Mon Sep 17 00:00:00 2001
From: aho
Date: Wed, 23 Aug 2023 18:54:21 +0200
Subject: [PATCH 33/46] Change unit test files to my scratch:
 /esarchive/scratch/aho/startR_unittest_files

---
 tests/testthat/test-AddStep-DimNames.R        |  2 +-
 tests/testthat/test-Compute-CDORemap.R        |  1 +
 tests/testthat/test-Compute-NumChunks.R       |  5 +-
 .../testthat/test-Compute-chunk_depend_dim.R  | 10 ++-
 tests/testthat/test-Compute-chunk_split_dim.R |  8 +-
 tests/testthat/test-Compute-extra_params.R    |  2 +
 .../test-Compute-inconsistent_target_dim.R    |  2 +
 .../testthat/test-Compute-irregular_regrid.R  |  2 +
 tests/testthat/test-Compute-timedim.R         |  1 +
 tests/testthat/test-Compute-transform_all.R   |  7 +-
 .../testthat/test-Compute-transform_indices.R |  3 +
 .../testthat/test-Compute-transform_values.R  | 38 +++++++--
 tests/testthat/test-Compute-two_data.R        |  4 +
 tests/testthat/test-Compute-use_attribute.R   |  1 +
 .../testthat/test-Start-DCPP-across-depends.R | 14 +++-
 tests/testthat/test-Start-calendar.R          | 11 +++
 tests/testthat/test-Start-depends_values.R    |  1 +
 .../testthat/test-Start-first_file_missing.R  |  5 +-
 .../test-Start-global-lon-across_meridian.R   |  1 +
 ...st-Start-implicit_dependency_by_selector.R |  2 +
 .../testthat/test-Start-implicit_inner_dim.R  |  1 +
 .../testthat/test-Start-indices_list_vector.R | 15 ++--
 .../testthat/test-Start-largest_dims_length.R |  6 +-
 .../test-Start-line_order-consistency.R       |  1 +
 tests/testthat/test-Start-metadata_dims.R     | 23 ++++--
 .../test-Start-metadata_filedim_dependency.R  |  1 +
 .../testthat/test-Start-metadata_reshaping.R  | 81 +++++++++++--------
 tests/testthat/test-Start-multiple-sdates.R   |  6 +-
 .../test-Start-path_glob_permissive.R         | 23 +++---
 tests/testthat/test-Start-reorder-lat.R       | 15 ++--
 tests/testthat/test-Start-reorder-latCoarse.R |  1 +
 .../test-Start-reorder-lon-180to180.R         |  1 +
 ...st-Start-reorder-lon-transform_-180to180.R |  1 +
 .../test-Start-reorder-lon-transform_0to360.R |  2 +
 tests/testthat/test-Start-reorder-lon0to360.R |  1 +
 .../test-Start-reorder-lon0to360Coarse.R      |  1 +
 tests/testthat/test-Start-reorder-retrieve.R  |  2 +
 tests/testthat/test-Start-reorder_all.R       |  2 +
 tests/testthat/test-Start-reorder_indices.R   |  2 +
 tests/testthat/test-Start-reshape.R           |  8 +-
 tests/testthat/test-Start-return_vars_name.R  |  1 +
 tests/testthat/test-Start-split-merge.R       |  3 +
 tests/testthat/test-Start-time_unit.R         |  6 +-
 tests/testthat/test-Start-transform-all.R     |  8 +-
 tests/testthat/test-Start-transform-border.R  | 23 +++---
 .../test-Start-transform-lat-Sort-all.R       |  3 +-
 .../test-Start-transform-lat-Sort-indices.R   |  2 +
 .../test-Start-transform-lat-Sort-values.R    |  2 +
 ...test-Start-transform-lon-across_meridian.R
 .../testthat/test-Start-transform-metadata.R  |  2 +
 .../test-Start-transform-three-selectors.R    |  2 +
 tests/testthat/test-Start-two_dats.R          |  2 +
 .../testthat/test-Start-values_list_vector.R  | 20 +++--
 53 files changed, 280 insertions(+), 109 deletions(-)

diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R
index 2fe6b39..647ca2f 100644
--- a/tests/testthat/test-AddStep-DimNames.R
+++ b/tests/testthat/test-AddStep-DimNames.R
@@ -3,7 +3,7 @@ context("Error with bad dimensions tests.")
 
 test_that("Single File - Local execution", {
 suppressWarnings(
-  data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc',
+  data <- Start(dataset = '/esarchive/scratch/aho/startR_unittest_files/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc',
                 var = 'tas',
                 sdate = '2000',
                 month = indices(1),
diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R
index 28df234..b1479e2 100644
--- a/tests/testthat/test-Compute-CDORemap.R
+++ b/tests/testthat/test-Compute-CDORemap.R
@@ -3,6 +3,7 @@ context("Compute use CDORemap")
 test_that("ex2_3", {
 
   repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+  repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
 
 suppressWarnings(
   data <- Start(dat = repos,
diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R
index 9e626e4..5d9a775 100644
--- a/tests/testthat/test-Compute-NumChunks.R
+++ b/tests/testthat/test-Compute-NumChunks.R
@@ -3,8 +3,11 @@ context("Number of chunks tests.")
 
 test_that("Single File - Local execution", {
 
+path <- '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc',
+data <- Start(dataset = path,
               var = 'tas',
               sdate = '2000',
               month = indices(1),
diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R
index ce92b94..9c78764 100644
--- a/tests/testthat/test-Compute-chunk_depend_dim.R
+++ b/tests/testthat/test-Compute-chunk_depend_dim.R
@@ -12,6 +12,8 @@ path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/',
                'HadGEM3-GC31-MM/dcppA-hindcast/',
                'r1i1p1f2/Omon/tos/gn/v20200417/',
                '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc')
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 sdates <- c('2016', '2017', '2018')
 
 # retrieve = T for verification
@@ -149,7 +151,9 @@ test_that("2.a. depending dim is indices(); depended dim is indices()", {
 suppressWarnings(
 data <- Start(dat = path,
               var = 'tos',
-              sdate = indices(57:59), # 2016, 2017, 2018
+#NOTE: sdate was indices(57:59) if path is /esarchive/. Now the path is under my scratch,
+# the first sdate is 2016.
+              sdate = indices(1:3), # 2016, 2017, 2018
               chunk = indices(2:4),
               chunk_depends = 'sdate',
               time = 'all',
@@ -204,7 +208,9 @@ expect_error(
 suppressWarnings(
 Start(dat = path,
       var = 'tos',
+#NOTE: sdate was indices(57:59) if path is /esarchive/. Now the path is under my scratch,
+# the first sdate is 2016.
+ sdate = indices(1:3), # 2016, 2017, 2018 chunk = chunks, chunk_depends = 'sdate', time = 'all', diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 0e82126..a40f745 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -8,6 +8,7 @@ test_that("1. The files are not repeated", { repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') +repos_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_exp) suppressWarnings( exp <- Start(dat = repos_exp, @@ -32,6 +33,8 @@ dates <- attr(exp, 'Variables')$common$time # 4 3 repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' +repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + suppressWarnings( obs <- Start(dat = repos_obs, var = 'tas', @@ -117,7 +120,10 @@ c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2) test_that("2. The files are repeated", { ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') -obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) +obs_path <- paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) + sdates.seq <- c("20161222","20161229", "20170105","20170112") suppressWarnings( diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index 9b42e43..02eab30 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -7,6 +7,8 @@ test_that("ex2_6", { # Prepare sdates and paths #========================= dataset <- "/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc" + dataset <- paste0('/esarchive/scratch/aho/startR_unittest_files/', dataset) + sdates <- paste0(1981:1982, rep(10:12, 2)) #=================== # Get daily winds diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 13d2a44..7ebc6f5 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -6,7 +6,9 @@ context("Compute()/ByChunks(): dimension consistence check") test_that("ex2_11", { path.exp <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc' +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) var <- 'tos' y1 <- 1981 diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 00a5c1d..c76793d 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -7,6 +7,8 @@ test_that("1. 
ex2_13", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/', '$var$_*_s$sdate$-$member$_gn_$aux$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dataset = path, var = 'tos', diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index 80d96ff..d63ae6c 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -3,6 +3,7 @@ context("Compute on time dimension") test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index e6363f4..a7a67dd 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -4,6 +4,8 @@ test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() path <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dat = path, var = 'tos', @@ -54,8 +56,11 @@ test_that("2. chunk along lon", { #NOTE: the results are not identical when exp has extra cells = 2 +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index 34ddf48..37decfc 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -25,6 +25,7 @@ test_that("1. global", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) #----------------------------------- # crop = region @@ -149,6 +150,7 @@ test_that("2. regional, no border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( @@ -248,6 +250,7 @@ test_that("3. regional, at lon border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index e6b6c26..191d651 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -17,8 +17,11 @@ lats.max <- 90 # crop = region #NOTE: res1 and res3 differ if extra_cells = 2. But if retrieve = T, extra_cells = 2 or 8 is equal. 
+path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -95,6 +98,8 @@ tolerance = 0.001
 # crop = region, selector is values(c())
 library(easyNCDF)
 pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc"
+pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh)
+
 file <- NcOpen(pathh)
 arr <- NcToArray(file,
                  dim_indices = list(time = 1, ensemble = 1,
@@ -106,8 +111,11 @@ lons <- NcToArray(file,
                   dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude')
 NcClose(file)
+path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -152,8 +160,12 @@ lons.min <- -180
 lons.max <- 179.9
 lats.min <- -90
 lats.max <- 90
+
+path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -242,8 +254,11 @@ lats.min <- 20
 lats.max <- 40
 # crop = region
+path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101', #paste0(2000:2001, '0101'),
             ensemble = indices(1), #'all',
@@ -326,8 +341,11 @@ lats.min <- 21
 lats.max <- 41
 # crop = region
+path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101', #paste0(2000:2001, '0101'),
             ensemble = indices(1), #'all',
@@ -427,8 +445,11 @@ lats.max <- 40
 #NOTE: transform_extra_cells = 8 the results are not equal
 # crop = region
+path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -520,8 +541,11 @@ tolerance = 0.001
 # crop = region, CircularSort(-180, 180)
 #NOTE: transform_extra_cells = 8 the results are not equal
+path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
diff --git a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R
index 9cb7145..735735f 100644
--- a/tests/testthat/test-Compute-two_data.R
+++ b/tests/testthat/test-Compute-two_data.R
@@ -5,6 +5,8 @@ test_that("ex2_7", {
 # exp data
  repos <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/',
                  '$var$_f6h/$var$_$sdate$.nc')
+ repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
+
  sdates <- sapply(2013:2014, function(x) paste0(x, sprintf('%02d', 1:12), '01'))
 suppressWarnings(
@@ -24,6 +26,8 @@ suppressWarnings(
 # obs data
  repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/monthly_mean/',
                      '$var$_f6h/$var$_$sdate$.nc')
+ repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs)
+
  sdates_obs <- (sapply(2012:2013, function(x) paste0(x, sprintf('%02d', 1:12))))
 suppressWarnings(
 obs <- Start(dat = repos_obs,
diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R
index 2ca73a7..7ec3dc2 100644
--- a/tests/testthat/test-Compute-use_attribute.R
+++ b/tests/testthat/test-Compute-use_attribute.R
@@ -3,6 +3,7 @@ context("Compute use attributes")
 test_that("ex2_2", {
   repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+  repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
 suppressWarnings(
 data <- Start(dat = repos,
diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R
index a3d9586..c561abd 100644
--- a/tests/testthat/test-Start-DCPP-across-depends.R
+++ b/tests/testthat/test-Start-DCPP-across-depends.R
@@ -1,6 +1,7 @@
 context("DCPP successfull retrieved for depends and across parameters.")
 test_that("Chunks of DCPP files- Local execution", {
  path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc'
+ path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 sdates <- c('2017', '2018')
 suppressWarnings(
@@ -19,24 +20,31 @@ suppressWarnings(
 )
 # [sdate = 2, chunk = 3]
+path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 suppressWarnings(
-dat_2018_chunk3 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc',
+dat_2018_chunk3 <- Start(dat = path,
                          var = 'tos', time = 'all',
                          i = indices(1:10), j = indices(1:10), retrieve = TRUE)
 )
 expect_equal(dat[1,1,2,25:36,,], dat_2018_chunk3[1,1,,,])
 # [sdate = 1, chunk = 2]
+path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 suppressWarnings(
-dat_2017_chunk2 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc',
+dat_2017_chunk2 <- Start(dat = path,
                          var = 'tos', time = 'all',
                          i = indices(1:10), j = indices(1:10), retrieve = TRUE)
 )
 expect_equal(dat[1,1,1,13:24,,], dat_2017_chunk2[1,1,,,])
 # [sdate = 2, chunk = 1]
+path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 suppressWarnings(
-dat_2018_chunk1 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc',
+dat_2018_chunk1 <- Start(dat = path,
                          var = 'tos', time = 'all',
                          i = indices(1:10), j = indices(1:10), retrieve = TRUE)
 )
diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R
index da63e53..0ee4c5e 100644
--- a/tests/testthat/test-Start-calendar.R
+++ b/tests/testthat/test-Start-calendar.R
@@ -5,6 +5,7 @@ test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", {
                         'DCPP/MOHC/HadGEM3-GC31-MM/',
                         'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200417/',
                         '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc')
+path_hadgem3 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_hadgem3)
 sdate <- c('2000', '2001')
 fyear_hadgem3 <- indices(1)
@@ -48,6 +49,7 @@ expect_equal(
 test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", {
 path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc'
+path_bcc_csm2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_bcc_csm2)
 suppressWarnings(
 data <- Start(dat = path_bcc_csm2,
@@ -82,6 +84,7 @@ test_that("3. standard, daily, unit = 'days since 1850-1-1 00:00:00'", {
                        'DCPP/MPI-M/MPI-ESM1-2-HR/',
                        'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/',
                        '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc')
+  path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm)
  var <- 'tasmax'
  sdate <- '2000'
@@ -122,6 +125,8 @@ test_that("4. standard, monthly, unit = 'days since 1850-1-1 00:00:00'", {
                        'DCPP/MPI-M/MPI-ESM1-2-HR/',
                        'dcppA-hindcast/r1i1p1f1/Amon/$var$/gn/v20200320/',
                        '$var$_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_200011-201012.nc')
+  path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm)
+
  sdate <- '2000'
  fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231')
@@ -154,6 +159,7 @@ expect_equal(
 test_that("5. proleptic_gregorian, 6hrly, unit = 'hours since 2000-11-01 00:00:00'", {
  repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/',
                      '$var$/$var$_199405.nc')
+  repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs)
  date <- paste0('1994-05-', sprintf('%02d', 1:31), ' 00:00:00')
  date <- as.POSIXct(date, tz = 'UTC')
 # attr(date, 'tzone') <- 'UTC'
@@ -189,6 +195,8 @@ expect_equal(
 test_that("6. standard, monthly, unit = 'months since 1850-01-01 00:00:00'", {
  repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc'
+ repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs)
+
 suppressWarnings(
 obs <- Start(dat = repos_obs,
              var = 'tos',
@@ -246,6 +254,8 @@ c("1960-11-16 00:00:00", "1960-12-16 12:00:00", "1961-01-16 12:00:00", "1961-02-
 test_that("7. proleptic_gregorian, monthly, unit = 'days since 1850-1-1 00:00:00'", {
  repos <- '/esarchive/exp/mpi-esm-lr/cmip5-historical_i0p1/monthly_mean/$var$/$var$_$sdate$.nc'
+ repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
+
 suppressWarnings(
 data <- Start(dat = repos,
               var = 'tas',
@@ -278,6 +288,7 @@ suppressWarnings(
 test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", {
  repos <- '/esarchive/exp/CMIP5/historical/ecearth/cmip5-historical_i0p1/$var$_3hr_EC-EARTH_historical_r6i1p1_$period$.nc'
+ repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
 suppressWarnings(
 data <- Start(dat = repos,
               var = 'vas',
diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R
index 18d1b9f..e4e4adc 100644
--- a/tests/testthat/test-Start-depends_values.R
+++ b/tests/testthat/test-Start-depends_values.R
@@ -6,6 +6,7 @@ context("Start() using values() to define dependency relations")
 path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 sdates <- c('2016', '2017', '2018')
 chunks <- array(dim = c(chunk = 3, sdate = 3))
diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R
index 392841a..9c232e6 100644
--- a/tests/testthat/test-Start-first_file_missing.R
+++ b/tests/testthat/test-Start-first_file_missing.R
@@ -7,6 +7,7 @@ context("Start() retrieves files that the first file is missing")
 # the data. The parameter 'metadata_dims' can also be used in this case.
 file <- "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/$var$_f24h/$var$_$file_date$.nc"
+file <- paste0('/esarchive/scratch/aho/startR_unittest_files/', file)
 var <- 'tas'
 sdates1 <- c('20130611') #exists
 sdates2 <- c('20130618') #does not exist
@@ -119,7 +120,7 @@ data <- Start(dat = file,
 )
 expect_equal(
   as.vector(attr(data, 'NotFoundFiles')),
-  c("/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA)
+  c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA)
 )
 })
@@ -149,7 +150,7 @@ data <- Start(dat = file,
 )
 expect_equal(
   as.vector(attr(data, 'NotFoundFiles')),
-  c(NA, "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc")
+  c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc")
 )
 })
diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R
index 34c861f..0360629 100644
--- a/tests/testthat/test-Start-global-lon-across_meridian.R
+++ b/tests/testthat/test-Start-global-lon-across_meridian.R
@@ -5,6 +5,7 @@ context("Start() across_meridia global lon length check")
 test_that("first test", {
   repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc"
+  repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
   var <- 'tas'
   lon.min <- 0
   lon.max <- 359.723 #360
diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R
index fd82aa7..4e89190 100644
--- a/tests/testthat/test-Start-implicit_dependency_by_selector.R
+++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R
@@ -80,6 +80,7 @@ context("Start() implicit dependency by selector dimension")
test_that("2. time depends on sdate", { repos <- '/esarchive/exp/ecmwf/system4_m1/daily_mean/$var$_f24h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) sdates <- paste0(2001:2003, '0501') tmp <- as.POSIXct(sapply(2001:2003, function(x) paste0(x, '-05-', sprintf('%02d', 1:31))), tz = 'UTC') tmp <- array(tmp, dim = c(time = 31, sdate = 3)) @@ -122,6 +123,7 @@ test_that("3. region depends on member and sdate", { reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +path_SR <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_SR) suppressWarnings( data <- Start(dat = path_SR, var = 'tosmean', diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 3788af0..fcae53e 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -8,6 +8,7 @@ context("Start() implicit inner dimension") test_that("1. time = 1", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312") diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 82e5cb1..b225a0a 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -6,12 +6,15 @@ context("List of indices and vector of indices") +repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + test_that("1. transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -36,7 +39,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -74,7 +77,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -93,7 +96,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -194,7 +197,7 @@ test_that("4. 
no transform, indices reverse", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -215,7 +218,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index b448f89..6a796a2 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -7,9 +7,10 @@ test_that("1. inconsistent member length", { # system3: 40 members. repos <- list(list(name = 'system5c3s', - path = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), list(name = 'system3_m1-c3s', - path = "/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + # largest_dims_length = FALSE suppressWarnings( @@ -140,6 +141,7 @@ test_that("2. inconsistent time length, merge_across_dims = T", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/EC-Earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/', '$member$/Amon/$var$/gr/v20210309/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dataset = path, diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 6b797a8..8bf4564 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -4,6 +4,7 @@ context("Start() line order consistency check") variable <- "tas" obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_$file_date$.nc" + obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) dates_file <- "201702" lats.min <- -90 diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 4251c71..569a28e 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -2,6 +2,7 @@ context("Start() metadata_dims check") test_that("1. One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = list(list(name = 'system5_m1', path = repos)), var = 'tas', @@ -81,7 +82,9 @@ suppressWarnings( test_that("2. 
Two data sets, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -186,6 +189,7 @@ test_that("3. One data set, two vars", { repos <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- c('tas', 'clt') suppressWarnings( data <- Start(dat = repos, @@ -271,7 +275,9 @@ suppressWarnings( test_that("4. Two data sets, two vars", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -527,7 +533,9 @@ suppressWarnings( test_that("5. Specify metadata_dims with another file dimension", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -733,6 +741,7 @@ suppressWarnings( test_that("6. One data set, two vars from one file", { mask_path <- '/esarchive/autosubmit/con_files/mask.regions.Ec3.0_O1L46.nc' +mask_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', mask_path) suppressWarnings( data <- Start(repos = mask_path, var = c('nav_lon', 'nav_lat'), @@ -781,8 +790,10 @@ data <- Start(repos = mask_path, test_that("7. Two data sets, while one is missing", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) # incorrect path. 
Therefore repos2 doesn't have any valid files repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f2h/$var$_$sdate$.nc" # correct one is _f6h + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) var <- 'tas' suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), @@ -824,7 +835,7 @@ suppressWarnings( ) expect_equal( attr(data, 'Files'), - array(c(NA, "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) @@ -865,7 +876,7 @@ suppressWarnings( ) expect_equal( attr(dataF, 'ExpectedFiles'), - array(c("/esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) @@ -875,12 +886,12 @@ suppressWarnings( test_that("8. Two data sets, both have files but the first file is missing", { path_list <- list( MPI = list(name = 'MPI_ESM', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc')), Had = list(name = 'HadGEM3', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc'))) @@ -927,9 +938,9 @@ data <- Start(dataset = path_list, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", + array(c("/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", NA, NA, NA, NA, NA, NA, - "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), + "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), dim = c(dataset = 2, var = 1, member = 2, sdate = 1, chunk = 2)) ) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index cfd7dfb..13cac47 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -6,6 +6,7 @@ context("Start() metadata filedim dependency") # Preparation: 
Get the time values repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 7e9c280..92e831b 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -3,8 +3,11 @@ context("Start() metadata reshaping") test_that("1. time across fyear, fyear depends on sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -54,7 +57,7 @@ as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12 # retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -82,8 +85,11 @@ dates test_that("2. time across fyear, only one sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -118,7 +124,7 @@ as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12 #retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -145,8 +151,11 @@ dates test_that("3. 
time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -193,7 +202,7 @@ as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03 suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -228,9 +237,10 @@ datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), substr, 1, 7)))) +repos <- "/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( - data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + data <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -277,8 +287,7 @@ as.vector(datess) ) suppressWarnings( - dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + dataF <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -310,8 +319,12 @@ dates test_that("5. test 1 but merge_across_dims_narm = F", { + +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -361,7 +374,7 @@ c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962- ) suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -392,9 +405,11 @@ test_that("6. 
split time dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_199407.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +data <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -439,8 +454,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +dataF <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -470,9 +484,10 @@ datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -518,8 +533,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -549,9 +563,10 @@ test_that("8. split sdate dim", { file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), dim = c(syear = 3, smonth = 2)) +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -596,8 +611,7 @@ seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00: ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -617,8 +631,7 @@ dates # no return_vars suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -636,8 +649,7 @@ names(attributes(data)$Variables$common), ) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -660,8 +672,11 @@ test_that("9. 
split file dim that contains 'time', and 'time' inner dim is impli dates_arr <- array(c(paste0(1961, '0', 1:5), paste0(1962, '0', 1:5)), dim = c(time = 5, syear = 2)) +repos <- "/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +data <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -704,7 +719,7 @@ as.vector(dates_arr) suppressWarnings( -dataF <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +dataF <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -735,8 +750,10 @@ y2 <- seq(a, b, by = 'days') y2 <- y2[-3] # remove 28 Feb time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +repos <- "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +data <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] @@ -772,7 +789,7 @@ as.vector(time_array) ) suppressWarnings( -dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +dataF <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index d4ab2b2..6467a84 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -8,7 +8,9 @@ context("Start() multiple sdate with split + merge dim") # and the corresponding observations are required to have the same data structure. 
ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) var_name <- 'tas' var100_name <- 'windagl100' @@ -55,7 +57,7 @@ obs <- Start(dat = obs_path, ) expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], @@ -131,7 +133,7 @@ obs <- Start(dat = obs_path, expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index ddd69be..e32d0b3 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -8,6 +8,7 @@ years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/*/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -30,10 +31,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. 
@@ -48,6 +49,7 @@ data <- Start(dat = repos,
 repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/',
                 'EC-Earth3/historical/$member$/Omon/$var$/gn/v*/',
                 '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc')
+repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)
 suppressWarnings(
 data <- Start(dat = repos,
               var = 'tosmean',
@@ -72,10 +74,10 @@ data <- Start(dat = repos,
 )
 expect_equal(
   attr(data, 'Files'),
-  array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc",
-          "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc",
-          "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc",
-          "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"),
+  array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc",
+          "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc",
+          "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc",
+          "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"),
         dim = c(dat = 1, var = 1, expid = 2, year = 2, member = 1))
 )
 # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does.
@@ -138,6 +140,7 @@ test_that("2. tag at the end", {
 path <- "/esarchive/exp/ecmwf/system4_m1/6hourly/$var$/$var$_$year$0*.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 suppressWarnings(
 data <- Start(dat = path,
               var = "tas",
@@ -152,8 +155,8 @@ data <- Start(dat = path,
 expect_equal(
   as.list(attr(data, 'ExpectedFiles')),
-list("/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc",
-     "/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc")
+list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc",
+     "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc")
 )
 })
diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R
index 2fe5de9..0ac7701 100644
--- a/tests/testthat/test-Start-reorder-lat.R
+++ b/tests/testthat/test-Start-reorder-lat.R
@@ -13,6 +13,7 @@ context("Start() lat Reorder test")
 ##############################################
 path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 ## Origin longitude in file: [0:360]
@@ -884,8 +885,10 @@ test_that("4-x-2-12-123-2-1-x", {
 # 1-1. no Sort(), NULL
 ## lat should be descending
+path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 suppressWarnings(
-exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp1_1 <- Start(dat = path,
                var = 'tas',
                sdate = '20000101',
                ensemble = indices(1),
@@ -904,7 +907,7 @@ exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v
 # 1-2. Sort(), NULL
 ## lat should be ascending
 suppressWarnings(
-exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp1_2 <- Start(dat = path,
                var = 'tas',
                sdate = '20000101',
                ensemble = indices(1),
@@ -923,7 +926,7 @@ exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v
 # 1-3. Sort(drcreasing = T), NULL
 ## lat should be descending
 suppressWarnings(
-exp1_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp1_3 <- Start(dat = path,
                var = 'tas',
                sdate = '20000101',
                ensemble = indices(1),
@@ -971,7 +974,7 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude)
 # 2-1. no Sort(), 'dat'
 ## lat should be descending
 suppressWarnings(
-exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp2_1 <- Start(dat = path,
                var = 'tas',
                sdate = '20000101',
                ensemble = indices(1),
@@ -990,7 +993,7 @@ exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v
 # 2-2. Sort(), 'dat'
 ## lat should be ascending
 suppressWarnings(
-exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp2_2 <- Start(dat = path,
                var = 'tas',
                sdate = '20000101',
                ensemble = indices(1),
@@ -1009,7 +1012,7 @@ exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v
 # 2-3. Sort(drcreasing = T), NULL
 ## lat should be descending
 suppressWarnings(
-exp2_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp2_3 <- Start(dat = path,
                var = 'tas',
                sdate = '20000101',
                ensemble = indices(1),
diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R
index 6ca7b15..af9c2db 100644
--- a/tests/testthat/test-Start-reorder-latCoarse.R
+++ b/tests/testthat/test-Start-reorder-latCoarse.R
@@ -16,6 +16,7 @@ context("Start() lat Reorder test")
 ##############################################
 #path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc'
 path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values
 ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values
 ##############################################
diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R
index aa209b8..e0a066c 100644
--- a/tests/testthat/test-Start-reorder-lon-180to180.R
+++ b/tests/testthat/test-Start-reorder-lon-180to180.R
@@ -15,6 +15,7 @@ context("Start() lon Reorder non-transform -180to180 test")
 ## Origin longitude in file: [-179.71875:180]
 path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 variable <- 'tas'
 sdate <- '199212'
diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R
index 4351aa4..46da00e 100644
--- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R
+++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R
@@ -14,6 +14,7 @@ context("Start() lon Reorder transform -180to180 test")
 # 3-2
 ## Origin longitude in file: [-179.71875:180]
 path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 variable <- 'tas'
 sdate <- '199212'
diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R
index 3d2047e..e05c731 100644
--- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R
+++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R
@@ -14,6 +14,8 @@ context("Start() lon Reorder transform 0to360 test")
 # 3-2
 ## Origin longitude in file: [0:360]
 path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
+
 variable <- 'psl'
 sdate <- '19821201'
diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R
index 340860a..84b0527 100644
--- a/tests/testthat/test-Start-reorder-lon0to360.R
+++ b/tests/testthat/test-Start-reorder-lon0to360.R
@@ -13,6 +13,7 @@ context("Start() lon Reorder non-transform 0to360 test")
 ##############################################
 # 3-2
 path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 ## Origin longitude in file: [0:359.722222222222]
diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R
index e093a88..16ad2e0 100644
--- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R
+++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R
@@ -13,6 +13,7 @@ context("Start() lon Reorder non-transform 0to360 test")
 ##############################################
 # 3-2
 path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values
 ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values
diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R
index 42a79ce..28d8c79 100644
--- a/tests/testthat/test-Start-reorder-retrieve.R
+++ b/tests/testthat/test-Start-reorder-retrieve.R
@@ -7,6 +7,7 @@ context("Start() lon Reorder non-transform retrieve test")
 test_that("original range 0to360", {
 ## Origin longitude in file: [0:359.722222222222]
 path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 lons.min <- -2
 lons.max <- 2
@@ -86,6 +87,7 @@ res2 <- Start(dat = path_exp,
 test_that("original range -180to180", {
 ## Origin longitude in file: [0:359.722222222222]
 path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 variable <- 'tas'
 sdate <- '199212'
diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R
index b8279de..fce2dc4 100644
--- a/tests/testthat/test-Start-reorder_all.R
+++ b/tests/testthat/test-Start-reorder_all.R
@@ -7,6 +7,7 @@ context("No transform, reorder test: 'all'")
 # cdo is used to verify the data values
 library(easyNCDF)
 path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 file <- NcOpen(path)
 arr <- NcToArray(file,
                  dim_indices = list(time = 1, ensemble = 1,
@@ -23,6 +24,7 @@ NcClose(file)
 #---------------------------------------------------------------
 path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 test_that("1. lat", {
 # lon_reorder = CircularSort(0, 360)
diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R
index b2ca0ac..4027b78 100644
--- a/tests/testthat/test-Start-reorder_indices.R
+++ b/tests/testthat/test-Start-reorder_indices.R
@@ -6,6 +6,7 @@ context("No transform, reorder test: indices()")
 # cdo is used to verify the data values
 library(easyNCDF)
 path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 file <- NcOpen(path)
 arr <- NcToArray(file,
                  dim_indices = list(time = 1, ensemble = 1,
@@ -22,6 +23,7 @@ NcClose(file)
 #---------------------------------------------------------------
 path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
 test_that("1. lat", {
 # lon_reorder = CircularSort(0, 360)
diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R
index 3d576d8..fc7acb6 100644
--- a/tests/testthat/test-Start-reshape.R
+++ b/tests/testthat/test-Start-reshape.R
@@ -2,7 +2,9 @@ context("Start() reshape parameters check")
 # This one is more comprehensive than test-Start-split-merge.R
 path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc'
+path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp)
 path_obs <- '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h-r360x181/$var$_$date$.nc'
+path_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_obs)
 var <- 'tas'
 sdate <- paste0(1993:1995, '1201')
@@ -31,7 +33,7 @@ easy_sdate <- c('199312', paste0(rep(1994:1995, each = 3), c('01', '02', '12')),
 easy_array <- c()
 for (i in 1:length(easy_sdate)) {
-  easy_file <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_',
+  easy_file <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_',
                              easy_sdate[i], '.nc'))
   if (substr(easy_sdate[i], 5, 6) == '02') {
     sub_time <- 1:28
@@ -400,7 +402,7 @@ exp1 <- Start(dat = path_exp,
 )
 # easyNCDF
 easy_sdate_exp <- '19931201'
-easy_file_exp <- NcOpen(paste0('/esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_',
+easy_file_exp <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_',
                                easy_sdate_exp, '.nc'))
 easy_exp <- NcToArray(easy_file_exp, vars_to_read = 'tas',
                       dim_indices = list(longitude = c(1), latitude = c(1), ensemble = c(1),
@@ -490,7 +492,7 @@ obs2 <- Start(dat = path_obs,
               retrieve = TRUE)
 )
 # easyNCDF
-easy_file_199311 <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_',
+easy_file_199311 <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_',
                                   '199311', '.nc'))
 easy_obs_199311 <- NcToArray(easy_file_199311, vars_to_read = 'tas',
                              dim_indices = list(lon = c(1), lat = c(1), time = 1:30))
diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R
index 4bf83c6..e97023d 100644
--- a/tests/testthat/test-Start-return_vars_name.R
+++ b/tests/testthat/test-Start-return_vars_name.R
@@ -3,6 +3,7 @@ context("Start() return_vars name")
 # be used but will be changed back to the inner dim names.
 repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc'
+repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs)
 #---------------------------------------------------------------
diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R
index 7133adc..d95fa62 100644
--- a/tests/testthat/test-Start-split-merge.R
+++ b/tests/testthat/test-Start-split-merge.R
@@ -2,6 +2,7 @@ context("Start() split + merge dim and value check")
 var_name <- 'tas'
 path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc'
+path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp)
 suppressWarnings(
 hcst <- Start(dat = path.exp,
@@ -24,6 +25,7 @@ file_date <- sort(unique(gsub('-', '',
                               sapply(as.character(dates), substr, 1, 7))))
 path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc'
+path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs)
 var_name <- "sfcWind"
 test_that("1. split + merge + narm", {
@@ -148,6 +150,7 @@ obs <- Start(dat = path.obs,
 test_that("4. split only", {
 obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc"
+obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path)
 variable <- "prlr"
 dates_file <- c("201311","201312","201411","201412")
 dim(dates_file) <- c(smonth = 2, syear = 2)
diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R
index 3aa1930..a05a42a 100644
--- a/tests/testthat/test-Start-time_unit.R
+++ b/tests/testthat/test-Start-time_unit.R
@@ -4,7 +4,7 @@
 test_that("1. The data has units like time", {
 suppressWarnings(
-FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc',
+FD <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc',
             var = 'FD', # units: days
             time = indices(1),
             longitude = indices(1),
@@ -14,7 +14,7 @@ FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var
             retrieve = TRUE)
 )
 suppressWarnings(
-FD2 <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc',
+FD2 <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc',
             var = 'FD', # units: days
             time = indices(1),
             longitude = indices(1),
@@ -39,7 +39,7 @@ test_that("2. The metadata variable name is not time", {
 # VITIGEOOS
 vari <- "rsds"
-anlgs <- paste0("/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/",
+anlgs <- paste0("/esarchive/scratch/aho/startR_unittest_files/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/",
                 "$var$/$var$-vitigeoss-cat","_1999-2018_", "$file_date$.nc")
 file_date_array <- array(dim = c(sweek = 2, sday = 3))
diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R
index 8a9ca65..7fbac55 100644
--- a/tests/testthat/test-Start-transform-all.R
+++ b/tests/testthat/test-Start-transform-all.R
@@ -9,8 +9,8 @@ context("Transform test target grid: lon and lat = 'all'")
 # cdo is used to verify the data values
 # Test 1: original grid 'r360x180'
 library(easyNCDF)
-grid1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64'
-path <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180'
+grid1 <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64'
+path <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180'
 file <- NcOpen(path)
 arr <- NcToArray(file,
@@ -36,6 +36,8 @@ suppressWarnings(
 #---------------------------------------------------------------
 # Test 2: Original grid 'r432x324'
 path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s2009-r1i1p1f2_gn_201501-201512.nc' # 'r432x324'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 file <- NcOpen(path)
 arr <- NcToArray(file,
                  dim_indices = list(lat = 1:324, lon = 1:432, time = 1:2),
@@ -53,6 +55,7 @@ suppressWarnings(
 #---------------------------------------------------------------
 path1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/$var$/gr/v20191016/$var$_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s$sdate$-r1i1p1f1_gr_$sdate$11-202512.nc' # 'r360x180'
+path1 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path1)
 test_that("1. 'all'", {
 suppressWarnings(
@@ -108,6 +111,7 @@ test_that("1. 'all'", {
 #---------------------------------------------------------------
 path2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/$var$/gn/v20200417/$var$_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$sdate$11-201512.nc' # 'r432x324'
+path2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path2)
 test_that("2. test path 2", {
 suppressWarnings(
diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R
index dee8b4e..90b48b6 100644
--- a/tests/testthat/test-Start-transform-border.R
+++ b/tests/testthat/test-Start-transform-border.R
@@ -26,6 +26,9 @@ context("Transform: check with cdo")
 # The result of cdo is from CDO version 1.9.8.
+path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
 ##############################################
 test_that("1. normal regional situation", {
@@ -35,7 +38,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -99,7 +102,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -173,7 +176,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -251,7 +254,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -308,7 +311,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -386,7 +389,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -456,7 +459,7 @@ lats.min <- 20
 lats.max <- 40
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -517,7 +520,7 @@ lons.min <- 0
 lons.max <- 359
 suppressWarnings(
- exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+ exp <- Start(dat = path,
               var = 'tas',
              sdate = '20000101',
              ensemble = indices(1),
@@ -617,7 +620,7 @@ lons.min <- 0.5
 lons.max <- 359.9
 suppressWarnings(
-exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+exp <- Start(dat = path,
              var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
@@ -709,4 +712,4 @@ expect_equal(
 # [8,] 299.2109 300.3170 300.1524 299.6214 298.8563
 # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071
 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644
-##############################################
\ No newline at end of file
+##############################################
diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R
index b41ec0a..3852da9 100644
--- a/tests/testthat/test-Start-transform-lat-Sort-all.R
+++ b/tests/testthat/test-Start-transform-lat-Sort-all.R
@@ -9,7 +9,7 @@ context("Transform and lat_reorder test: 'all'")
 #---------------------------------------------------------------
 # cdo is used to
verify the data values library(easyNCDF) -path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -27,6 +27,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. 'all'", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 6c3a797..f729545 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -15,6 +15,7 @@ context("Transform and lat_reorder test: indices") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -32,6 +33,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices(1:640)", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index 92490ae..0333101 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -13,6 +13,7 @@ context("Transform and lat_reorder test: values") # cdo is used to verify the data values library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -30,6 +31,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. values(list(-90, 90))", { diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index f164046..d3c3dfa 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -5,6 +5,8 @@ context("Start() transform across_meridian lon order check") test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + var <- 'tas' lon.min <- 170 lon.max <- 185 #359.723 #360 diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index ede3c95..62d31da 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -5,6 +5,7 @@ test_that("1. 
Sort() and CircularSort(0, 360)", { # Original lon is [-180, 180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -140,6 +141,7 @@ test_that("2. Sort(decreasing = TRUE) and CircularSort(-180, 180)", { # Original lon is [0, 360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- 190 lons.max <- 200 lats.min <- 10 diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 657cca3..500168e 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -14,6 +14,7 @@ context("Transform: three selector forms") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -31,6 +32,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices", { diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index ff83441..e2fef3b 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -6,9 +6,11 @@ test_that("1. ex1_8, case 1", { path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tas <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tas) path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tos) suppressWarnings( data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), var = c('tas', 'tos'), diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 76c4f91..a84530f 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -10,6 +10,7 @@ context("List of values and vector of values") # To get lat and lon vectors library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) lats <- NcToArray(file, dim_indices = list(latitude = 1:35), vars_to_read = 'latitude') @@ -18,11 +19,14 @@ lons <- NcToArray(file, NcClose(file) #------------------------------------------------------------------ +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + test_that("1. 
transform", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -47,7 +51,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values. This one is a weird usage though... suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -85,7 +89,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -104,7 +108,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -137,7 +141,7 @@ test_that("3. transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -163,7 +167,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var #WRONG!!!!!!!!!! # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -201,7 +205,7 @@ test_that("4. no transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -220,7 +224,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), -- GitLab From c896a633ce980ca53b33b262ae25906b60dc51a9 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 24 Aug 2023 11:53:03 +0200 Subject: [PATCH 34/46] Add note saying the data has been deleted. --- inst/doc/usecase/ex1_13_implicit_dependency.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 6740a21..8f60413 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -55,6 +55,7 @@ summary(exp) #============================================================================= # Case 2: 'region' depends on 'sdate' +#NOTE: Exp "a35b" has been deleted. This example cannot be run now. 
path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') -- GitLab From 5b81e3738a3e603e26cb09a44bd422051671124e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 30 Aug 2023 16:45:44 +0200 Subject: [PATCH 35/46] Change autosubmit 'THREADS' back to 'cores_per_job'; Modify documentation about ncores and num_procs --- R/ByChunks_autosubmit.R | 22 ++++++++++------------ R/Compute.R | 4 ++-- R/Start.R | 4 ++-- R/Utils.R | 6 +++--- inst/doc/practical_guide.md | 23 +++++++++++++++++------ man/Compute.Rd | 4 ++-- man/Start.Rd | 4 ++-- 7 files changed, 38 insertions(+), 29 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 133f467..5a43146 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -289,19 +289,17 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") } ### cores_per_job -# if (is.null(cluster[['cores_per_job']])) { -# cluster[['cores_per_job']] <- threads_compute -# } -# if (!is.numeric(cluster[['cores_per_job']])) { -# stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") -# } -# cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) + if (is.null(cluster[['cores_per_job']])) { + cluster[['cores_per_job']] <- threads_compute + } + if (!is.numeric(cluster[['cores_per_job']])) { + stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") + } + cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) +# NOTE: Why do we have this condition? # if (cluster[['cores_per_job']] > threads_compute) { # .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") # } - if (!is.null(cluster[['cores_per_job']])) { - .warning("The component 'cores_per_job' in cluster list is not used. Please specify the cores by parameter 'threads_compute'.") - } ### job_wallclock tmp <- strsplit( '01:00:00', ':')[[1]] if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) { @@ -533,7 +531,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_confs(chunks, threads_compute, cluster, autosubmit_suite_dir) + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) # Iterate through chunks chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) @@ -550,7 +548,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } -# timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['cores_per_job']] <- cluster[['cores_per_job']] timings[['concurrent_chunks']] <- cluster[['max_jobs']] t_end_bychunks_setup <- Sys.time() diff --git a/R/Compute.R b/R/Compute.R index 981f28b..7ab6549 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -19,9 +19,9 @@ #' those not required as the target dimension in function Step(). The default #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. -#'@param threads_load An integer indicating the number of execution threads to +#'@param threads_load An integer indicating the number of execution processes to #' use for the data retrieval stage. 
The default value is 1. -#'@param threads_compute An integer indicating the number of execution threads +#'@param threads_compute An integer indicating the number of execution processes #' to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The components vary among the different machines. diff --git a/R/Start.R b/R/Start.R index 92eb16d..b72a02b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -704,8 +704,8 @@ #'@param num_procs An integer of number of processes to be created for the #' parallel execution of the retrieval/transformation/arrangement of the #' multiple involved files in a call to Start(). If set to NULL, -#' takes the number of available cores (as detected by detectCores() in -#' the package 'future'). The default value is 1 (no parallel execution). +#' takes the number of available cores (as detected by future::detectCores). +#' The default value is 1 (no parallel execution). #'@param ObjectBigmemory a character string to be included as part of the #' bigmemory object name. This parameter is thought to be used internally by the #' chunking capabilities of startR. diff --git a/R/Utils.R b/R/Utils.R index f496333..a20653e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -928,7 +928,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { } # This function generates the .yml files under autosubmit conf/ -write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_suite_dir) { +write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Remove this library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) @@ -975,7 +975,7 @@ write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_ # wallclock from '01:00:00' to '01:00' jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) jobs[[1]]$PLATFORM <- cluster$queue_host - jobs[[1]]$THREADS <- as.integer(threads_compute) + jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) jobs[[1]][names(chunks)] <- "" @@ -995,7 +995,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { } else if (conf_type == "platforms") { if (tolower(cluster$queue_host) != "local") { conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user - conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(threads_compute) + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cores_per_job) if (!is.null(cluster$extra_queue_params)) { tmp <- unlist(cluster$extra_queue_params) for (ii in 1:length(tmp)) { diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index dead996..4f79ae0 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -371,8 +371,18 @@ It is not possible for now to define workflows with more than one step, but this ### 4-3. Compute() Once the data sources are declared and the workflow is defined, you can proceed to specify the execution parameters (including which platform to run on) and trigger the execution with the `Compute()` function. +The execution can run locally (only on the machine where the R session is running) or remotely on different HPCs (e.g., Nord3-v2 and CTE-Power9).
-Next, a few examples are shown with `Compute()` calls to trigger the processing of a dataset locally (only on the machine where the R session is running) and different HPCs (Nord3-v2, CTE-Power9 and other HPCs). However, let's first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. +The common Compute() parameters of local and remote execution are: +- `wf`: The workflow defined by the previous steps. +- `chunks`: The dimensions to be chunked and how many chunks you want for each dimension. +startR will automatically chunk the data for you. See more details in section [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores). +- `threads_load`: The number of processes to be created for the data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. +- `threads_compute`: The number of processes to be created for the computation. It is used as multiApply::Apply parameter "ncores". + +Using more than 2 threads for the retrieval will usually be detrimental, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. + +In the following example, we first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. ```r library(startR) @@ -526,7 +536,7 @@ dim(res$output1) 2 1 1 1 640 1296 ``` -In addition to performing the computation in chunks, you can adjust the number of execution threads to use for the data retrieval stage (with `threads_load`) and for the computation (with `threads_compute`). Using more than 2 threads for the retrieval will usually be perjudicial, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. +Run Compute() with the desired chunks and resource settings. ```r res <- Compute(wf, @@ -729,17 +739,18 @@ You can see one example of cluster configuration below. The cluster components and options are explained next: -- `queue_host`: Must match the platform name in Autosubmit configuration file `platforms.yml`, or 'local'. The provided platforms are: 'nord3'. +- `queue_host`: Must match the platform name in Autosubmit configuration file _platforms.yml_, or 'local'. The currently provided platforms are: 'nord3'. - `expid`: The Autosubmit experiment to run the computation. You can create the experiment beforehand or let startR create one for you by not specifying this component. As good practice, note down the expid if it is automatically created by startR and re-use/delete it afterwards. - `hpc_user`: Your user ID on the HPC (e.g., "bsc32xxx"). It is required if "queue_host" is not 'local'. - `data_dir`: The path to the data repository if the data is not shared. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module.
- `init_commands`: The initial commands in bash script before R script runs. For example, the modules required by computation can be loaded here. -- `r_module`: name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. -- `CDO_module`: name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). +- `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. +- `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `transform` is required in Start(). - `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used. -- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +It corresponds to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. - `extra_queue_params`: list of character strings for additional queue headers for the jobs diff --git a/man/Compute.Rd b/man/Compute.Rd index f6ad867..f475860 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -33,10 +33,10 @@ one chunk.} \item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'.} -\item{threads_load}{An integer indicating the number of execution threads to +\item{threads_load}{An integer indicating the number of execution processes to use for the data retrieval stage. The default value is 1.} -\item{threads_compute}{An integer indicating the number of execution threads +\item{threads_compute}{An integer indicating the number of execution processes to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the machine to be run on. The components vary among the different machines. diff --git a/man/Start.Rd b/man/Start.Rd index 3bdae42..7cdc9f8 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -684,8 +684,8 @@ value is FALSE.} \item{num_procs}{An integer of number of processes to be created for the parallel execution of the retrieval/transformation/arrangement of the multiple involved files in a call to Start(). If set to NULL, -takes the number of available cores (as detected by detectCores() in -the package 'future'). The default value is 1 (no parallel execution).} +takes the number of available cores (as detected by future::detectCores). +The default value is 1 (no parallel execution).} \item{ObjectBigmemory}{a character string to be included as part of the bigmemory object name.
This parameter is thought to be used internally by the -- GitLab From 5bae396cbe4e045d15fe86a04af5b23f1de7317c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 30 Aug 2023 17:10:06 +0200 Subject: [PATCH 36/46] Correct cores_per_job to cluster --- R/Utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Utils.R b/R/Utils.R index a20653e..3fce2da 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -995,7 +995,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { } else if (conf_type == "platforms") { if (tolower(cluster$queue_host) != "local") { conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user - conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cores_per_job) + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) if (!is.null(cluster$extra_queue_params)) { tmp <- unlist(cluster$extra_queue_params) for (ii in 1:length(tmp)) { -- GitLab From 263cddd3ea278849ae966881fd9593c76557b131 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 11:26:21 +0200 Subject: [PATCH 37/46] Refine document about threads and cores_per_job --- R/Compute.R | 11 ++++++----- inst/doc/practical_guide.md | 8 ++++---- man/Compute.Rd | 11 ++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/R/Compute.R b/R/Compute.R index 7ab6549..321a0a1 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -19,16 +19,17 @@ #' those not required as the target dimension in function Step(). The default #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. -#'@param threads_load An integer indicating the number of execution processes to -#' use for the data retrieval stage. The default value is 1. -#'@param threads_compute An integer indicating the number of execution processes -#' to use for the computation. The default value is 1. +#'@param threads_load An integer indicating the number of parallel execution +#' processes to use for the data retrieval stage. The default value is 1. +#'@param threads_compute An integer indicating the number of parallel execution +#' processes to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The components vary among the different machines. #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. -#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'. +#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +#' 'ecFlow'. #'@param ecflow_suite_dir A character string indicating the path to a folder in #' the local workstation where to store temporary files generated for the #' automatic management of the workflow. Only needed when the execution is run diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 4f79ae0..6bc4bef 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -377,8 +377,8 @@ The common Compute() parameters of local and remote execution are: - `wf`: The workflow defined by the previous steps. - `chunks`: The dimensions to be chunked and how many chunks you want for each dimension. startR will automatically chunk the data for you. See more details in section [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores).
-- `threads_load`: The number of processes to be created for the data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. -- `threads_compute`: The number of processes to be created for the computation. It is used as multiApply::Apply parameter "ncores". +- `threads_load`: The number of parallel execution processes to be created for the data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. +- `threads_compute`: The number of parallel execution processes to be created for the computation. It is used as multiApply::Apply parameter "ncores". Using more than 2 threads for the retrieval will usually be detrimental, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. @@ -621,7 +621,7 @@ The cluster components and options are explained next: - `temp_dir`: directory on the HPC where to store temporary files. Must be accessible from the HPC login node and all HPC nodes. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. - `r_module`: name of the UNIX environment module to be used for R. If not specified, 'module load R' will be used. -- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each core may be capable of supporting more than one computing thread. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. - `extra_queue_params`: list of character strings with additional queue headers for the jobs to be submitted to the HPC. Mainly used to specify the amount of memory to book for each job (e.g. '#SBATCH --mem-per-cpu=30000'; __NOTE: this line does not work on Nord3v2__), to request special queuing (e.g. '#SBATCH --qos=bsc_es'), or to request use of specific software (e.g. '#SBATCH --reservation=test-rhel-7.5'). @@ -749,7 +749,7 @@ As good practice, note down the expid if it is automatically created by - `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. - `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `transform` is required in Start(). - `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used. -- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue.
Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. It is corresponded to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. diff --git a/man/Compute.Rd b/man/Compute.Rd index f475860..270846a 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -31,13 +31,14 @@ those not required as the target dimension in function Step(). The default value is 'auto', which lists all the non-target dimensions and each one has one chunk.} -\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'.} +\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +'ecFlow'.} -\item{threads_load}{An integer indicating the number of execution processes to -use for the data retrieval stage. The default value is 1.} +\item{threads_load}{An integer indicating the number of parallel execution +processes to use for the data retrieval stage. The default value is 1.} -\item{threads_compute}{An integer indicating the number of execution processes -to use for the computation. The default value is 1.} +\item{threads_compute}{An integer indicating the number of parallel execution +processes to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the machine to be run on. The comoponents vary from the different machines. 
-- GitLab From a30eb2e10e2b964936917834d3fc409cda3c33c3 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 13:30:04 +0200 Subject: [PATCH 38/46] Remove TODOs --- R/ByChunks_autosubmit.R | 6 ++---- R/ByChunks_ecflow.R | 32 +++++++++++--------------------- R/Utils.R | 12 +++--------- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 5a43146..08414f9 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -467,10 +467,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } # Copy load_process_save_chunk_autosubmit.R into local folder -#TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") -# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', -# package = 'startR')) + chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', + package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index cb5c95b..6292448 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -438,10 +438,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', "access is properly set in both directions.")) # Copy load_process_save_chunk_ecflow.R into shared folder - #TODO: Change to package file - chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/load_process_save_chunk_ecflow.R") -# chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', -# package = 'startR')) + chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', + package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', @@ -494,10 +492,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk_ecflow.R')) # Copy Chunk.ecf into shared folder - #TODO: Change to package file - chunk_ecf_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/Chunk.ecf") -# chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', -# package = 'startR')) + chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', + package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) close(chunk_ecf_script) if (cluster[['queue_type']] == 'host') { @@ -562,9 +558,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy queue header into shared folder #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) - #TODO: Change to package file - chunk_queue_header <- file(paste0("/esarchive/scratch/aho/tmp/chunking/ecFlow/", cluster[['queue_type']], '.h')) -# chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) + chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) chunk_queue_header_lines <- gsub('^include_extra_queue_params', @@ -573,15 +567,10 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 
'auto', writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) # Copy headers - #TODO: Change to package file - file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) - file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) -# file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), -# ecflow_suite_dir_suite) -# file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), -# ecflow_suite_dir_suite) - #file.copy(system.file('chunking/clean_devshm.sh', package = 'startR'), - # ecflow_suite_dir_suite) + file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), + ecflow_suite_dir_suite) + file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), + ecflow_suite_dir_suite) } add_line <- function(suite, line, tabs) { @@ -900,7 +889,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', ecflow_server[['port']])) timings[['total']] <- t_begin_total - startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + workflow_manager = 'ecFlow', suite_id = suite_id, chunks = chunks, num_outputs = length(arrays_of_results), ecflow_suite_dir = ecflow_suite_dir, diff --git a/R/Utils.R b/R/Utils.R index 3fce2da..940e2d3 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -885,10 +885,8 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { chunk_args[2, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '_N%') chunk_args <- paste0('(', paste(c(chunk_args), collapse = ' '), ')') - #TODO: Change to the following line getting .sh template from package - # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', - # package = 'startR')) - bash_script_template <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', + package = 'startR')) bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -929,16 +927,12 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { # This function generates the .yml files under autosubmit conf/ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { - #TODO: Remove this - library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) # "cluster" is the argument "cluster" in Compute(), to set machine configuration # "autosubmit_suite_dir" should be the local folder that has R script, like ecflow_suite_dir in Compute() # Get config template files from package - #TODO: Change to package path -# template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/esarchive/scratch/aho/tmp/chunking/Autosubmit/" + template_dir <- system.file('chunking/Autosubmit/', package = 'startR') config_files <- list.files(template_dir, pattern = "*\\.yml$") for (i_file in config_files) { -- GitLab From b3baa4a1431580ccb99ea8a6c4fc80c96cbc2897 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 13:30:44 +0200 Subject: [PATCH 39/46] Add explanation about ecFlow port; ignore .gitlab/ when building R package --- .Rbuildignore | 2 +- inst/doc/practical_guide.md | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 2ef8ba9..d988cd4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,7 +11,7 @@ ## 
unit tests should be ignored when building the package for CRAN ^tests$ ^inst/PlotProfiling\.R$ - +^.gitlab$ # Suggested by http://r-pkgs.had.co.nz/package.html ^.*\.Rproj$ # Automatically added by RStudio, ^\.Rproj\.user$ # used for temporary files. diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 6bc4bef..7038ad7 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -22,6 +22,7 @@ If you would like to start using startR rightaway on the BSC infrastructure, you 2. [**How to clean a failed execution**](#5-2-how-to-clean-a-failed-execution) 3. [**Visualizing the profiling of the execution**](#5-3-visualizing-the-profiling-of-the-execution) 4. [**Pending features**](#5-4-pending-features) + 5. [**ecFlow server and port**](#5-5-ecflow-server-and-port) 6. [**Other examples**](#6-other-examples) 7. [**Compute() cluster templates**](#7-compute-cluster-templates) @@ -794,9 +795,14 @@ module load ecFlow ecflow_ui & ``` -After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. However, if it is the first time you are using the EC-Flow GUI with startR, you will need to register the EC-Flow server that has been started automatically by `Compute()`. You can open the top menu "Manage servers" > "New server" > set host to 'localhost' > set port to '5678' > save. +After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. +However, if it is the first time you are using the EC-Flow GUI with startR, +you will need to register the EC-Flow server that has been started automatically by `Compute()`. +You can open the top menu "Manage servers" > "Add server" > put a recognizable 'Name' for the host > set 'Host' to your workstation (e.g., bscearthxxx) or 'localhost' > set 'Port' to '5678' > save. +See more information about the ecFlow server in [#5-5](#5-5-ecflow-server-and-port). + +Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. _(NOTE: 'host' is not supported for now. You can use `ecflow_server = c(port = xxxx)` to change the port number.)_ -Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. After registering the EC-Flow server, an expandable entry will appear, where you can see listed the jobs to be executed, one for each chunk, with their status represented by a colour. Gray means pending, blue means queuing, green means in progress, and yellow means completed. @@ -916,6 +922,15 @@ You can click on the image to expand it. - Adding feature in `Start()` to read sparse grid points. - Allow for chunking along "essential" (a.k.a. "target") dimensions. +### 5-5. ecFlow server and port + +You cannot start two ecFlow servers on the same machine with the same port number. +That is, the port number on one workstation cannot be shared. For example, if port '5678' on workstation 'bscearth123' is taken by user A, +user B cannot ssh to 'bscearth123' and use the port number '5678'. But user B can use a new port number and specify 'ecflow_server' in Compute(). +Or, if user B uses another workstation that has port number '5678' available, they can use it without problems. + +You can check the host-port pairs you have in `~/.ecflowrc/servers`. To stop using a server, you can go to the ecFlow UI, right click the server > halt > checkpoint > terminate.
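The workaround described in the new section 5-5 amounts to one extra argument in Compute(). A small sketch, assuming the default port '5678' is already taken on the shared workstation, and that `wf` and the suite directory are defined as in the earlier examples:

```r
# Sketch: ask Compute() to start the ecFlow server on a non-default port.
# 'host' is not supported for now, so only the port is given; 5679 is an
# arbitrary free port and the suite directory is an assumed placeholder.
res <- Compute(wf,
               chunks = list(latitude = 2, longitude = 2),
               ecflow_suite_dir = '/home/Earth/user/startR_local/',
               ecflow_server = c(port = 5679))
```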
## 6. Other examples You can find more use cases in [usecase.md](inst/doc/usecase.md). -- GitLab From c9fdce1451e470cfe130944f4a59bfe8407226b3 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 14:16:57 +0200 Subject: [PATCH 40/46] version bump to 2.3.0 --- .Rbuildignore | 2 -- DESCRIPTION | 2 +- NEWS.md | 6 ++++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index d988cd4..aa7059a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,7 +15,5 @@ # Suggested by http://r-pkgs.had.co.nz/package.html ^.*\.Rproj$ # Automatically added by RStudio, ^\.Rproj\.user$ # used for temporary files. -^README\.Rmd$ # An Rmarkdown file used to generate README.md ^cran-comments\.md$ # Comments for CRAN submission #^NEWS\.md$ # A news file written in Markdown -^\.gitlab-ci\.yml$ diff --git a/DESCRIPTION b/DESCRIPTION index 173baca..60fa08c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.2.3 +Version: 2.3.0 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), diff --git a/NEWS.md b/NEWS.md index 888fb72..1b3062e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# startR v2.3.0 (Release date: 2023-08-31) +- Load variable metadata when retrieve = F +- Change Compute() "threads_load" to 1 to be consistent with documentation +- Add Autosubmit as workflow manager +- SelectorChecker() to recognize class integer # startR v2.2.3 (Release date: 2023-06-06) - Bugfix in Start(): when using parameter `longitude = 'all'` with transform, there was a missing point for some cases. -- GitLab From a95a8745d38051f32ad7bb9ee2078fd7a10d72d8 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 14:53:15 +0200 Subject: [PATCH 41/46] Add param document; fix typo --- R/Compute.R | 8 +++++++- R/Utils.R | 2 +- man/Compute.Rd | 10 +++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/R/Compute.R b/R/Compute.R index 321a0a1..d288a4d 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -37,7 +37,13 @@ #'@param ecflow_server A named vector indicating the host and port of the #' EC-Flow server. The vector form should be #' \code{c(host = 'hostname', port = port_number)}. Only needed when the -#' execution is run#' remotely. The default value is NULL. +#' execution is run remotely. The default value is NULL. +#'@param autosubmit_suite_dir A character string indicating the path to a folder +#' that can be found locally and on the Autosubmit machine, where to store +#' temporary files generated for Autosubmit to establish the workflow. +#'@param autosubmit_server A character string of the Autosubmit machine name +#' ('bscesautosubmit01' or 'bscesautosubmit02'). If it is NULL, a random one +#' will be assigned. The default value is NULL. #'@param silent A logical value deciding whether to print the computation #' progress (FALSE) on the R session or not (TRUE). It only works when the #' execution runs locally or the parameter 'wait' is TRUE.
The default value diff --git a/R/Utils.R b/R/Utils.R index 940e2d3..e440dde 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -919,7 +919,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { dest_dir <- file.path(autosubmit_suite_dir, paste0("/STARTR_CHUNKING_", cluster$expid)) if (!file.exists(dest_dir)) { - dir.create(dest_fir, recursive = TRUE) + dir.create(dest_dir, recursive = TRUE) } writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh')) } diff --git a/man/Compute.Rd b/man/Compute.Rd index 270846a..610af3d 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -54,7 +54,15 @@ remotely. The default value is NULL.} \item{ecflow_server}{A named vector indicating the host and port of the EC-Flow server. The vector form should be \code{c(host = 'hostname', port = port_number)}. Only needed when the -execution is run#' remotely. The default value is NULL.} +execution is run remotely. The default value is NULL.} + +\item{autosubmit_suite_dir}{A character string indicating the path to a folder +that can be found locally and on the Autosubmit machine, where to store +temporary files generated for Autosubmit to establish the workflow.} + +\item{autosubmit_server}{A character string of the Autosubmit machine name +('bscesautosubmit01' or 'bscesautosubmit02'). If it is NULL, a random one +will be assigned. The default value is NULL.} \item{silent}{A logical value deciding whether to print the computation progress (FALSE) on the R session or not (TRUE). It only works when the -- GitLab From 9b521bb0684f66f3e834668b36a56b879bb0b96d Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 15:09:19 +0200 Subject: [PATCH 42/46] remove context() in unit tests --- tests/testthat/test-AddStep-DimNames.R | 1 - tests/testthat/test-Compute-CDORemap.R | 2 -- tests/testthat/test-Compute-NumChunks.R | 1 - tests/testthat/test-Compute-chunk_depend_dim.R | 2 -- tests/testthat/test-Compute-chunk_split_dim.R | 2 -- tests/testthat/test-Compute-extra_params.R | 1 - tests/testthat/test-Compute-inconsistent_target_dim.R | 1 - tests/testthat/test-Compute-irregular_regrid.R | 2 -- tests/testthat/test-Compute-timedim.R | 2 -- tests/testthat/test-Compute-transform_all.R | 1 - tests/testthat/test-Compute-transform_indices.R | 1 - tests/testthat/test-Compute-transform_values.R | 1 - tests/testthat/test-Compute-two_data.R | 2 -- tests/testthat/test-Compute-use_attribute.R | 2 -- tests/testthat/test-Start-DCPP-across-depends.R | 1 - tests/testthat/test-Start-calendar.R | 1 - tests/testthat/test-Start-depends_values.R | 3 --- tests/testthat/test-Start-first_file_missing.R | 2 -- tests/testthat/test-Start-global-lon-across_meridian.R | 2 -- tests/testthat/test-Start-implicit_dependency_by_selector.R | 1 - tests/testthat/test-Start-implicit_inner_dim.R | 1 - tests/testthat/test-Start-indices_list_vector.R | 2 -- tests/testthat/test-Start-largest_dims_length.R | 1 - tests/testthat/test-Start-line_order-consistency.R | 2 -- tests/testthat/test-Start-metadata_dims.R | 2 -- tests/testthat/test-Start-metadata_filedim_dependency.R | 1 - tests/testthat/test-Start-metadata_reshaping.R | 1 - tests/testthat/test-Start-multiple-sdates.R | 2 -- tests/testthat/test-Start-path_glob_permissive.R | 2 -- tests/testthat/test-Start-reorder-lat.R | 2 -- tests/testthat/test-Start-reorder-latCoarse.R | 2 -- tests/testthat/test-Start-reorder-lon-180to180.R | 1 - tests/testthat/test-Start-reorder-lon-transform_-180to180.R | 1 - tests/testthat/test-Start-reorder-lon-transform_0to360.R | 1 -
tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R | 1 - tests/testthat/test-Start-reorder-lon0to360.R | 1 - tests/testthat/test-Start-reorder-lon0to360Coarse.R | 1 - tests/testthat/test-Start-reorder-metadata.R | 1 - tests/testthat/test-Start-reorder-retrieve.R | 3 --- tests/testthat/test-Start-reorder_all.R | 3 --- tests/testthat/test-Start-reorder_indices.R | 2 -- tests/testthat/test-Start-reshape.R | 1 - tests/testthat/test-Start-return_vars_name.R | 1 - tests/testthat/test-Start-split-merge.R | 1 - tests/testthat/test-Start-time_unit.R | 1 - tests/testthat/test-Start-transform-all.R | 2 -- tests/testthat/test-Start-transform-border.R | 1 - tests/testthat/test-Start-transform-lat-Sort-all.R | 2 -- tests/testthat/test-Start-transform-lat-Sort-indices.R | 2 -- tests/testthat/test-Start-transform-lat-Sort-values.R | 2 -- tests/testthat/test-Start-transform-lon-across_meridian.R | 2 -- tests/testthat/test-Start-transform-metadata.R | 1 - tests/testthat/test-Start-transform-three-selectors.R | 2 -- tests/testthat/test-Start-two_dats.R | 1 - tests/testthat/test-Start-values_list_vector.R | 1 - 55 files changed, 84 deletions(-) diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 647ca2f..5e1fe9c 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Error with bad dimensions tests.") test_that("Single File - Local execution", { suppressWarnings( data <- Start(dataset = '/esarchive/scratch/aho/startR_unittest_files/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R index b1479e2..fb31d00 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -1,5 +1,3 @@ -context("Compute use CDORemap") - test_that("ex2_3", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index 5d9a775..d85c8bf 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Number of chunks tests.") test_that("Single File - Local execution", { path <- '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc' diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index 9c78764..101bfb5 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -6,8 +6,6 @@ # b. depended dim is list of values # Note that 2.b. doesn't work. -context("Chunk over dimensions that have dependency relationship") - path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/', 'HadGEM3-GC31-MM/dcppA-hindcast/', 'r1i1p1f2/Omon/tos/gn/v20200417/', diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index a40f745..0c1da4a 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -1,8 +1,6 @@ # This unit test is to check chunking over the split dim. It involves # how to arrange the chunks in a correct order even when chunking is happening. -context("Chunk over split dim") - test_that("1. 
The files are not repeated", { repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index 02eab30..f055e96 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -1,4 +1,3 @@ -context("Compute, extra function arguments") test_that("ex2_6", { diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 7ebc6f5..58f96a9 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -1,4 +1,3 @@ -context("Compute()/ByChunks(): dimension consistence check") # If dataset are more than 1 (e.g., exp and obs), ByChunks() checks if # they have consistent dimensions in favor of Apply() computation. However, # only margin dimensions need to be identical. Target dimensions can have diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index c76793d..7de1471 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -1,7 +1,5 @@ library(s2dv) -context("Irregular regriding in the workflow") - test_that("1. ex2_13", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index d63ae6c..fbc5af0 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -1,5 +1,3 @@ -context("Compute on time dimension") - test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index a7a67dd..05d5de6 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -1,4 +1,3 @@ -context("Transform with 'all'") test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index 37decfc..c2d3e35 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -1,4 +1,3 @@ -context("Transform with indices") # Using indinces() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index 191d651..25a803f 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -1,4 +1,3 @@ -context("Compute: Transform and chunk values()") # Using values() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. 
diff --git a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R index 735735f..dfa579a 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -1,5 +1,3 @@ -context("Compute with two datasets") - test_that("ex2_7", { # exp data diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 7ec3dc2..6f218e6 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -1,5 +1,3 @@ -context("Compute use attributes") - test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index c561abd..bfe44b1 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,4 +1,3 @@ -context("DCPP successfull retrieved for depends and across parameters.") test_that("Chunks of DCPP files- Local execution", { path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 0ee4c5e..7dfbc2c 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -1,4 +1,3 @@ -context("Start() different calendar") test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index e4e4adc..9cccc2d 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -2,9 +2,6 @@ # and depending dimensions. The depending dimension should be a list with # names that are the values of depended dimensions. -context("Start() using values() to define dependency relations") - - path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index 9c232e6..fecbd7c 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -1,5 +1,3 @@ -context("Start() retrieves files that the first file is missing") - # When some of the files are missing, Start() still can retrieve the data and # put NA in those missing positions. However, when the first file is missing, # Start() returned error before because of failing to find metadata. 
The bug is diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 0360629..921c331 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() across_meridia global lon length check") - test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 4e89190..d493a87 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -6,7 +6,6 @@ # If assign a selector with an array that has file dim as dimension, Start() read # the values depending on the the file dim. #--------------------------------------------------- -context("Start() implicit dependency by selector dimension") #NOTE: The files don't exist anymore. #test_that("1. region with different index between files", { diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index fcae53e..7e0264c 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -1,4 +1,3 @@ -context("Start() implicit inner dimension") # The unit test is for the implicit inner dimension. If the inner dimension length is 1, # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index b225a0a..2effede 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -4,8 +4,6 @@ # 3. transform, indices reversed # 4. no transform, indices reversed -context("List of indices and vector of indices") - repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index 6a796a2..211c132 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -1,4 +1,3 @@ -context("Start() largest_dims_length check") # When certain inner dim of files is not consistent, the parameter 'largest_dims_length' can # be used to ensure the returned array has the largest length of inner dimensions. 
diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 8bf4564..11be109 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() line order consistency check") - variable <- "tas" obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_$file_date$.nc" obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 569a28e..2a2e735 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -1,5 +1,3 @@ -context("Start() metadata_dims check") - test_that("1. One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index 13cac47..da3fe86 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -1,4 +1,3 @@ -context("Start() metadata filedim dependency") # When inner dimension selector is an array with filedim dimension name (e.g., time = [sdate = 2, time = 4], # or *_across is used, the inner dim has dependency on file dim. In this case, return_vars must # specify this relationship, i.e., return_vars = list(time = 'sdate'). diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 92e831b..b143268 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -1,4 +1,3 @@ -context("Start() metadata reshaping") # When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. test_that("1. time across fyear, fyear depends on sdate", { diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index 6467a84..e16f2bf 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() multiple sdate with split + merge dim") - # When certain values in one observation file are required more than once, # and 'merge_across_dims' + 'split_multiselected_dims' are used, the values may be misplaced. # It might happen when reading experimental data with many start dates, diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index e32d0b3..75f28d4 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -1,5 +1,3 @@ -context("Start() path_glob_permissive check") - test_that("1. 
expid/member/version", { years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index 0ac7701..c87792e 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lat Reorder test") - #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] #3 resolution 1-1 2-<1 3->1 diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index af9c2db..34a766f 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lat Reorder test") - #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] #3 resolution 1-1 2-<1 3->1 4-> mixed diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index e0a066c..0f71f0a 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform -180to180 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[170, 190] #3 resolution 1-1 2-<1 3->1 diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 46da00e..5e7701a 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder transform -180to180 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[170, 190] #3 resolution 1-1 2-<1 3->1 diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index e05c731..86ad5e7 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index d4629af..c18d34a 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder transform 0to360 test") #1 selector type 1-values(list) 
2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 4 -> mixed diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 84b0527..1e946d9 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index 16ad2e0..71361d9 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 4-> mixed diff --git a/tests/testthat/test-Start-reorder-metadata.R b/tests/testthat/test-Start-reorder-metadata.R index 4b6f909..ea727e5 100644 --- a/tests/testthat/test-Start-reorder-metadata.R +++ b/tests/testthat/test-Start-reorder-metadata.R @@ -1,4 +1,3 @@ -context("Start() reorder metadata check") # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. Sort() and CircularSort(0, 360)", { diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 28d8c79..25efcfc 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -1,8 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform retrieve test") - - ############################################## test_that("original range 0to360", { ## Origin longitude in file: [0:359.722222222222] diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index fce2dc4..87a4416 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -1,8 +1,5 @@ # No transform, test reorder function Sort() and CircularSort() with selector 'all'. - -context("No transform, reorder test: 'all'") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index 4027b78..59d00d4 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -1,7 +1,5 @@ # No transform, test reorder function Sort() and CircularSort() with selector indices(). 
-context("No transform, reorder test: indices()") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index fc7acb6..480a3bc 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -1,4 +1,3 @@ -context("Start() reshape parameters check") # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index e97023d..e3ff876 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -1,4 +1,3 @@ -context("Start() return_vars name") # The name of return_vars should be one of the inner dimension names. The synonims can # be used but will be changed back to the inner dim names. diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index d95fa62..699c01c 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,4 +1,3 @@ -context("Start() split + merge dim and value check") var_name <- 'tas' path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index a05a42a..0c499d3 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -1,4 +1,3 @@ -context("To detect the variable with time format and adjust the units") test_that("1. The data has units like time", { diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index 7fbac55..a8290a6 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -3,8 +3,6 @@ # The test contains three calls with different target grids: # two with 'r128x64' (from different original grid) and one with 'r100x50'. -context("Transform test target grid: lon and lat = 'all'") - #--------------------------------------------------------------- # cdo is used to verify the data values # Test 1: original grid 'r360x180' diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index 90b48b6..9b3cc6a 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -1,4 +1,3 @@ -context("Transform: check with cdo") ############################################## # This unit test checks different border situations: normal regional that doesn't touch the borders, diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index 3852da9..d7d895e 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -4,8 +4,6 @@ # The test contains three calls: lat_reorder = Sort(), no lat_reorder, and lat_reorder = Sort(decreasing = T). # Note that the original latitude is descending [90:-90]. cdo result is ascending [-90:90]. 
-context("Transform and lat_reorder test: 'all'") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index f729545..16daa79 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -9,8 +9,6 @@ #!!!!!!!!!!!!!!!!!!!!!PROBLEM in test 2, indices(640:1)!!!!!!!!!!!!!!!!!!!! #TODO: Add regional test -context("Transform and lat_reorder test: indices") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index 0333101..b70b637 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -7,8 +7,6 @@ # Each of them contains three calls: lat_reorder = Sort(), no lat_reorder, and lat_reorder = Sort(decreasing = T). # Note that the original latitude is descending [90:-90]. cdo result is ascending [-90:90]. -context("Transform and lat_reorder test: values") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index d3c3dfa..d07388e 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() transform across_meridian lon order check") - test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index 62d31da..227f09d 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -1,4 +1,3 @@ -context("Start() transform metadata check") # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. Sort() and CircularSort(0, 360)", { diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 500168e..95e7c2b 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -8,8 +8,6 @@ # Note that the original latitude is descending [90:-90]. -context("Transform: three selector forms") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index e2fef3b..46b57d8 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -1,5 +1,4 @@ # ex1_8 -context("Start() two dats and two vars in one call") test_that("1. ex1_8, case 1", { diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index a84530f..1a6288b 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -4,7 +4,6 @@ # 3. transform, indices reversed # 4. 
no transform, indices reversed -context("List of values and vector of values") #----------------------------------------------------------------- # To get lat and lon vectors -- GitLab From c31d72871893a37ad94853a8be54f5ecca882b13 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 15:34:52 +0200 Subject: [PATCH 43/46] Change doc to be the same as ByChunks_autosubmit() --- R/ByChunks_autosubmit.R | 2 +- R/Compute.R | 12 +++++++----- man/Compute.Rd | 12 +++++++----- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 08414f9..65ab36e 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -27,7 +27,7 @@ #' where to store temporary files generated for the automatic management of the #' workflow manager. This path should be available on the local workstation as well #' as on the autosubmit machine. The default value is NULL, and a temporary folder -#' will be created. +#' under the current working folder will be created. #'@param autosubmit_server A character vector indicating the login node of the #' autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02". #' The default value is NULL, and the node will be randomly chosen. diff --git a/R/Compute.R b/R/Compute.R index d288a4d..6a476ef 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -39,11 +39,13 @@ #' \code{c(host = 'hostname', port = port_number)}. Only needed when the #' execution is run remotely. The default value is NULL. #'@param autosubmit_suite_dir A character string indicating the path to a folder -#' that can be found locally and on Autosubmit machine where to store temporary -#' files generated for Autosubmit to establish workflow. -#'@param autosubmit_server A character string of autosubmit machine name ( -#' 'bscesautosubmit01'or 'bscesautosubmit02'.) If it is NULL, a random one will -#' be assigned. The default value is NULL. +#' where to store temporary files generated for the automatic management of the +#' workflow manager. This path should be available on the local workstation as well +#' as on the autosubmit machine. The default value is NULL, and a temporary folder +#' under the current working folder will be created. +#'@param autosubmit_server A character vector indicating the login node of the +#' autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02". +#' The default value is NULL, and the node will be randomly chosen. #'@param silent A logical value deciding whether to print the computation #' progress (FALSE) on the R session or not (TRUE). It only works when the #' execution runs locally or the parameter 'wait' is TRUE. The default value diff --git a/man/Compute.Rd b/man/Compute.Rd index 610af3d..9aa2a76 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -57,12 +57,14 @@ EC-Flow server. The vector form should be execution is run remotely. The default value is NULL.} \item{autosubmit_suite_dir}{A character string indicating the path to a folder -that can be found locally and on Autosubmit machine where to store temporary -files generated for Autosubmit to establish workflow.} +where to store temporary files generated for the automatic management of the +workflow manager. This path should be available on the local workstation as well +as on the autosubmit machine. The default value is NULL, and a temporary folder +under the current working folder will be created.} -\item{autosubmit_server}{A character string of autosubmit machine name ( -'bscesautosubmit01'or 'bscesautosubmit02'.) If it is NULL, a random one will -be assigned.
The default value is NULL.} +\item{autosubmit_server}{A character vector indicating the login node of the +autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02". +The default value is NULL, and the node will be randomly chosen.} \item{silent}{A logical value deciding whether to print the computation progress (FALSE) on the R session or not (TRUE). It only works when the -- GitLab From 005c802f33541b51e9c1b5acc45259090c8ec60b Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 15:49:22 +0200 Subject: [PATCH 44/46] Change check.attributes to ignore_attr --- tests/testthat/test-Compute-NumChunks.R | 2 +- tests/testthat/test-Start-metadata_filedim_dependency.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index d85c8bf..ffce880 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -47,7 +47,7 @@ res2 <- Compute(workflow = wf, expect_equal( res1, res2, -check.attributes = FALSE +ignore_attr = TRUE ) }) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index da3fe86..227383b 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -83,7 +83,7 @@ expect_equal( expect_equal( test6, test4, - check.attributes = FALSE + ignore_attr = TRUE ) #---------------------------------------- @@ -119,7 +119,7 @@ expect_equal( expect_equal( test6, test6a, - check.attributes = FALSE + ignore_attr = TRUE ) #---------------------------------------- @@ -157,7 +157,7 @@ expect_equal( expect_equal( test14a, test6a, - check.attributes = FALSE + ignore_attr = TRUE ) #------------------------------------------------- @@ -193,7 +193,7 @@ expect_equal( expect_equal( test15a, test6a, - check.attributes = FALSE + ignore_attr = TRUE ) }) -- GitLab From 28bab87e4a64392f912212c696214c94a5a96e6f Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 16:13:12 +0200 Subject: [PATCH 45/46] Remove wrong words --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 1b3062e..9219f96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ - Load variable metadata when retrieve = F - Change Compute() "threads_load" to 1 to be consistent with documentation - Add Autosubmit as workflow manager -- SelectorChecker() to recognize class integer Task actions +- SelectorChecker() to recognize class integer # startR v2.2.3 (Release date: 2023-06-06) - Bugfix in Start(): when using parameter `longitude = 'all'` with transform, there was a missing point for some cases. -- GitLab From a2cf1a245a7944d6ad5d9a8a9557041963a113d6 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 4 Sep 2023 16:01:38 +0200 Subject: [PATCH 46/46] Change threads_compute and threads_load documentation to 'core', not 'process' or 'thread'. --- R/Compute.R | 4 ++-- R/Start.R | 2 +- inst/doc/practical_guide.md | 4 ++-- man/Compute.Rd | 4 ++-- man/Start.Rd | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/Compute.R b/R/Compute.R index 6a476ef..5a58abd 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -20,9 +20,9 @@ #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. #'@param threads_load An integer indicating the number of parallel execution -#' processes to use for the data retrieval stage. The default value is 1.
+#' cores to use for the data retrieval stage. The default value is 1. #'@param threads_compute An integer indicating the number of parallel execution -#' processes to use for the computation. The default value is 1. +#' cores to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The components vary across different machines. #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more diff --git a/R/Start.R b/R/Start.R index b72a02b..b0ad40d 100644 --- a/R/Start.R +++ b/R/Start.R @@ -704,7 +704,7 @@ #'@param num_procs An integer of number of processes to be created for the #' parallel execution of the retrieval/transformation/arrangement of the #' multiple involved files in a call to Start(). If set to NULL, -#' takes the number of available cores (as detected by future::detectCores). +#' takes the number of available cores (as detected by future::availableCores). #' The default value is 1 (no parallel execution). #'@param ObjectBigmemory a character string to be included as part of the #' bigmemory object name. This parameter is thought to be used internally by the diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 7038ad7..b22c629 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -378,8 +378,8 @@ The common Compute() parameters of local and remote execution are: - `wf`: The workflow defined by the previous steps. - `chunks`: The dimensions to be chunked and how many chunks you want for each dimension. startR will automatically chunk the data for you. See more details in section [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores). -- `threads_load`: The number of parallel execution processes to be created for data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. -- `threads_compute`: The number of parallel execution processes to be created for data computing. It is used as multiApply::Apply parameter "ncores". +- `threads_load`: The number of parallel execution cores to be created for the data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. +- `threads_compute`: The number of parallel execution cores to be created for data computing. It is used as multiApply::Apply parameter "ncores". Using more than 2 threads for the retrieval will usually be detrimental, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. diff --git a/man/Compute.Rd b/man/Compute.Rd index 9aa2a76..96d063a 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -35,10 +35,10 @@ one chunk.} 'ecFlow'.} \item{threads_load}{An integer indicating the number of parallel execution -processes to use for the data retrieval stage. The default value is 1.} +cores to use for the data retrieval stage. The default value is 1.} \item{threads_compute}{An integer indicating the number of parallel execution -processes to use for the computation. The default value is 1.} +cores to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the machine to be run on.
The components vary across different machines. Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more diff --git a/man/Start.Rd b/man/Start.Rd index 7cdc9f8..25eb8d7 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -684,7 +684,7 @@ value is FALSE.} \item{num_procs}{An integer of number of processes to be created for the parallel execution of the retrieval/transformation/arrangement of the multiple involved files in a call to Start(). If set to NULL, -takes the number of available cores (as detected by future::detectCores). +takes the number of available cores (as detected by future::availableCores). The default value is 1 (no parallel execution).} \item{ObjectBigmemory}{a character string to be included as part of the -- GitLab
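Taken together, patches 41-46 above revise the documentation of Compute()'s Autosubmit-related and threading arguments. The sketch below is not part of the patch series; it only illustrates how those documented parameters fit together. It assumes a startR workflow `wf` already built with Start() and AddStep(); the chunked dimension name, the suite directory path and the expid value are placeholders, the `workflow_manager = 'autosubmit'` argument is inferred from the NEWS entry "Add Autosubmit as workflow manager", and a real run needs further `cluster` fields that are omitted here rather than guessed.

library(startR)

# Hypothetical call: 'wf' is an existing startR workflow object.
res <- Compute(workflow = wf,
               workflow_manager = 'autosubmit', # assumed argument, see lead-in note
               chunks = list(latitude = 2),     # split one non-target dimension into 2 chunks
               threads_load = 1,                # retrieval stage; used as Start() 'num_procs'
               threads_compute = 4,             # computation; used as multiApply::Apply 'ncores'
               cluster = list(expid = 'a001'),  # placeholder Autosubmit experiment id; other fields omitted
               # Path that must be reachable from both the local workstation and the
               # autosubmit machine; when NULL, a temporary folder under the current
               # working folder is created.
               autosubmit_suite_dir = '/path/to/suite_dir',
               autosubmit_server = NULL)        # NULL: login node chosen at random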