diff --git a/.Rbuildignore b/.Rbuildignore
index 2ef8ba9063900318c7c0be04cc2ac48a636842e4..aa7059a3b2102a246821ef37d6ddd1d831deb5f0 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,11 +11,9 @@
 ## unit tests should be ignored when building the package for CRAN
 ^tests$
 ^inst/PlotProfiling\.R$
-
+^.gitlab$
 # Suggested by http://r-pkgs.had.co.nz/package.html
 ^.*\.Rproj$         # Automatically added by RStudio,
 ^\.Rproj\.user$     # used for temporary files.
-^README\.Rmd$       # An Rmarkdown file used to generate README.md
 ^cran-comments\.md$ # Comments for CRAN submission
 #^NEWS\.md$          # A news file written in Markdown
-^\.gitlab-ci\.yml$
diff --git a/.gitlab/issue_templates/Default.md b/.gitlab/issue_templates/Default.md
new file mode 100644
index 0000000000000000000000000000000000000000..6d47b7150d6ddd446c2013022b5bfbcdc94abce4
--- /dev/null
+++ b/.gitlab/issue_templates/Default.md
@@ -0,0 +1,26 @@
+(This is a template to report problems or suggest a new development. Please fill in the relevant information and remove the rest.)
+
+Hi @aho,
+
+#### Summary
+(Bug: Summarize the bug and briefly explain the expected and the current behavior.)
+(New development: Summarize the development needed.)
+
+#### Example
+(Bug: Provide a **minimal reproducible example** and the error message. See [How To Build A Minimal Reproducible Example](https://docs.google.com/document/d/1zRlmsRwFDJctDB94x6HGf6ezu3HFHhEjaBu0hVcrwTI/edit#heading=h.skblym4acpw5))
+(New development: Provide an example script or a useful piece of code if applicable.)
+
+```r
+#Example:
+exp <- Start(...)
+```
+> Error in Start: &%$("!*
+
+#### Module and Package Version
+(Which R version are you using? e.g., R/4.1.2)
+(What other modules and their versions are required to reproduce this issue? e.g., PROJ/4.8.0-foss-2015a)
+(Which R package versions are you using? Check with sessionInfo(). e.g., startR_2.2.3)
+(Which machine are you using? WS, Nord3, hub, others...)
+
+#### Other Relevant Information
+(Additional information.)
diff --git a/DESCRIPTION b/DESCRIPTION
index 2d569fe9c1883931c7784607620b2181a04e6f5a..60fa08cdacb88cba6abc5814af42af6020084162 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: startR
 Title: Automatically Retrieve Multidimensional Distributed Data Sets
-Version: 2.2.3
+Version: 2.3.0
 Authors@R: c(
     person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")),
     person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")),
@@ -35,7 +35,8 @@ Imports:
 Suggests:
     stats,
     utils,
-    testthat
+    testthat,
+    yaml
 License: GPL-3
 URL: https://earth.bsc.es/gitlab/es/startR/
 BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues
diff --git a/NEWS.md b/NEWS.md
index 888fb729955e559437f574fd6ec013f573dedeef..9219f962797d99c8bdc6c950bc13dc272583f2d3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# startR v2.3.0 (Release date: 2023-08-31)
+- Load variable metadata when retrieve = FALSE
+- Change the default of Compute() "threads_load" to 1 to be consistent with the documentation
+- Add Autosubmit as a workflow manager
+- Make SelectorChecker() recognize the class integer
+
 # startR v2.2.3 (Release date: 2023-06-06)
- Bugfix in Start(): when using parameter `longitude = 'all'` with transform, there was a missing point for some cases.
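(Editor's illustration, not part of the patch: the NEWS entry "Add Autosubmit as a workflow manager" is implemented by the changes below. The sketch shows how the new Compute() arguments fit together; the workflow object `wf`, the module name, and the chunking are assumptions, while the cluster components are the ones validated in ByChunks_autosubmit().)

```r
# A minimal sketch; 'wf' is assumed to be built with Start() + Step() + AddStep()
res <- Compute(wf,
               chunks = list(latitude = 2, longitude = 2),
               workflow_manager = 'Autosubmit',
               cluster = list(queue_host = 'nord3',      # supported: 'local', 'nord3'
                              expid = NULL,              # NULL: a new experiment is created
                              r_module = 'R/4.1.2',      # assumed module name
                              autosubmit_module = 'autosubmit',
                              cores_per_job = 2,
                              job_wallclock = '01:00:00',
                              max_jobs = 4,
                              polling_period = 10),
               autosubmit_suite_dir = '/home/user/startR_local_autosubmit', # assumed path
               autosubmit_server = NULL,                 # a login node is picked at random
               wait = TRUE)
```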
diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R
new file mode 100644
index 0000000000000000000000000000000000000000..65ab36eeeef2b475a2449f0ab7af28a19e7e67e7
--- /dev/null
+++ b/R/ByChunks_autosubmit.R
@@ -0,0 +1,662 @@
+#'Execute the operation by chunks
+#'
+#'This is an internal function used in Compute(), executing the operation by
+#'the chunks specified in Compute(). It also returns the configuration details
+#'and profiling information. It is used when the workflow manager is
+#'Autosubmit.
+#'
+#'@param step_fun A function with the class 'startR_step_fun' containing the
+#'  details of the operation.
+#'@param cube_headers A list with the class 'startR_cube' returned by Start().
+#'  It contains the details of the data to be operated on.
+#'@param \dots Additional parameters for the inputs of 'step_fun'.
+#'@param chunks A named list of the dimensions along which to split the data
+#'  and the number of chunks to make for each. The chunked dimensions can only
+#'  be those not required as the target dimensions in function Step(). The
+#'  default value is 'auto', which lists all the non-target dimensions and
+#'  makes one chunk for each.
+#'@param threads_load An integer indicating the number of execution threads to
+#'  use for the data retrieval stage. The default value is 1.
+#'@param threads_compute An integer indicating the number of execution threads
+#'  to use for the computation. The default value is 1.
+#'@param cluster A list of components that define the configuration of the
+#'  machine to be run on. The components vary across machines. Check the
+#'  \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}
+#'  for more details and examples.
+#'@param autosubmit_suite_dir A character string indicating the path to a folder
+#'  where to store the temporary files generated for the automatic management of
+#'  the workflow manager. This path should be available on the local workstation
+#'  as well as on the Autosubmit machine. The default value is NULL, and a
+#'  temporary folder under the current working folder will be created.
+#'@param autosubmit_server A character vector indicating the login node of the
+#'  Autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02".
+#'  The default value is NULL, and the node will be randomly chosen.
+#'@param silent A logical value deciding whether to print the computation
+#'  progress (FALSE) on the R session or not (TRUE). It only works when the
+#'  execution runs locally or the parameter 'wait' is TRUE. The default value
+#'  is FALSE.
+#'@param debug A logical value deciding whether to return detailed messages on
+#'  the progress and operations in a Compute() call (TRUE) or not (FALSE).
+#'  Automatically changed to FALSE if parameter 'silent' is TRUE. The default
+#'  value is FALSE.
+#'@param wait A logical value deciding whether the R session waits for the
+#'  Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an
+#'  object with all the information of the startR execution that can be stored
+#'  on your disk. After that, the R session can be closed and the results can
+#'  be collected later with the Collect() function. The default value is TRUE.
+#'
+#'@return A list of data arrays for the output returned by the last step in the
+#'  specified workflow. The configuration details and profiling information are
+#'  attached as attributes to the returned list of arrays.
+#' +#'@examples +#' # ByChunks_autosubmit() is internally used in Compute(), not intended to be +#' # used by users. The example just illustrates the inputs of +#' # ByChunks_autosubmit(). +#' # data_path <- system.file('extdata', package = 'startR') +#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' # sdates <- c('200011', '200012') +#' # data <- Start(dat = list(list(path = path_obs)), +#' # var = 'tos', +#' # sdate = sdates, +#' # time = 'all', +#' # latitude = 'all', +#' # longitude = 'all', +#' # return_vars = list(latitude = 'dat', +#' # longitude = 'dat', +#' # time = 'sdate'), +#' # retrieve = FALSE) +#' # fun <- function(x) { +#' # lat = attributes(x)$Variables$dat1$latitude +#' # weight = sqrt(cos(lat * pi / 180)) +#' # corrected = Apply(list(x), target_dims = "latitude", +#' # fun = function(x) {x * weight}) +#' # } +#' # step <- Step(fun = fun, +#' # target_dims = 'latitude', +#' # output_dims = 'latitude', +#' # use_libraries = c('multiApply'), +#' # use_attributes = list(data = "Variables")) +#' #ByChunks_autosubmit(step, data) +#' +#'@import multiApply +#'@importFrom methods is +#'@noRd +ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 1, threads_compute = 1, + cluster = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { + + #NOTE: + #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ + #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ + #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ + #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + + # Build object to store profiling timings + t_begin_total <- Sys.time() + t_begin_bychunks_setup <- t_begin_total + timings <- list(nchunks = NULL, + concurrent_chunks = NULL, + cores_per_job = NULL, + threads_load = NULL, + threads_compute = NULL, + bychunks_setup = NULL, + transfer = NULL, + queue = NULL, + job_setup = NULL, + load = NULL, + compute = NULL, + transfer_back = NULL, + merge = NULL, + total = NULL) + + MergeArrays <- .MergeArrays + + # Sanity checks + ## step_fun + if (!is(step_fun, 'startR_step_fun')) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + ## cube_headers + if (is(cube_headers, 'startR_cube')) { + cube_headers <- list(cube_headers) + } + if (!all(sapply(lapply(cube_headers, class), + function(x) 'startR_cube' %in% x))) { + stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", + "as returned by Start().") + } + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs expected by the function 'step_fun'.") + } + + ## threads_load and threads_compute + if (!is.numeric(threads_load)) { + stop("Parameter 'threads_load' must be a numeric value.") + } + threads_load <- round(threads_load) + if (!is.numeric(threads_compute)) { + stop("Parameter 'threads_compute' must be a numeric value.") + } + threads_compute <- round(threads_compute) + timings[['threads_load']] <- threads_load + timings[['threads_compute']] <- threads_compute + + ## autosubmit_suite_dir + if (is.null(autosubmit_suite_dir)) { + # Create a tmp folder as autosubmit_suite_dir + autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") + if (!dir.exists(autosubmit_suite_dir)) { + 
dir.create(autosubmit_suite_dir, recursive = FALSE)
+    }
+    .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Creating a temporary ",
+                    "folder under the current directory: ", autosubmit_suite_dir, "/. Make sure ",
+                    "that the Autosubmit machine can find this path."))
+  }
+  if (!is.character(autosubmit_suite_dir)) {
+    stop("Parameter 'autosubmit_suite_dir' must be a character string.")
+  }
+
+  ## autosubmit_server
+  if (!is.null(autosubmit_server)) {
+    if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) {
+      stop("Parameter 'autosubmit_server' must be one of the existing Autosubmit machine login nodes, 'bscesautosubmit01' or 'bscesautosubmit02'.")
+    }
+  } else {
+    autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1))
+  }
+
+  ## silent
+  if (!is.logical(silent)) {
+    stop("Parameter 'silent' must be logical.")
+  }
+
+  ## debug
+  if (!is.logical(debug)) {
+    stop("Parameter 'debug' must be logical.")
+  }
+  if (silent) {
+    debug <- FALSE
+  }
+
+  ## wait
+  if (!is.logical(wait)) {
+    stop("Parameter 'wait' must be logical.")
+  }
+
+  ## cluster
+  default_cluster <- list(queue_host = NULL,
+#                          queue_type = 'slurm',
+                          data_dir = NULL,
+#                          temp_dir = NULL,
+                          lib_dir = NULL,
+                          init_commands = list(''),
+                          r_module = 'R',
+                          CDO_module = NULL,
+                          autosubmit_module = 'autosubmit',
+                          node_memory = NULL, # not used
+                          cores_per_job = NULL,
+                          job_wallclock = '01:00:00',
+                          max_jobs = 6,
+                          extra_queue_params = list(''),
+#                          bidirectional = TRUE,
+                          polling_period = 10,
+                          special_setup = 'none',
+                          expid = NULL,
+                          hpc_user = NULL,
+                          run_dir = NULL)
+  if (!is.list(cluster) || is.null(names(cluster))) {
+    stop("Parameter 'cluster' must be a named list.")
+  }
+  if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir',
+                                  'temp_dir', 'lib_dir', 'init_commands',
+                                  'r_module', 'CDO_module', 'autosubmit_module',
+                                  'ecflow_module', 'node_memory',
+                                  'cores_per_job', 'job_wallclock', 'max_jobs',
+                                  'extra_queue_params', 'bidirectional',
+                                  'polling_period', 'special_setup', 'expid', 'hpc_user',
+                                  'run_dir'
+                                  )))) {
+    stop("Found invalid component names in parameter 'cluster'.")
+  }
+  # Remove ecFlow components
+  redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional')
+  if (any(redundant_components %in% names(cluster))) {
+    tmp <- redundant_components[which(redundant_components %in% names(cluster))]
+    .warning(paste0("Cluster component ", paste(tmp, collapse = ','),
+                    " not used when Autosubmit is the workflow manager."))
+    cluster[tmp] <- NULL
+  }
+  default_cluster[names(cluster)] <- cluster
+  cluster <- default_cluster
+
+  ### queue_host
+  support_hpcs <- c('local', 'nord3') # names in platforms.yml
+  if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) {
+    stop("Cluster component 'queue_host' must be one of the following: ",
+         paste(support_hpcs, collapse = ','), '.')
+  }
+
+  ### data_dir
+  is_data_dir_shared <- FALSE
+  if (is.null(cluster[['data_dir']])) {
+    is_data_dir_shared <- TRUE
+  } else {
+    if (!is.character(cluster[['data_dir']])) {
+      stop("The component 'data_dir' of the parameter 'cluster' must be a character string.")
+    }
+    remote_data_dir <- cluster[['data_dir']]
+  }
+  ### lib_dir
+  if (!is.null(cluster[['lib_dir']])) {
+    if (!is.character(cluster[['lib_dir']])) {
+      stop("The component 'lib_dir' of the parameter 'cluster' must be NULL or ",
+           "a character string.")
+    }
+  }
+  ### init_commands
+  if (!is.list(cluster[['init_commands']]) ||
+      !all(sapply(cluster[['init_commands']], is.character))) {
+    stop("The component 
'init_commands' of the parameter 'cluster' must be a list of ",
+         "character strings.")
+  }
+  ### r_module
+  if (!is.character(cluster[['r_module']])) {
+    stop("The component 'r_module' of the parameter 'cluster' must be a character string.")
+  }
+  if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) {
+    stop("The component 'r_module' of the parameter 'cluster' must have at least one character ",
+         "and contain no blank spaces.")
+  }
+  ### CDO_module
+  if (!is.null(cluster[['CDO_module']])) {
+    if (!is.character(cluster[['CDO_module']])) {
+      stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.")
+    }
+    if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) {
+      warning("The component 'CDO_module' of parameter 'cluster' must have at least one ",
+              "character and contain no blank spaces; only the first element will be used.")
+    }
+    cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']])
+  }
+  ### autosubmit_module
+  if (!is.character(cluster[['autosubmit_module']])) {
+    stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.")
+  }
+  ### cores_per_job
+  if (is.null(cluster[['cores_per_job']])) {
+    cluster[['cores_per_job']] <- threads_compute
+  }
+  if (!is.numeric(cluster[['cores_per_job']])) {
+    stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['cores_per_job']] <- round(cluster[['cores_per_job']])
+# NOTE: Why do we have this condition?
+#  if (cluster[['cores_per_job']] > threads_compute) {
+#    .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].")
+#  }
+  ### job_wallclock
+  tmp <- strsplit(cluster[['job_wallclock']], ':')[[1]]
+  if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) {
+    stop("The component 'job_wallclock' should be in the format HH:MM or HH:MM:SS.")
+  }
+  ### max_jobs
+  if (!is.numeric(cluster[['max_jobs']])) {
+    stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['max_jobs']] <- round(cluster[['max_jobs']])
+  ### extra_queue_params
+  if (!is.list(cluster[['extra_queue_params']]) ||
+      !all(sapply(cluster[['extra_queue_params']], is.character))) {
+    stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ",
+         "character strings.")
+  }
+  ### polling_period
+  if (!is.numeric(cluster[['polling_period']])) {
+    stop("The component 'polling_period' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['polling_period']] <- round(cluster[['polling_period']])
+  ### special_setup
+  if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) {
+    stop("The value provided for the component 'special_setup' of the parameter ",
+         "'cluster' is not recognized.")
+  }
+  ### expid
+  as_module <- cluster[['autosubmit_module']]
+  if (is.null(cluster[['expid']])) {
+    text <- system(
+      paste0("module load ", as_module, "; ",
+             "autosubmit expid -H local -d 'startR computation'"),
+      intern = T)
+    cluster[['expid']] <- strsplit(
+      text[grep("The new experiment", text)],
+      "\"")[[1]][2]
+    message(paste0("ATTENTION: The new experiment '", cluster[['expid']],
+                   "' is created. 
Please note it down.")) + } else { + if (!is.character(cluster[['expid']]) | length(cluster[['expid']]) != 1) { + stop("The component 'expid' of the parameter 'cluster' must be a character string.") + } + if (!dir.exists(file.path("/esarchive/autosubmit", cluster[['expid']]))) { + stop("Cluster component 'expid' is not found under /esarchive/autosubmit/.") + } + } + suite_id <- cluster[['expid']] + + ### hpc_user + if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) { + stop("Cluster component 'hpc_user' must be a character string.") + } + ### run_dir + if (!is.null(cluster$run_dir)) { + if (!dir.exists(cluster$run_dir)) { + stop("Cluster component 'run_dir' ", cluster$run_dir," is not found.") + } + } + +#============================================== + + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + if (!dir.exists(autosubmit_suite_dir_suite)) { + dir.create(autosubmit_suite_dir_suite, recursive = TRUE) + } + if (!dir.exists(autosubmit_suite_dir_suite)) { + stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") + } + + remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') + remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + + + # Work out chunked dimensions and target dimensions + all_dims <- lapply(cube_headers, attr, 'Dimensions') + all_dims_merged <- NULL + for (i in all_dims) { + if (is.null(all_dims_merged)) { + all_dims_merged <- i + } else { + all_dims_merged <- .MergeArrayDims(all_dims_merged, i)[[3]] + } + } + all_dimnames <- names(all_dims_merged) + + target_dims_indices <- which(all_dimnames %in% unlist(attr(step_fun, 'TargetDims'))) + target_dims <- NULL + if (length(target_dims_indices) > 0) { + target_dims <- all_dimnames[target_dims_indices] + } + + chunked_dims <- all_dimnames + if (length(target_dims_indices) > 0) { + chunked_dims <- chunked_dims[-target_dims_indices] + } + if (length(chunked_dims) < 1) { + stop("Not possible to process input by chunks. 
All input dimensions are ", + "target dimensions.") + } + + # Check all input headers have matching dimensions + cube_index <- 1 + for (cube_header in cube_headers) { + + # Check if all the margin dims are consistent among datasets + if (!all(chunked_dims %in% names(attr(cube_header, "Dimensions")))) { + trouble_dim_name <- chunked_dims[which(!chunked_dims %in% + names(attr(cube_header, "Dimensions")))] + stop(paste0("Found margin dimension, ", toString(trouble_dim_name), + ", is not in input data ", cube_index, ".")) + } + + # Only check margin dimensions (i.e., chunked_dims) + if (!all(attr(cube_header, 'Dimensions')[chunked_dims] == all_dims_merged[names(attr(cube_header, 'Dimensions'))][chunked_dims])) { + stop("All provided 'cube_headers' must have matching dimension lengths ", + "with each other.") + } + if (!all(attr(step_fun, 'TargetDims')[[cube_index]] %in% names(attr(cube_header, 'Dimensions')))) { + stop("All provided 'cube_headers' must contain at least the target dimensions ", + "expected by 'step_fun'.") + } + cube_index <- cube_index + 1 + # work out expected result dimensions + } + + # Check chunks + default_chunks <- as.list(rep(1, length(chunked_dims))) + names(default_chunks) <- chunked_dims + if (length(chunks) == 1 && chunks == 'auto') { + chunks <- default_chunks + } + if (!is.list(chunks)) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (is.null(names(chunks))) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (any(!(names(chunks) %in% chunked_dims))) { + stop("All names in parameter 'chunks' must be one of the non-target dimensions ", + "present in the cubes in 'cube_headers'. The target dimensions are ", + paste(paste0("'", target_dims, "'"), collapse = ', '), ". The non-target ", + "dimensions (margins) are ", paste(paste0("'", chunked_dims, "'"), collapse = ', '), ".") + } + if (any(!(((unlist(chunks) %% 1) == 0) | (unlist(chunks) == 'all')))) { + stop("All values in parameter 'chunks' must take a numeric value or 'all'.") + } + if (any(unlist(chunks) < 1)) { + stop("All values in parameter 'chunks' must be >= 1.") + } + for (chunk_spec in 1:length(chunks)) { + if (chunks[[chunk_spec]] > all_dims_merged[names(chunks)[chunk_spec]]) { + stop("Too many chunks requested for the dimension ", names(chunks)[chunk_spec], + ". 
Maximum allowed is ", all_dims_merged[names(chunks)[chunk_spec]]) + } + } + default_chunks[names(chunks)] <- chunks + #NOTE: chunks here has all the margin dims, not only the chunked ones + chunks <- default_chunks + timings[['nchunks']] <- prod(unlist(chunks)) + + # Replace 'all's + chunks_all <- which(unlist(chunks) == 'all') + if (length(chunks_all) > 0) { + chunks[chunks_all] <- all_dims[names(chunks)[chunks_all]] + } + + # Copy load_process_save_chunk_autosubmit.R into local folder + chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', + package = 'startR')) + chunk_script_lines <- readLines(chunk_script) + close(chunk_script) + chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', + paste(deparse(cluster[['lib_dir']]), collapse = '\n')), + chunk_script_lines) + #TODO: Change out_dir to somewhere else like expid/outputs/ + chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', + paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) + chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), + chunk_script_lines) + deparsed_calls <- paste0('start_calls <- list(') + extra_path <- '' + if (cluster[['special_setup']] == 'marenostrum4') { + extra_path <- '/gpfs/archive/bsc32/' + } + for (cube_header in 1:length(cube_headers)) { + pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') + bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] + bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + if (!is_data_dir_shared) { + cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', + extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) + for (file_n in 1:length(bk_expected_files)) { + attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', + extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) + } + } + deparsed_calls <- paste0(deparsed_calls, '\nquote(', + paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), + ')') + cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim + attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files + if (cube_header < length(cube_headers)) { + deparsed_calls <- paste0(deparsed_calls, ', ') + } + } + deparsed_calls <- paste0(deparsed_calls, '\n)') + chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) + chunk_script_lines <- gsub('^start_calls_attrs <- *', + paste0('start_calls_attrs <- ', + paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^param_dimnames <- *', + paste0('param_dimnames <- ', + paste(deparse(chunked_dims), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), + chunk_script_lines) + chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), + chunk_script_lines) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) + + # Write and copy startR_autosubmit.sh into local folder + write_autosubmit_bash(chunks, cluster, 
autosubmit_suite_dir = autosubmit_suite_dir) + + # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) + + # Iterate through chunks + chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) + arrays_of_results <- vector('list', length(attr(step_fun, 'OutputDims'))) + names(arrays_of_results) <- names(attr(step_fun, 'OutputDims')) + for (component in 1:length(arrays_of_results)) { + arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) + dim(arrays_of_results[[component]]) <- (unlist(chunks)) + } + found_first_result <- FALSE + for (i in 1:length(chunk_array)) { + chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] + names(chunk_indices) <- names(dim(chunk_array)) + } + + + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- cluster[['max_jobs']] + + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { + #NOTE: Not consider this part yet + t_begin_transfer <- Sys.time() + .message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! $(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) + } else { + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + .message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + .message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit create ", suite_id, " -np; ", + "autosubmit refresh ", suite_id, "; ") + if (wait) { + sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + } else { + sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? 
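+    # With wait = FALSE, Autosubmit keeps running in the background after this
+    # call returns; the chunk results are gathered later with Collect().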
+  }
+  if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') {
+    #NOTE: If we ssh to the AS VM and run everything there, we don't need to ssh here
+    system(sys_commands)
+
+  } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") {
+    # ssh from WS to AS VM to run exp
+    as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es')
+    sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"')
+    system(sys_commands)
+
+  } else {
+    stop("Cannot identify host ", Sys.getenv("HOSTNAME"), ". Where should the AS experiment run?")
+  }
+
+  # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed.
+  failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T)
+  if (substr(failed_file_size, 1, 1) != "0") {
+    # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist
+    # If run_dir is specified, the files are under run_dir; if not, the files are under proj/STARTR_CHUNKING_xxxx/
+    if (!is.null(cluster[['run_dir']])) {
+      file.remove(
+        file.path(cluster[['run_dir']],
+                  list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))])
+      )
+    } else {
+      file.remove(
+        file.path(remote_autosubmit_suite_dir_suite,
+                  list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))])
+      )
+    }
+
+    stop("Some Autosubmit jobs failed. Check the GUI and the logs.")
+  }
+
+  timings[['total']] <- t_begin_total
+  startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit',
+                      suite_id = suite_id, chunks = chunks,
+                      num_outputs = length(arrays_of_results),
+                      autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server,
+                      timings = timings)
+  class(startr_exec) <- 'startR_exec'
+
+  if (wait) {
+    result <- Collect(startr_exec, wait = TRUE, remove = T)
+    .message("Computation ended successfully.")
+    return(result)
+
+  } else {
+    # if wait = F, return startr_exec and merge chunks in Collect().
+    return(startr_exec)
+  }
+
+}
diff --git a/R/ByChunks.R b/R/ByChunks_ecflow.R
similarity index 93%
rename from R/ByChunks.R
rename to R/ByChunks_ecflow.R
index 37a554c1e44d8fda4bc60a6a2a0732c54e4009b3..6292448cea2503641ec999e39ffbad2c5fb31653 100644
--- a/R/ByChunks.R
+++ b/R/ByChunks_ecflow.R
@@ -19,9 +19,10 @@
 #'@param threads_compute An integer indicating the number of execution threads
 #'  to use for the computation. The default value is 1.
 #'@param cluster A list of components that define the configuration of the
-#'  machine to be run on. The comoponents vary from the different machines.
-#'  Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more
-#'  details and examples.
+#'  machine to be run on. The components vary across machines.
+#'  Check the
+#'  \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}
+#'  for more examples.
 #'  Only needed when the computation is not run locally. The default value is
 #'  NULL.
 #'@param ecflow_suite_dir A character string indicating the path to a folder in
@@ -51,8 +52,8 @@
 #'  attached as attributes to the returned list of arrays.
 #'
 #'@examples
-#' # ByChunks() is internally used in Compute(), not intended to be used by
-#' # users. The example just illustrates the inputs of ByChunks().
+#' # ByChunks_ecflow() is internally used in Compute(), not intended to be used
+#' # by users. The example just illustrates the inputs of ByChunks_ecflow().
#' # data_path <- system.file('extdata', package = 'startR')
#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
#' # sdates <- c('200011', '200012')
@@ -77,18 +78,18 @@
#' #              output_dims = 'latitude',
#' #              use_libraries = c('multiApply'),
#' #              use_attributes = list(data = "Variables"))
-#' #ByChunks(step, data)
+#' #ByChunks_ecflow(step, data)
#'
#'@import multiApply
#'@importFrom methods is
#'@noRd
-ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
-                     threads_load = 2, threads_compute = 1,
-                     cluster = NULL,
-                     ecflow_suite_dir = NULL,
-                     ecflow_server = NULL,
-                     silent = FALSE, debug = FALSE,
-                     wait = TRUE) {
+ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto',
+                            threads_load = 1, threads_compute = 1,
+                            cluster = NULL,
+                            ecflow_suite_dir = NULL,
+                            ecflow_server = NULL,
+                            silent = FALSE, debug = FALSE,
+                            wait = TRUE) {
   # Build object to store profiling timings
   t_begin_total <- Sys.time()
   t_begin_bychunks_setup <- t_begin_total
@@ -181,15 +182,23 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
     if (is.null(names(cluster))) {
       stop("Parameter 'cluster' must be a named list.")
     }
-    if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir',
-                                    'temp_dir', 'lib_dir', 'init_commands',
-                                    'r_module', 'CDO_module',
-                                    'ecflow_module', 'node_memory',
-                                    'cores_per_job', 'job_wallclock', 'max_jobs',
+    if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir',
+                                    'temp_dir', 'lib_dir', 'init_commands',
+                                    'r_module', 'CDO_module', 'autosubmit_module',
+                                    'ecflow_module', 'node_memory',
+                                    'cores_per_job', 'job_wallclock', 'max_jobs',
                                     'extra_queue_params', 'bidirectional',
-                                    'polling_period', 'special_setup')))) {
+                                    'polling_period', 'special_setup', 'expid', 'hpc_user')))) {
+      stop("Found invalid component names in parameter 'cluster'.")
     }
+    # Remove Autosubmit components
+    redundant_components <- c('autosubmit_module', 'expid', 'hpc_user')
+    if (any(redundant_components %in% names(cluster))) {
+      tmp <- redundant_components[which(redundant_components %in% names(cluster))]
+      .warning(paste0("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager."))
+      cluster[tmp] <- NULL
+    }
     default_cluster[names(cluster)] <- cluster
   }
   localhost_name <- NULL
@@ -258,8 +267,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
       stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.")
     }
     if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) {
-      warning("The component 'CDO_module' of parameter 'cluster' must have ",
-              " than 1 and only the first element will be used.")
+      .warning(paste0("The component 'CDO_module' of parameter 'cluster' must have at least ",
+                      "one character and contain no blank spaces; only the first element will be used."))
     }
     cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']])
   }
@@ -428,8 +437,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
                    ". 
Make sure passwordless ", "access is properly set in both directions.")) - # Copy load_process_save_chunk.R into shared folder - chunk_script <- file(system.file('chunking/load_process_save_chunk.R', + # Copy load_process_save_chunk_ecflow.R into shared folder + chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) @@ -480,10 +489,10 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', chunk_script_lines) chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), chunk_script_lines) - writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk.R')) + writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk_ecflow.R')) # Copy Chunk.ecf into shared folder - chunk_ecf_script <- file(system.file('chunking/Chunk.ecf', + chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) close(chunk_ecf_script) @@ -522,8 +531,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # } else { # transfer_back_line <- '' # } - chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk.R --args \\$task_path insert_indices', - paste0('Rscript load_process_save_chunk.R --args $task_path ', paste(ecf_vars, collapse = ' ')), + chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk_ecflow.R --args \\$task_path insert_indices', + paste0('Rscript load_process_save_chunk_ecflow.R --args $task_path ', paste(ecf_vars, collapse = ' ')), chunk_ecf_script_lines) #chunk_ecf_script_lines <- gsub('^include_transfer_back_and_rm', transfer_back_line, chunk_ecf_script_lines) writeLines(chunk_ecf_script_lines, paste0(ecflow_suite_dir_suite, '/Chunk.ecf')) @@ -549,7 +558,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy queue header into shared folder #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) - chunk_queue_header <- file(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR')) + chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) chunk_queue_header_lines <- gsub('^include_extra_queue_params', @@ -558,12 +567,10 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) # Copy headers - file.copy(system.file('chunking/head.h', package = 'startR'), + file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), ecflow_suite_dir_suite) - file.copy(system.file('chunking/tail.h', package = 'startR'), + file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), ecflow_suite_dir_suite) - #file.copy(system.file('chunking/clean_devshm.sh', package = 'startR'), - # ecflow_suite_dir_suite) } add_line <- function(suite, line, tabs) { @@ -882,7 +889,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', ecflow_server[['port']])) timings[['total']] <- t_begin_total - startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + workflow_manager = 'ecFlow', suite_id = suite_id, 
chunks = chunks,
                      num_outputs = length(arrays_of_results),
                      ecflow_suite_dir = ecflow_suite_dir,
@@ -1000,3 +1008,15 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
   }
   #TODO: check result dimensions match expected dimensions
 }
+
+ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
+                     threads_load = 2, threads_compute = 1,
+                     cluster = NULL,
+                     ecflow_suite_dir = NULL,
+                     ecflow_server = NULL,
+                     silent = FALSE, debug = FALSE,
+                     wait = TRUE) {
+
+  stop(.Deprecated("ByChunks_ecflow"))
+}
+
diff --git a/R/Collect.R b/R/Collect.R
index 4c80b037d4b91101fc0e90fc6c5c3b1f09b5d624..6d752f5fc907d6f6fbe84d3aa015ccbf8f4afc07 100644
--- a/R/Collect.R
+++ b/R/Collect.R
@@ -1,11 +1,11 @@
 #'Collect and merge the computation results
 #'
 #'The final step of the startR workflow after the data operation. It is used when
-#'the parameter 'wait' of Compute() is FALSE, and the functionality includes
-#'updating the job status shown on the EC-Flow GUI and collecting all the chunks
-#'of results as one data array when the execution is done. See more details on
-#'\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}.
-#'
+#'the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the
+#'results into one data array when the execution is done. See more details in the
+#'\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}.
+#'Collect() calls Collect_ecflow() or Collect_autosubmit() according to the
+#'chosen workflow manager.
 #'@param startr_exec An R object returned by Compute() when the parameter 'wait'
 #'  of Compute() is FALSE. It can be directly from a Compute() call or read from
 #'  the RDS file.
@@ -13,14 +13,15 @@
 #'  Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a
 #'  blocking call, in which Collect() will retrieve information from the HPC,
 #'  including signals and outputs, each polling_period seconds. The the status
-#'  can be monitored on the EC-Flow GUI. Collect() will not return until the
-#'  results of all chunks have been received. If FALSE, Collect() will crash with
-#'  an error if the execution has not finished yet, otherwise it will return the
-#'  merged array. The default value is TRUE.
-#'@param remove A logical value deciding whether to remove of all data results
-#'  received from the HPC (and stored under 'ecflow_suite_dir', the parameter in
-#'  Compute()) after being collected. To preserve the data and Collect() it as
-#'  many times as desired, set remove to FALSE. The default value is TRUE.
+#'  can be monitored on the workflow manager GUI. Collect() will not return
+#'  until the results of all the chunks have been received. If FALSE, Collect()
+#'  will return an error if the execution has not finished; otherwise it will
+#'  return the merged array. The default value is TRUE.
+#'@param remove A logical value deciding whether to remove all the chunk results
+#'  received from the HPC after the data are collected, as well as the local job
+#'  folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the
+#'  data and Collect() them as many times as desired, set remove to FALSE. The
+#'  default value is TRUE.
 #'@return A list of merged data array.
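+#'
+#' A minimal sketch of the deferred workflow Collect() supports (the workflow
+#' object and the paths below are illustrative assumptions):
+#' \preformatted{
+#'  res <- Compute(wf, chunks = list(sdate = 2), wait = FALSE, ...)
+#'  saveRDS(res, file = '/path/to/startr_exec.Rds')  # the R session can be closed now
+#'  startr_exec <- readRDS('/path/to/startr_exec.Rds')
+#'  result <- Collect(startr_exec, wait = TRUE, remove = TRUE)
+#' }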
 #'
 #'@examples
@@ -72,10 +73,34 @@
 #'
 #'@export
 Collect <- function(startr_exec, wait = TRUE, remove = TRUE) {
+
+  # Parameter checks
   if (!is(startr_exec, 'startR_exec')) {
     stop("Parameter 'startr_exec' must be an object of the class ",
-         "'startR_exec', as returned by Collect(..., wait = FALSE).")
+         "'startR_exec', as returned by Compute(..., wait = FALSE).")
+  }
+  if (!tolower(startr_exec$workflow_manager) %in% c('ecflow', 'autosubmit')) {
+    stop("Cannot identify the workflow manager. Check the value of 'startr_exec$workflow_manager', which should be 'ecFlow' or 'Autosubmit'.")
+  }
+  if (!is.logical(wait)) {
+    stop("Parameter 'wait' must be logical.")
+  }
+  if (!is.logical(remove)) {
+    stop("Parameter 'remove' must be logical.")
+  }
+
+  if (tolower(startr_exec$workflow_manager) == 'ecflow') {
+    res <- Collect_ecflow(startr_exec, wait = wait, remove = remove)
+  } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') {
+    res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove)
   }
+
+  return(res)
+}
+
+
+Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) {
+
   if (Sys.which('ecflow_client') == '') {
     stop("ecFlow must be installed in order to collect results from a ",
          "Compute() execution.")
@@ -346,3 +371,59 @@
   attr(result, 'startR_compute_profiling') <- timings
   result
 }
+
+
+
+Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) {
+
+  suite_id <- startr_exec[['suite_id']]
+  chunks <- startr_exec[['chunks']]
+  num_outputs <- startr_exec[['num_outputs']]
+  autosubmit_suite_dir <- startr_exec[['autosubmit_suite_dir']]
+  autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/')
+  remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj')
+  remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/')
+  run_dir <- startr_exec$cluster[['run_dir']]
+
+  done <- FALSE
+
+  while (!done) { # If wait, try until it is done
+    # Recount the received chunks on every iteration so the loop can terminate
+    sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite)))
+    if (sum_received_chunks / num_outputs == prod(unlist(chunks))) {
+      done <- TRUE
+
+    } else if (!wait) {
+      stop("Computation in progress...")
+    } else {
+      Sys.sleep(startr_exec$cluster[['polling_period']])
+      message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n",
+              "Check status on the Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id)
+#      Sys.sleep(min(sqrt(attempt), 5))
+    }
+
+  } # while !done
+
+  result <- .MergeChunks(remote_autosubmit_suite_dir, suite_id, remove = remove)
+  if (remove) {
+    .warning("ATTENTION: The source chunks will be removed from the ",
+             "system. 
Store the result after Collect() ends if needed.") + unlink(paste0(autosubmit_suite_dir_suite), + recursive = TRUE) + } + + # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(run_dir)) { + file.remove( + file.path(run_dir, + list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } + + return(result) +} diff --git a/R/Compute.R b/R/Compute.R index 1450b0157b4d5480a8c077c09742f7b02e0e4f12..5a58abd9467bf91c1cea057d13b1bbada0afc1ba 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -19,15 +19,17 @@ #' those not required as the target dimension in function Step(). The default #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. -#'@param threads_load An integer indicating the number of execution threads to -#' use for the data retrieval stage. The default value is 1. -#'@param threads_compute An integer indicating the number of execution threads -#' to use for the computation. The default value is 1. +#'@param threads_load An integer indicating the number of parallel execution +#' cores to use for the data retrieval stage. The default value is 1. +#'@param threads_compute An integer indicating the number of parallel execution +#' cores to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The comoponents vary from the different machines. #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. +#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +#' 'ecFlow'. #'@param ecflow_suite_dir A character string indicating the path to a folder in #' the local workstation where to store temporary files generated for the #' automatic management of the workflow. Only needed when the execution is run @@ -35,7 +37,15 @@ #'@param ecflow_server A named vector indicating the host and port of the #' EC-Flow server. The vector form should be #' \code{c(host = 'hostname', port = port_number)}. Only needed when the -#' execution is run#' remotely. The default value is NULL. +#' execution is run remotely. The default value is NULL. +#'@param autosubmit_suite_dir A character string indicating the path to a folder +#' where to store temporary files generated for the automatic management of the +#' workflow manager. This path should be available in local workstation as well +#' as autosubmit machine. The default value is NULL, and a temporary folder +#' under the current working folder will be created. +#'@param autosubmit_server A character vector indicating the login node of the +#' autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02". +#' The default value is NULL, and the node will be randomly chosen. #'@param silent A logical value deciding whether to print the computation #' progress (FALSE) on the R session or not (TRUE). It only works when the #' execution runs locally or the parameter 'wait' is TRUE. 
The default value @@ -84,11 +94,11 @@ #' #'@importFrom methods is #'@export -Compute <- function(workflow, chunks = 'auto', - threads_load = 1, threads_compute = 1, - cluster = NULL, ecflow_suite_dir = NULL, - ecflow_server = NULL, silent = FALSE, debug = FALSE, - wait = TRUE) { +Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', + threads_load = 1, threads_compute = 1, + cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { # Check workflow if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) { stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", @@ -144,16 +154,42 @@ Compute <- function(workflow, chunks = 'auto', if (!all(sapply(workflow$inputs, class) == 'startR_cube')) { stop("Workflows with only one step supported by now.") } - # Run ByChunks with the combined operation - res <- ByChunks(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - ecflow_suite_dir = ecflow_suite_dir, - ecflow_server = ecflow_server, - silent = silent, debug = debug, wait = wait) + + # Run ByChunks with the chosen operation + if (!is.null(cluster)) { + if (is.null(workflow_manager)) { + stop("Specify parameter 'workflow_manager' as 'ecFlow' or 'Autosubmit'.") + } else if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) { + stop("Parameter 'workflow_manager' can only be 'ecFlow' or 'Autosubmit'.") + } + } else { # run locally + workflow_manager <- 'ecflow' + } + + if (tolower(workflow_manager) == 'ecflow') { + # ecFlow or run locally + res <- ByChunks_ecflow(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) + } else { + res <- ByChunks_autosubmit(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = autosubmit_server, + silent = silent, debug = debug, wait = wait) + + } + # TODO: carry out remaining steps locally, using multiApply # Return results res diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R index 7b69a8b8b30b29100b0e0e02687cfaadd7deb144..92e1d1b93063349141feae9ecece02f6bf0ab03b 100644 --- a/R/SelectorChecker.R +++ b/R/SelectorChecker.R @@ -50,7 +50,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, crescent_selectors <- TRUE if (all(sapply(selectors, function(x) { - any(c('numeric', "POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(x)) + any(c("numeric", "integer", "POSIXct", "POSIXlt", "POSIXt", "Date") %in% class(x)) }))) { if (selectors[[2]] < selectors[[1]]) { crescent_selectors <- FALSE diff --git a/R/Start.R b/R/Start.R index 702b77633cfda496eedf865bce1fb5b75b849c5a..b0ad40d2c81114ce21ea635471c3d02612ca7c2a 100644 --- a/R/Start.R +++ b/R/Start.R @@ -704,8 +704,8 @@ #'@param num_procs An integer of number of processes to be created for the #' parallel execution of the retrieval/transformation/arrangement of the #' multiple involved files in a call to Start(). 
If set to NULL,
-#'  takes the number of available cores (as detected by detectCores() in
-#'  the package 'future'). The default value is 1 (no parallel execution).
+#'  takes the number of available cores (as detected by future::availableCores()).
+#'  The default value is 1 (no parallel execution).
 #'@param ObjectBigmemory a character string to be included as part of the
 #'  bigmemory object name. This parameter is thought to be used internally by the
 #'  chunking capabilities of startR.
@@ -4108,7 +4108,90 @@ Start <- function(..., # dim = indices/selectors,
         }
       }
     }
+    # Retrieve variable metadata
+    # Compare array_of_metadata_flags with array_of_files_to_load to know which files to take for metadata
+    if (!is.null(metadata_dims)) {
+      array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load))
+      metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load))))
+      names(metadata_indices_to_load) <- names(dim(array_of_files_to_load))
+      metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims)))
+      array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load,
+                                                  list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims])))))
+
+      if (tail(names(dim(array_of_files_to_load)), 1) != found_pattern_dim) {
+        tmp1 <- s2dv::Reorder(array_of_files_to_load, c(2:length(dim(array_of_files_to_load)), 1))
+        tmp2 <- s2dv::Reorder(array_of_metadata_flags, c(2:length(dim(array_of_metadata_flags)), 1))
+        files_for_metadata <- tmp1[tmp2]
+      } else {
+        files_for_metadata <- array_of_files_to_load[array_of_metadata_flags]
+      }
+      # Get variable name
+      #NOTE: This part probably will fail when one netCDF file has more than one variable.
+      if (found_pattern_dim %in% metadata_dims) { # metadata_dims has "dat"
+        if (any(metadata_dims %in% c('var', 'variable'))) { # metadata_dims is c('dat', 'var')
+          how_many_vars <- length(dat[[1]][['selectors']]$var[[1]])
+        } else if (length(metadata_dims) > 1) { # metadata_dims is c('dat', xxx)
+          how_many_vars <- length(dat[[1]][['selectors']][[metadata_dims[which(found_pattern_dim != metadata_dims)]]][[1]])
+        } else { # metadata_dims is 'dat'
+          how_many_vars <- 1
+        }
+        tmp_var <- matrix(NA, how_many_vars, length(dat))
+        for (i_dat in 1:dim(array_of_metadata_flags)[found_pattern_dim]) {
+          if (any(metadata_dims %in% c('var', 'variable'))) { # metadata_dims has "var"
+            tmp_var[, i_dat] <- dat[[i_dat]][['selectors']]$var[[1]]
+          } else if (length(metadata_dims) > 1) { # metadata_dims is c('dat', xxx)
+            tmp_var[, i_dat] <- rep(dat[[i_dat]][['selectors']]$var[[1]][1],
+                                    length(dat[[1]][['selectors']][[metadata_dims[which(found_pattern_dim != metadata_dims)]]][[1]]))
+          } else { # metadata_dims is 'dat'
+            tmp_var[, i_dat] <- dat[[i_dat]][['selectors']]$var[[1]][1]
+          }
+        }
+
+        # if metadata_dims = c('dat', 'var') and [dat = 2, var = 2], tmp_var has length 4, like c('tas', 'tos', 'tas', 'tos').
+        # if metadata_dims = 'dat' and [dat = 2], tmp_var has length 2 like c('tas', 'tos').
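+        # Flatten the matrix column by column so that tmp_var[i] lines up with
+        # files_for_metadata[i] (the var index iterates fastest, then dat).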
+        tmp_var <- c(tmp_var)
+
+      } else { # metadata_dims doesn't have "dat"
+        if (any(metadata_dims %in% c('var', 'variable'))) { # metadata_dims has "var"
+          tmp_var <- dat[[1]][['selectors']]$var[[1]]
+        } else {
+          tmp_var <- rep(dat[[1]][['selectors']]$var[[1]][1], length(dat[[1]][['selectors']][[metadata_dims]][[1]]))
+        }
+        # if metadata_dims = 'var' and [var = 2], tmp_var has length 2 like c('tas', 'tos')
+        # if metadata_dims = 'table' and [table = 2], tmp_var has length 1 like 'tas'
+      }
+
+      loaded_metadata <- vector('list', length = length(files_for_metadata))
+      for (i_file in 1:length(files_for_metadata)) {
+        #NOTE: Do not use ncatt_get() because it only gets the attributes shown with ncdump -h
+        tmp <- file_opener(files_for_metadata[i_file])
+        if (!is.null(tmp)) { # if file exists
+          loaded_metadata[[i_file]][[1]] <- tmp$var[[tmp_var[i_file]]]
+          names(loaded_metadata[[i_file]]) <- tmp_var[i_file]
+          file_closer(tmp)
+        }
+      }
+      # Make loaded_metadata_files identical to the "retrieve = TRUE" case. If dataset_has_files is FALSE, deduct that dataset from the counting
+      ind_loaded_metadata_has_values <- which(!sapply(loaded_metadata, is.null)) # c(1, 2, 4)
+      if (!all(dataset_has_files)) { # If dataset_has_files has FALSE, deduct that dataset from the counting
+        if (found_pattern_dim %in% metadata_dims) { # metadata_dims has "dat"
+          dataset_has_files_expand <- rep(dataset_has_files, each = how_many_vars)
+          i_ind <- 1
+          while (i_ind <= length(ind_loaded_metadata_has_values)) { # 3, 4, 8
+            if (ind_loaded_metadata_has_values[i_ind] > i_ind) {
+              ind_loaded_metadata_has_values[i_ind] <- ind_loaded_metadata_has_values[i_ind] - length(which(!dataset_has_files_expand[1:dataset_has_files_expand[i_ind]]))
+            }
+            i_ind <- i_ind + 1
+          }
+        }
+      }
+      loaded_metadata_files <- as.character(ind_loaded_metadata_has_values)
+      loaded_metadata <- loaded_metadata[which(!sapply(loaded_metadata, is.null))]
+      return_metadata <- create_metadata_list(array_of_metadata_flags, metadata_dims, pattern_dims,
+                                              loaded_metadata_files, loaded_metadata, dat_names,
+                                              dataset_has_files)
+    }
   }
   # Print the warnings from transform
   if (!is.null(c(warnings1, warnings2, warnings3))) {
@@ -4150,27 +4233,28 @@
     file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])]
   }
+  # Prepare attr Variables
+  if (all(sapply(return_metadata, is.null))) {
+    # We don't have the metadata of the variable (e.g., tas). The returned metadata list only
+    # contains the ones specified in the argument "return_vars".
+    Variables_list <- c(list(common = picked_common_vars), picked_vars)
+    .warning(paste0("Metadata cannot be retrieved. The reason may be the ",
+                    "non-existence of the first file. Use parameter 'metadata_dims'",
+                    " to assign the file dimensions along which to return metadata, ",
+                    "or check the existence of the first file."))
+  } else {
+    # Add the metadata of the variable (e.g., tas) into the list of picked_vars or
+    # picked_common_vars.
+    Variables_list <- combine_metadata_picked_vars(
+      return_metadata, picked_vars, picked_common_vars,
+      metadata_dims, pattern_dims, length(dat))
+  }
+
   if (retrieve) {
     if (!silent) {
       .message("Successfully retrieved data.")
     }
-    if (all(sapply(return_metadata, is.null))) {
-      # We don't have metadata of the variable (e.g., tas). The returned metadata list only
-      # contains those are specified in argument "return_vars".
-      Variables_list <- c(list(common = picked_common_vars), picked_vars)
-      .warning(paste0("Metadata cannot be retrieved. 
The reason may be the ", - "non-existence of the first file. Use parameter 'metadata_dims'", - " to assign to file dimensions along which to return metadata, ", - "or check the existence of the first file.")) - } else { - # Add the metadata of the variable (e.g., tas) into the list of picked_vars or - # picked_common_vars. - Variables_list <- combine_metadata_picked_vars( - return_metadata, picked_vars, picked_common_vars, - metadata_dims, pattern_dims, length(dat)) - } - attributes(data_array) <- c(attributes(data_array), list(Variables = Variables_list, Files = array_of_files_to_load, @@ -4200,7 +4284,7 @@ Start <- function(..., # dim = indices/selectors, start_call[['retrieve']] <- TRUE attributes(start_call) <- c(attributes(start_call), list(Dimensions = final_dims_fake, - Variables = c(list(common = picked_common_vars), picked_vars), + Variables = Variables_list, ExpectedFiles = array_of_files_to_load, FileSelectors = file_selectors, PatternDim = found_pattern_dim, @@ -4237,6 +4321,7 @@ Start <- function(..., # dim = indices/selectors, second_round_indices <- work_piece[['second_round_indices']] #print("2") file_to_open <- work_piece[['file_path']] + # Get data and metadata sub_array <- file_data_reader(file_to_open, NULL, work_piece[['file_selectors']], first_round_indices, synonims) diff --git a/R/Utils.R b/R/Utils.R index 3d4d864a31c93f80529c2730dc3436e0149a3251..e440ddeedd76a6fe1c9fa3b7f81746f611c0692b 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -869,3 +869,149 @@ val <- withCallingHandlers(expr, warning = wHandler) list(value = val, warnings = myWarnings) } + +# This function writes startR_autosubmit.sh to local startR_autosubmit folder, under expID/ +write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { + # "chunks" should be the argument "chunks" in Compute() plus the redundant margin dims, + # e.g., list(dat = 1, var = 1, sdate = 1, time = 1, lat = 2, lon = 3) + + # Loop through chunks to create load script for each + for (n_chunk in 0:(prod(unlist(chunks)) - 1)) { + + # Create chunk args + chunk_names <- names(chunks) + chunk_args <- matrix(NA, 2, length(chunks)) + chunk_args[1, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '%') + chunk_args[2, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '_N%') + chunk_args <- paste0('(', paste(c(chunk_args), collapse = ' '), ')') + + bash_script_template <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', + package = 'startR')) + bash_script_lines <- readLines(bash_script_template) + close(bash_script_template) + + # Rewrite chunk_args= + bash_script_lines <- gsub('^chunk_args=*', paste0('chunk_args=', chunk_args), + bash_script_lines) + # Include init commands + bash_script_lines <- gsub('^include_init_commands', + paste0(paste0(cluster[['init_commands']], collapse = '\n'), '\n'), + + bash_script_lines) + # Rewrite include_module_load + bash_script_lines <- gsub('^include_module_load', + paste0('module load ', cluster[['r_module']]), + bash_script_lines) + # Rewrite cd run_dir + # If run_dir is not specified, the script will run under ${proj_dir} + if (!is.null(cluster[['run_dir']])) { + bash_script_lines <- gsub('^cd_run_dir', + paste0('cd ', cluster[['run_dir']]), + bash_script_lines) + } else { + bash_script_lines <- gsub('^cd_run_dir', 'cd ${proj_dir}', + bash_script_lines) + } + + # Save modified .sh file under local$PROJECT_PATH in expdef.yml + #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit() + #NOTE: the file will be copied to proj/ by "autosubmit create" + dest_dir 
<- file.path(autosubmit_suite_dir, paste0("/STARTR_CHUNKING_", cluster$expid)) + + if (!file.exists(dest_dir)) { + dir.create(dest_dir, recursive = TRUE) + } + writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh')) + } +} + +# This function generates the .yml files under autosubmit conf/ +write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { + # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) + # "cluster" is the argument "cluster" in Compute(), to set machine configuration + # "autosubmit_suite_dir" should be the local folder that has R script, like ecflow_suite_dir in Compute() + + # Get config template files from package + template_dir <- system.file('chunking/Autosubmit/', package = 'startR') + config_files <- list.files(template_dir, pattern = "*\\.yml$") + + for (i_file in config_files) { + + conf <- yaml::read_yaml(file.path(template_dir, i_file)) + conf_type <- strsplit(i_file, split = "[.]")[[1]][1] + +############################################################ + if (conf_type == "autosubmit") { + + #Q: Should it be the total amount of chunk? + conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks))) # total amount of chunk + #NOTE: Nord3 max. amount of queued jobs is 366 + if (conf$config$MAXWAITINGJOBS > 366) conf$config$MAXWAITINGJOBS <- 366 + conf$config$TOTALJOBS <- as.integer(cluster$max_jobs) + +############################################################ + } else if (conf_type == "expdef") { + conf$default$EXPID <- cluster$expid + conf$default$HPCARCH <- cluster$queue_host + # PROJECT_PATH should be where submit.sh and load....R stored --> local startR_autosubmit folder, under expID/ + conf$local$PROJECT_PATH <- file.path(autosubmit_suite_dir, paste0("STARTR_CHUNKING_", cluster$expid)) + +############################################################ + } else if (conf_type == "jobs") { + + chunks_vec <- lapply(lapply(chunks, seq, 1), rev) # list(lat = 1:2, lon = 1:3) + chunk_df <- expand.grid(chunks_vec) + nchunks <- nrow(chunk_df) + chunk_name <- paste0("CHUNK_", 0:(nchunks - 1)) + + # Fill in common configurations + jobs <- conf$JOBS + # wallclock from '01:00:00' to '01:00' + jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) + jobs[[1]]$PLATFORM <- cluster$queue_host + jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) + jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) + jobs[[1]][names(chunks)] <- "" + + # Create chunks and fill in info for each chunk + if (nchunks > 1) { + jobs <- c(jobs, rep(jobs, nchunks - 1)) + names(jobs) <- chunk_name + } + for (i_chunk in 1:nchunks) { + jobs[[i_chunk]][names(chunks)] <- chunk_df[i_chunk, ] + jobs[[i_chunk]]$FILE <- paste0('startR_autosubmit_', i_chunk - 1, '.sh') + } + + conf$JOBS <- jobs + +############################################################ + } else if (conf_type == "platforms") { + if (tolower(cluster$queue_host) != "local") { + conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) + if (!is.null(cluster$extra_queue_params)) { + tmp <- unlist(cluster$extra_queue_params) + for (ii in 1:length(tmp)) { + tmp[ii] <- paste0('\"', tmp[ii], '\"') + } + conf$Platforms[[cluster$queue_host]]$CUSTOM_DIRECTIVES <- paste0('[ ', paste(tmp, collapse = ','), ' ]') + } + } + +############################################################ + } else { + stop("File ", i_file, " is not considered in this function.") + } + 
+############################################################ + # Output directory + dest_dir <- paste0("/esarchive/autosubmit/", cluster$expid, "/conf/") + dest_file <- paste0(conf_type, "_", cluster$expid, ".yml") + + # Write config file inside autosubmit dir + yaml::write_yaml(conf, paste0(dest_dir, dest_file)) + Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) + + } # for loop each file +} diff --git a/R/zzz.R b/R/zzz.R index 22805ff6e46751fbe4c991a08f5f5bb6a38220b1..1e56e291fa6286d0f15df806c634dc962eb29ac2 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1363,7 +1363,10 @@ combine_metadata_picked_vars <- function(return_metadata, picked_vars, picked_co sublist_names <- lapply(return_metadata, names)[[kk]] if (!is.null(sublist_names)) { for (jj in 1:length(sublist_names)) { - picked_vars[[kk]][[sublist_names[jj]]] <- return_metadata[[kk]][[jj]] + if (!is.null(return_metadata[[kk]][[jj]])) { + picked_vars[[kk]] <- c(picked_vars[[kk]], list(return_metadata[[kk]][[jj]])) + names(picked_vars[[kk]])[length(picked_vars[[kk]])] <- names(return_metadata[[kk]][jj]) + } } } } diff --git a/inst/chunking/Autosubmit/autosubmit.yml b/inst/chunking/Autosubmit/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..8b129a0dd71a2b6795f5548d7ba5937ce33b7970 --- /dev/null +++ b/inst/chunking/Autosubmit/autosubmit.yml @@ -0,0 +1,16 @@ +config: + AUTOSUBMIT_VERSION: 4.0.0b0 + MAXWAITINGJOBS: # Should it be the total amount of chunk? + TOTALJOBS: + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +#wrappers: +# wrapper_sim: +# TYPE: "vertical" +# JOBS_IN_WRAPPER: "SIM" +mail: + NOTIFICATIONS: False + TO: +storage: + TYPE: "pkl" + COPY_REMOTE_LOGS: True diff --git a/inst/chunking/Autosubmit/expdef.yml b/inst/chunking/Autosubmit/expdef.yml new file mode 100644 index 0000000000000000000000000000000000000000..624040d8f86a319602ab9b2f7920f7d17660b1ab --- /dev/null +++ b/inst/chunking/Autosubmit/expdef.yml @@ -0,0 +1,33 @@ +default: + EXPID: #a659 + HPCARCH: #nord3v2, local +experiment: + DATELIST: 20220401 + MEMBERS: "fc0" + CHUNKSIZEUNIT: month + CHUNKSIZE: 4 + NUMCHUNKS: 2 + CHUNKINI: '' + CALENDAR: standard +project: + PROJECT_TYPE: local + PROJECT_DESTINATION: '' +git: + PROJECT_ORIGIN: '' #https://xxx + PROJECT_BRANCH: '' #master + PROJECT_COMMIT: '' + PROJECT_SUBMODULES: '' + FETCH_SINGLE_BRANCH: True +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +local: + PROJECT_PATH: #'/home/Earth/aho/startR_local_autosubmit/' +project_files: + FILE_PROJECT_CONF: '' + FILE_JOBS_CONF: '' + JOB_SCRIPTS_TYPE: '' +rerun: + RERUN: FALSE + RERUN_JOBLIST: '' +#Q: Are these all needed and correct? 
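For reference, the per-chunk job entries that write_autosubmit_confs() (R/Utils.R above) writes into _jobs.yml_ (next file) come from expanding the Compute() "chunks" argument into one row per chunk combination. Below is a minimal sketch of that expansion, reusing the same two lines as the function, with an illustrative "chunks" value:

```r
# "chunks" as passed to Compute(): 2 chunks along lat, 3 along lon
chunks <- list(lat = 2, lon = 3)
# Same logic as in write_autosubmit_confs():
chunks_vec <- lapply(lapply(chunks, seq, 1), rev)  # list(lat = 1:2, lon = 1:3)
chunk_df <- expand.grid(chunks_vec)                # one row per chunk combination
nrow(chunk_df)   # 6, so the jobs are named CHUNK_0 .. CHUNK_5
head(chunk_df, 3)
#   lat lon
# 1   1   1
# 2   2   1
# 3   1   2
```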
diff --git a/inst/chunking/Autosubmit/jobs.yml b/inst/chunking/Autosubmit/jobs.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ff4d0b284834488ab71053ffb763b6d3e76b93e --- /dev/null +++ b/inst/chunking/Autosubmit/jobs.yml @@ -0,0 +1,9 @@ +JOBS: + CHUNK_0: + PLATFORM: #LOCAL + RUNNING: once + WALLCLOCK: #00:05 + THREADS: + FILE: startR_autosubmit.sh #templates/sleep_5.sh +# DIM: +# DIM_N: diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R new file mode 100644 index 0000000000000000000000000000000000000000..8762eeb01bf10f7de2627645971395659839ee03 --- /dev/null +++ b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R @@ -0,0 +1,136 @@ +lib_dir <- +if (!is.null(lib_dir)) { + if (!dir.exists(lib_dir)) { + stop("The specified 'lib_dir' does not exist.") + } + .libPaths(new = lib_dir) +} +library(startR) + +out_dir <- + +debug <- +start_calls <- +start_calls_attrs <- +param_dimnames <- +fun <- +params <- +threads_load <- +threads_compute <- + +task_path <- commandArgs(TRUE)[2] + +args <- as.integer(commandArgs(TRUE)[-c(1, 2)]) + +total_specified_dims <- length(args) / 2 +chunk_indices <- args[((1:total_specified_dims) - 1) * 2 + 1] +names(chunk_indices) <- param_dimnames +chunks <- as.list(args[((1:total_specified_dims) - 1) * 2 + 2]) +names(chunks) <- param_dimnames + +t_begin_load <- Sys.time() +data <- vector('list', length(start_calls)) +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} +for (input in 1:length(data)) { + start_call <- start_calls[[input]] + call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) + dims_to_alter <- which(call_dims %in% param_dimnames) + names_dims_to_alter <- call_dims[dims_to_alter] + # If any dimension comes from split dimensions + split_dims <- start_calls_attrs[[input]][['SplitDims']] + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] + start_call[[names(split_dims)[k]]] <- startR:::.chunk(chunks_indices_split_dims, chunks_split_dims, + eval(start_call[[names(split_dims)[k]]])) + dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) + if (length(dims_to_alter_to_remove) > 0) { + dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] + names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] + } + } + } + if (length(dims_to_alter) > 0) { + for (call_dim in names_dims_to_alter) { + start_call[[call_dim]] <- startR:::.chunk(chunk_indices[call_dim], chunks[[call_dim]], + eval(start_call[[call_dim]])) + } + } + if (!('num_procs' %in% names(start_call))) { + start_call[['num_procs']] <- threads_load + } + # Creates a name for the temporal file using the chunks numbers: + ## ecFlow should be like "_4737920362_1_1_1_1_1_1_" + ## autosubmit should be like "a659_1_1_1_1_1_1" + + nameMemoryObject <- paste0(task_path, '_', paste(chunk_indices, collapse='_')) #task_path is EXPID actually + + start_call[['ObjectBigmemory']] <- 
nameMemoryObject + data[[input]] <- tryCatch(eval(start_call), + # Handler when an error occurs: + error = function(e) { + message(paste("The data cannot be loaded.")) + message("See the original error message:") + message(e) + message("\n Current files in /dev/shm:") + noreturn <- lapply(list.files("/dev/shm"), function (x) { + info <- file.info(paste0("/dev/shm/", x)) + message(paste("file:", rownames(info), + "size:", info$size, + "uname:", info$uname))}) + message(getwd()) + file.remove(nameMemoryObject) + file.remove(paste0(nameMemoryObject, ".desc")) + message(paste("Files", nameMemoryObject, "has been removed.")) + stop("The job has failed while loading data. See original error reported above.") + }) + warning(attributes(data[[input]])$ObjectBigmemory) +} +t_end_load <- Sys.time() +t_load <- as.numeric(difftime(t_end_load, t_begin_load, units = 'secs')) + +t_begin_compute <- Sys.time() +if (!is.null(attr(fun, 'UseLibraries'))) { + for (i in seq_along(attr(fun, 'UseLibraries'))) { + require(attr(fun, 'UseLibraries')[i], character.only = TRUE) + } +} +chunk_indices_apply <- setNames(as.integer(chunk_indices), names(chunk_indices)) +chunk_indices_apply <- chunk_indices_apply[names(chunks)[which(chunks > 1)]] +Apply <- multiApply::Apply +res <- do.call("Apply", + c( + list(data, + target_dims = attr(fun, 'TargetDims'), + fun = fun, + output_dims = attr(fun, 'OutputDims'), + use_attributes = attr(fun, 'UseAttributes'), + extra_info = list(chunk_indices = chunk_indices_apply), + ncores = threads_compute), + params + ) + ) +rm(data) +gc() + +for (component in names(res)) { + filename <- paste0(component, '__') + for (i in 1:total_specified_dims) { + filename <- paste0(filename, param_dimnames[i], '_', chunk_indices[i], '__') + } + # Saving in a temporary file, then renaming. This way, the polling mechanism + # won't transfer back results before the save is completed. + saveRDS(res[[component]], file = paste0(out_dir, '/', filename, '.Rds.tmp')) + file.rename(paste0(out_dir, '/', filename, '.Rds.tmp'), + paste0(out_dir, '/', filename, '.Rds')) +} +rm(res) +gc() diff --git a/inst/chunking/Autosubmit/platforms.yml b/inst/chunking/Autosubmit/platforms.yml new file mode 100644 index 0000000000000000000000000000000000000000..f8d8f70c11902875dd39a90b52c11a0080c2ef59 --- /dev/null +++ b/inst/chunking/Autosubmit/platforms.yml @@ -0,0 +1,14 @@ +Platforms: + nord3: + TYPE: SLURM + HOST: nord4.bsc.es #Q: Should we have more login nodes? + PROJECT: bsc32 + ADD_PROJECT_TO_HOST: "false" + USER: #bsc32734 + PROCESSORS_PER_NODE: #16 + SERIAL_QUEUE: debug + QUEUE: bsc_es + SCRATCH_DIR: /gpfs/scratch + CUSTOM_DIRECTIVES: # "['#SBATCH --exclusive']" "['#SBATCH --constraint=medmem']" +# MAX_WALLCLOCK: '48:00' +#Q: ARE THESE SETTING CORRECT? 
diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh new file mode 100644 index 0000000000000000000000000000000000000000..63d9e6124b510a4b5a93ef529c71be9a3fec0264 --- /dev/null +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +######## AUTOSUBMIT INPUTS ####### +proj_dir=%PROJDIR% +task_path=%DEFAULT.EXPID% +chunknum=%JOBNAME% #e.g., a68h_CHUNK_0 +chunknum="${chunknum:5}" #e.g., CHUNK_0 +################################## + +# Modified by write_bash.R +# e.g., chunk_args=(%JOBS."${chunknum}".dat% %JOBS."${chunknum}".dat_N% %JOBS."${chunknum}".var% %JOBS."${chunknum}".var_N% %JOBS."${chunknum}".sdate% %JOBS."${chunknum}".sdate_N%) +chunk_args= + +include_init_commands +include_module_load + +##Should move to the path that has load_process_save_chunk_autosubmit.R +#cd ${proj_dir} + +# move to run_dir +cd_run_dir + +#e.g., Rscript load_process_save_chunk_autosubmit.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 +Rscript ${proj_dir}/load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} + diff --git a/inst/chunking/Chunk.ecf b/inst/chunking/ecFlow/Chunk.ecf similarity index 82% rename from inst/chunking/Chunk.ecf rename to inst/chunking/ecFlow/Chunk.ecf index 60bd051a657d28ef957876c28c8ab2a45686f579..5a265fb0db82b49cce435e3d04306ada59b51ac2 100644 --- a/inst/chunking/Chunk.ecf +++ b/inst/chunking/ecFlow/Chunk.ecf @@ -12,7 +12,7 @@ set -vx cd %REMOTE_ECF_HOME% task_path=%REMOTE_ECF_HOME%/%ECF_NAME% -Rscript load_process_save_chunk.R --args $task_path insert_indices +Rscript load_process_save_chunk_ecflow.R --args $task_path insert_indices #include_transfer_back_and_rm #clean temporal folder diff --git a/inst/chunking/clean_devshm.sh b/inst/chunking/ecFlow/clean_devshm.sh similarity index 100% rename from inst/chunking/clean_devshm.sh rename to inst/chunking/ecFlow/clean_devshm.sh diff --git a/inst/chunking/head.h b/inst/chunking/ecFlow/head.h similarity index 100% rename from inst/chunking/head.h rename to inst/chunking/ecFlow/head.h diff --git a/inst/chunking/load_process_save_chunk.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R similarity index 98% rename from inst/chunking/load_process_save_chunk.R rename to inst/chunking/ecFlow/load_process_save_chunk_ecflow.R index b7b73a9ff027937015d3e8b4400c1080c990c43a..1bc5d6deabd43f33c4e51892812cb7547a049808 100644 --- a/inst/chunking/load_process_save_chunk.R +++ b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R @@ -37,6 +37,10 @@ t_job_setup <- as.numeric(difftime(t_end_job_setup, t_begin_job_setup, units = ' t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) diff --git a/inst/chunking/lsf.h b/inst/chunking/ecFlow/lsf.h similarity index 100% rename from inst/chunking/lsf.h rename to inst/chunking/ecFlow/lsf.h diff --git a/inst/chunking/pbs.h b/inst/chunking/ecFlow/pbs.h similarity index 100% rename from inst/chunking/pbs.h rename to inst/chunking/ecFlow/pbs.h diff --git a/inst/chunking/slurm.h b/inst/chunking/ecFlow/slurm.h similarity index 100% rename from inst/chunking/slurm.h rename to inst/chunking/ecFlow/slurm.h diff --git a/inst/chunking/tail.h b/inst/chunking/ecFlow/tail.h similarity index 100% rename from inst/chunking/tail.h rename to 
inst/chunking/ecFlow/tail.h diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 1508742096218a5ca6ee05840b347855afbaaf03..ffe91a514552f734178f68a46e173becbd0604e4 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -1001,13 +1001,10 @@ See [How-to-21](#21-retrieve-the-complete-data-when-the-dimension-length-varies- In the self-defined function in startR workflow, the dimensions required for the computations are used as target dimensions, and the rest can be used to chunk the data in pieces. There is one situation that some information of one dimension is needed in the function but it is not depended by the computation. In this case, we may be able to chunk through this dimension while using it in the function still. It is a saver if you have a complex case with no margin dimension left (see [How-to-25](#25-what-to-do-if-your-function-has-too-many-target-dimensions).) -You just need to define a parameter in your function 'nchunks = chunk_indices' and use it in the function. -The use case [RainFARM precipitation downscaling](https://earth.bsc.es/gitlab/es/startR/-/blob/develop-RainFARMCase/inst/doc/usecase/ex2_5_rainFARM.R) demonstrates an example that the start date dimension is used as chunking dimension, -but we use its chunk number to know the start date value of each chunk. -The first part of the function performs downscaling method, which requres longitude and latitude dimensions, so these two dimensions must be the target dimensions in the workflow. -After that, the results are saved as netCDF file following esarchive convention. We need start date value here to decide the file name. -As you can see, the sdate dimension is not required for the computation, so it is not necessary to be the target dimension. We can just use 'chunk_indices' to get the chunk number therefore get the corresponding start date value for the file name. +We have two examples: (1) [ex2_5_RainFARM precipitation downscaling](inst/doc/usecase/ex2_5_rainFARM.R) +shows how to get the start date of each chunk from the chunk number; (2) [ex2_14](inst/doc/usecase/ex2_14_margin_dim_indices.R) shows how to distinguish the variable in each chunk when "variable" is one of the chunking dimensions +(__NOTE: In this case, it is easier to simply use attributes to find out which variable it is. Check the use case for more details.__) There are many other possible applications of this parameter. Please share with us other uses cases you may create. diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 70b29a61b9d73cb7db017c3da9ed8ebf10c15be3..b22c6292c2923f54f2119fdbb5af3898a6c9af5d 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -14,13 +14,15 @@ If you would like to start using startR rightaway on the BSC infrastructure, you 2. [**Step() and AddStep()**](#4-2-step-and-addstep) 3. [**Compute()**](#4-3-compute) 1. [**Compute() locally**](#4-3-1-compute-locally) - 2. [**Compute() on HPCs**](#4-3-2-compute-on-hpcs) + 2. [**Compute() on HPCs with ecFlow**](#4-3-2-compute-on-hpcs-with-ecflow) + 3. [**Compute() on HPCs with Autosubmit**](#4-3-3-compute-on-hpcs-with-autosubmit) 4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui) 5. [**Additional information**](#5-additional-information) 1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores) 2. [**How to clean a failed execution**](#5-2-how-to-clean-a-failed-execution) 3. 
[**Visualizing the profiling of the execution**](#5-3-visualizing-the-profiling-of-the-execution) 4. [**Pending features**](#5-4-pending-features) + 5. [**ecFlow server and port**](#5-5-ecflow-server-and-port) 6. [**Other examples**](#6-other-examples) 7. [**Compute() cluster templates**](#7-compute-cluster-templates) @@ -370,8 +372,18 @@ It is not possible for now to define workflows with more than one step, but this ### 4-3. Compute() Once the data sources are declared and the workflow is defined, you can proceed to specify the execution parameters (including which platform to run on) and trigger the execution with the `Compute()` function. +The execution can run locally (only on the machine where the R session is running) or on different HPCs (Nord3-v2, CTE-Power9 and other HPCs). -Next, a few examples are shown with `Compute()` calls to trigger the processing of a dataset locally (only on the machine where the R session is running) and different HPCs (Nord3-v2, CTE-Power9 and other HPCs). However, let's first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. +The Compute() parameters common to local and remote execution are: +- `wf`: The workflow defined by the previous steps. +- `chunks`: The dimensions to be chunked and how many chunks you want for each dimension. +startR will automatically chunk the data for you. See more details in section [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores). +- `threads_load`: The number of parallel execution cores to be created for the data retrieval stage. It is used as the Start() parameter "num_procs" if that one is not specified in the Start() call. +- `threads_compute`: The number of parallel execution cores to be created for the computation. It is used as the multiApply::Apply parameter "ncores". + +Using more than 2 threads for the retrieval will usually be counterproductive, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. + +In the following example, we first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. ```r library(startR) @@ -525,7 +537,7 @@ dim(res$output1) 2 1 1 1 640 1296 ``` -In addition to performing the computation in chunks, you can adjust the number of execution threads to use for the data retrieval stage (with `threads_load`) and for the computation (with `threads_compute`). Using more than 2 threads for the retrieval will usually be perjudicial, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. +Run Compute() with the desired chunks and resource settings. ```r res <- Compute(wf, @@ -566,9 +578,12 @@ res <- Compute(wf, * max: 8.03660178184509 ``` -#### 4-3-2. Compute() on HPCs +#### 4-3-2. Compute() on HPCs with ecFlow -In order to run the computation on a HPC, you will need to make sure the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. If possible, in both directions. 
Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported). +We can use a workflow manager (ecFlow or Autosubmit) to dispatch computation jobs to an HPC. +To use Autosubmit, check the next section. +You will need to make sure that the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. +If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported). You will need to add two parameters to your `Compute()` call: `cluster` and `ecflow_suite_dir`. @@ -607,7 +622,7 @@ The cluster components and options are explained next: - `temp_dir`: directory on the HPC where to store temporary files. Must be accessible from the HPC login node and all HPC nodes. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. - `r_module`: name of the UNIX environment module to be used for R. If not specified, 'module load R' will be used. -- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each core may be capable of supporting more than one computing thread. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. - `extra_queue_params`: list of character strings with additional queue headers for the jobs to be submitted to the HPC. Mainly used to specify the amount of memory to book for each job (e.g. '#SBATCH --mem-per-cpu=30000'; __NOTE: this line does not work on Nord3v2__), to request special queuing (e.g. '#SBATCH --qos=bsc_es'), or to request use of specific software (e.g. '#SBATCH --reservation=test-rhel-7.5'). @@ -689,6 +704,64 @@ As mentioned above in the definition of the `cluster` parameters, it is strongly You can find the `cluster` configuration for other HPCs at the end of this guide [Compute() cluster templates](#compute-cluster-templates) +#### 4-3-3. Compute() on HPCs with Autosubmit + +To use Autosubmit as the workflow manager, add the following parameters to your Compute() call: + `cluster`, `autosubmit_suite_dir`, and `autosubmit_server`. + +`autosubmit_suite_dir` is the path where the temporary files generated for +Autosubmit to establish the workflow are stored. It must be accessible from both the workstation and the Autosubmit machine. 
+ +`autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01' or 'bscesautosubmit02'. + +The parameter `cluster` expects a list of components that provide the configuration of the Autosubmit machine. For now, the supported platforms are 'local' (run on the Autosubmit machine) and 'nord3' (Autosubmit submits jobs to Nord3). +You can see an example of the cluster configuration below. + +```r + res <- Compute(wf, chunks = list(sdate = 2), + threads_compute = 4, threads_load = 2, + cluster = list( + queue_host = 'nord3', + expid = , + hpc_user = "bsc32xxx", + r_module = "R/4.1.2-foss-2019b", + CDO_module = "CDO/1.9.8-foss-2019b", + autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3', + cores_per_job = 4, + job_wallclock = '01:00:00', + max_jobs = 4 + ), + workflow_manager = 'autosubmit', + autosubmit_suite_dir = "/home/Earth//startR_local_autosubmit/", + autosubmit_server = 'bscesautosubmit01', + wait = TRUE + ) +``` + +The cluster components and options are explained next: + +- `queue_host`: Must match the platform name in the Autosubmit configuration file _platforms.yml_, or 'local'. The currently provided platforms are: 'nord3'. +- `expid`: The Autosubmit experiment to run the computation. You can create the experiment beforehand or let startR create one for you by not specifying this component. +As a good practice, note down the expid if it is automatically created by startR, and re-use or delete it afterwards. +- `hpc_user`: Your user ID on the HPC (e.g., "bsc32xxx"). It is required if "queue_host" is not 'local'. +- `data_dir`: The path to the data repository if the data is not shared. +- `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. +- `init_commands`: The initial commands in the bash script before the R script runs. For example, the modules required by the computation can be loaded here. +- `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. +- `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `transform` is required in Start(). +- `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used. +- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. +It corresponds to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_. +- `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. +- `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. +- `extra_queue_params`: list of character strings for additional queue headers for the jobs +to be submitted to the HPC. For example, to constrain the jobs to the medmem nodes ('#SBATCH --constraint=medmem'), or to request exclusive mode ('#SBATCH --exclusive'). 
+- `polling_period`: when the connection is unidirectional, the workstation will ask the HPC login node for results each `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning. +- `special_setup`: name of the machine if the computation requires a special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4'). + +After the Compute() call is executed, you can monitor the status on the [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/). + + ### 4-4. Collect() and the EC-Flow GUI Usually, in use cases where large data inputs are involved, it is convenient to add the parameter `wait = FALSE` to your `Compute()` call. With this parameter, `Compute()` will immediately return an object with information about your startR execution. You will be able to store this object onto disk. After doing that, you will not need to worry in case your workstation turns off in the middle of the computation. You will be able to close your R session, and collect the results later on with the `Collect()` function. @@ -722,9 +795,14 @@ module load ecFlow ecflow_ui & ``` -After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. However, if it is the first time you are using the EC-Flow GUI with startR, you will need to register the EC-Flow server that has been started automatically by `Compute()`. You can open the top menu "Manage servers" > "New server" > set host to 'localhost' > set port to '5678' > save. +After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. +However, if it is the first time you are using the EC-Flow GUI with startR, +you will need to register the EC-Flow server that has been started automatically by `Compute()`. +You can open the top menu "Manage servers" > "Add server" > put a recognizable 'Name' for the host > set 'Host' to your workstation (e.g., bscearthxxx) or 'localhost' > set 'Port' to '5678' > save. +See more information about the ecFlow server in [#5-5](#5-5-ecflow-server-and-port). + +Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. _(NOTE: 'host' is not supported for now. You can use `ecflow_server = c(port = xxxx)` to change the port number.)_ -Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. After registering the EC-Flow server, an expandable entry will appear, where you can see listed the jobs to be executed, one for each chunk, with their status represented by a colour. Gray means pending, blue means queuing, green means in progress, and yellow means completed. @@ -844,6 +922,15 @@ You can click on the image to expand it. - Adding feature in `Start()` to read sparse grid points. - Allow for chunking along "essential" (a.k.a. "target") dimensions. +### 5-5. ecFlow server and port + +You cannot start two ecFlow servers on the same machine with the same port number. +That is, the port number on one workstation cannot be shared. For example, if port '5678' on workstation 'bscearth123' is taken by user A, +user B cannot ssh to 'bscearth123' and use the port number '5678'. But user B can use a new port number and specify 'ecflow_server' in Compute(). +Or, if user B uses another workstation that has port number '5678' available, they can use it without problem. 
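+For example, if port '5678' is already taken on your workstation, you can ask Compute() to start the server on another free port through `ecflow_server` (a sketch; `wf` and `cluster_config` stand for the workflow and `cluster` list defined in the previous sections, the suite directory is a placeholder, and '5679' is an arbitrary free port):
+
+```r
+res <- Compute(wf,
+               chunks = list(sdate = 2),
+               cluster = cluster_config,
+               ecflow_suite_dir = '/home/Earth/<user>/startR_local/',
+               ecflow_server = c(port = 5679))
+```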
+ +You can check the host-port you have in `~/.ecflowrc/servers`. To stop using a server, you can go to the ecFlow UI, right-click the server > halt > checkpoint > terminate. + ## 6. Other examples You can find more use cases in [usecase.md](inst/doc/usecase.md). diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 47ee89e526d4a21718f7f9651ad436db76271d0f..47f807e67d88fd07e126988212944133d605996e 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -78,7 +78,7 @@ The problem may occur when the dimension number of the splitted selector is more 4. [Use two functions in workflow](inst/doc/usecase/ex2_4_two_func.R) - 5. [RainFARM precipitation downscaling](https://earth.bsc.es/gitlab/es/startR/-/blob/develop-RainFARMCase/inst/doc/usecase/ex2_5_rainFARM.R) + 5. [RainFARM precipitation downscaling](/inst/doc/usecase/ex2_5_rainFARM.R) This example shows how to apply a statistical downscaling function with startR and simultaneously (considering the memory size if unnecessary dimensions are included) saves the data by chunks (e.g., chunking those dimensions which are not required for downscaling) in the esarchive format. It is not recommended to save big outputs. Consider to perform some analysis and then retrieve the result instead of saving data. This is a simplified example of RainFARM for more information visit: https://www.medscope-project.eu/products/data/. Find more explanation of this use case in FAQ [How-to-27](inst/doc/faq.md#27-utilize-chunk-number-in-the-function). @@ -106,8 +106,11 @@ Find more explanation of this use case in FAQ [How-to-27](inst/doc/faq.md#27-uti This script shows how to load irregular grid data by Start(), then regrid it by s2dv::CDORemap in the workflow. It is a solution before Start() can deal with irregular regridding directly. + 14. [Get margin dimension indices in startR workflow](inst/doc/usecase/ex2_14_margin_dim_indices.R) + This use case shows how to obtain the margin dimension indices inside the self-defined function. + 3. **Verification workflows** - 1. [Weekly ECV Subseasonal Hindcast Verification](inst/doc/usecase/ex3_1_SubseasonalECVHindcast.md) + 1. [Weekly ECV Subseasonal Hindcast Verification](inst/doc/usecase/ex3_1_SubseasonalECVHindcast.md) This is a practical case to compute monthly skill scores for the ECMWF/S2S-ENSForhc subseasonal hindcast using as a reference dataset ERA5. The ECV is air temperature at surface level (tas). Note that since this case is practical, it is heavy and takes much time to finish running. diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 6740a21dc31c8916dacdfc48750a0bafabf9b285..8f60413d504cd92d5e9114a3c863972863dcd237 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -55,6 +55,7 @@ summary(exp) #============================================================================= # Case 2: 'region' depends on 'sdate' +#NOTE: Exp "a35b" has been deleted. This example cannot be run now. 
path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') diff --git a/inst/doc/usecase/ex2_14_margin_dim_indices.R b/inst/doc/usecase/ex2_14_margin_dim_indices.R new file mode 100644 index 0000000000000000000000000000000000000000..4ce1caf8c9a70b4d3f71444b5b7acff36cbd23c4 --- /dev/null +++ b/inst/doc/usecase/ex2_14_margin_dim_indices.R @@ -0,0 +1,104 @@ +# Author: An-Chi Ho +# Date: 4th July 2023 +# ------------------------------------------------------------------ +# This usecase shows you how to know the margin dimension indices in the self-defined function. +# In this example, we chunk the data along dimensions 'var' and 'sdate'. We can get the indices +# of each chunck, and when dimension 'var' is 2 (i.e., 'tas'), we convert unit from K to degC. +# +# [UPDATE_12072023] This case can be much easier, simply use attributes to +# identify which variable it is in each chunk because attributes are also +# chunked along with data. +#------------------------------------------------------------------ + + library(startR) + + data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc", + var = c('psl', 'tas'), + sdate = paste0(2011:2018, '0501'), + ensemble = 'all', + time = indices(1:3), + lat = values(list(20, 80)), lat_reorder = Sort(), + lon = values(list(-80, 40)), lon_reorder = CircularSort(-180, 180), + synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', lon = NULL, lat = NULL), + retrieve = FALSE) + +#---------------- METHOD 1 (RECOMMENDED) ----------------- + func <- function(x) { + # x: [lat, lon] + attrs_names <- names(attr(x, 'Variables')$common) + if ('tas' %in% attrs_names) x <- x - 273.15 + + res <- ClimProjDiags::WeightedMean(x, lat = c(attr(x, 'Variables')$common$lat), lon = c(attr(x, 'Variables')$common$lon)) + + return(res) + } + + +#---------------- METHOD 2 ----------------- + #NOTE: 'chunk_indices', 'chunks', and 'start_call' are the variables from startR:::ByChunks + func <- function(x) { + # x: [lat, lon] + + #----- Get margin dim indices --------- + # code modified from startR Util.R .chunk() + dim_indices <- lapply(names(chunks), + function(x) { + n_indices <- attr(start_call, 'Dimensions')[[x]] + chunk_sizes <- rep(floor(n_indices / chunks[[x]]), chunks[[x]]) + chunks_to_extend <- n_indices - chunk_sizes[1] * chunks[[x]] + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk_indices[x]] + offset <- 0 + if (chunk_indices[x] > 1) { + offset <- sum(chunk_sizes[1:(chunk_indices[x] - 1)]) + } + 1:chunk_sizes[chunk_indices[x]] + offset + }) + names(dim_indices) <- names(chunks) + + # The first chunk [var = 1, sdate = 1] + #str(dim_indices) + #List of 5 + # $ dat : num 1 + # $ var : num 1 + # $ sdate : num [1:4] 1 2 3 4 + # $ ensemble: num [1:25] 1 2 3 4 5 6 7 8 9 10 ... + # $ time : num [1:3] 1 2 3 + + # The fourth chunk [var = 2, sdate = 2] + #str(dim_indices) + #List of 5 + # $ dat : num 1 + # $ var : num 2 + # $ sdate : num [1:4] 5 6 7 8 + # $ ensemble: num [1:25] 1 2 3 4 5 6 7 8 9 10 ... 
+ # $ time : num [1:3] 1 2 3 + + if (dim_indices$var == 2) { # tas + x <- x - 273.15 + } + + res <- ClimProjDiags::WeightedMean(x, lat = c(attr(x, 'Variables')$common$lat), lon = c(attr(x, 'Variables')$common$lon)) + + return(res) + } + +#-------------------------------------------------------- + + step <- Step(func, target_dims = c('lat', 'lon'), output_dims = NULL, + use_attributes = list("Variables")) + wf <- AddStep(data, step) + + res <- Compute(wf, chunks = list(var = 2, sdate = 2)) + + +# Check result: +summary(res$output1[1, 1, , , ]) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 101373 101525 101573 101570 101615 101749 +summary(res$output1[1, 2, , , ]) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 13.56 14.27 17.24 16.87 19.06 19.78 diff --git a/inst/doc/usecase/ex2_5_rainFARM.R b/inst/doc/usecase/ex2_5_rainFARM.R index 8d315a03c1f47588f881b39513dbf6b76cf43b7e..8b58901bcdf9c5a791fc7be8d4f9510604364033 100644 --- a/inst/doc/usecase/ex2_5_rainFARM.R +++ b/inst/doc/usecase/ex2_5_rainFARM.R @@ -1,6 +1,19 @@ # ------------------------------------------------------------------------------ # Downscaling precipitation using RainFARM # ------------------------------------------------------------------------------ +# This usecase demonstrates that the start date dimension is used as chunking +# dimension, but the chunk number is used to know the start date value of each +# chunk. +# The first part of the function performs downscaling method, which requires +# longitude and latitude dimensions, so these two dimensions must be the target +# dimensions in the workflow. +# After that, the results are saved as netCDF file following esarchive convention. +# We need start date value here to decide the file name. +# As you can see, the sdate dimension is not required for the computation, so it +# is not necessary to be the target dimension. We can just use 'chunk_indices' to +# get the chunk number therefore get the corresponding start date value for the +# file name. +# ------------------------------------------------------------------------------ # Note 1: The data could be first transformed with QuantileMapping from CSTools # Note 2: Extra parameters could be used to downscale the data: weights, slope... # See more information in: @@ -74,15 +87,20 @@ step <- Step(Chunk_RF, use_libraries = c('CSTools', 'ncdf4'), use_attributes = list(data = "Variables")) -workflow <- AddStep(data, step, nf = 4, - destination = "/esarchive/scratch/nperez/git/Flor/cstools/test_RF_start/", +workflow <- AddStep(list(data = data), step, nf = 4, + destination = "./test_RF_start/", startdates = as.Date(sdates, format = "%Y%m%d")) + +#========= OPTION 1: Compute locally ============ res <- Compute(workflow, chunks = list(sdate = 4), threads_load = 2, threads_compute = 4) + +#========= OPTION 2: Compute ON NORD3 ============ + #-----------modify according to your personal info--------- queue_host = 'nord3' # your own host name for nord3v2 temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd index 024ce32de1194ca691976a6207f98f830aa7233b..5ced7cd78044bd65aa814eaa62e3d9b6769bad32 100644 --- a/man/CDORemapper.Rd +++ b/man/CDORemapper.Rd @@ -51,7 +51,7 @@ perform the interpolation, hence CDO is required to be installed. 
data_path <- system.file('extdata', package = 'startR') path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') sdates <- c('200011') - \donttest{ + \dontrun{ data <- Start(dat = list(list(path = path_obs)), var = 'tos', sdate = sdates, diff --git a/man/Collect.Rd b/man/Collect.Rd index 97b529b07b001dd96d68c7de8db83802e8503ce1..d90cacaf8367095c5f4505fa8371151a1fdf4060 100644 --- a/man/Collect.Rd +++ b/man/Collect.Rd @@ -15,25 +15,27 @@ the RDS file.} Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a blocking call, in which Collect() will retrieve information from the HPC, including signals and outputs, each polling_period seconds. The the status -can be monitored on the EC-Flow GUI. Collect() will not return until the -results of all chunks have been received. If FALSE, Collect() will crash with -an error if the execution has not finished yet, otherwise it will return the -merged array. The default value is TRUE.} +can be monitored on the workflow manager GUI. Collect() will not return +until the results of all the chunks have been received. If FALSE, Collect() +returns an error if the execution has not finished, otherwise it will return +the merged array. The default value is TRUE.} -\item{remove}{A logical value deciding whether to remove of all data results -received from the HPC (and stored under 'ecflow_suite_dir', the parameter in -Compute()) after being collected. To preserve the data and Collect() it as -many times as desired, set remove to FALSE. The default value is TRUE.} +\item{remove}{A logical value deciding whether to remove all chunk results +received from the HPC after the data are collected, as well as the local job +folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the +data and Collect() them as many times as desired, set remove to FALSE. The +default value is TRUE.} } \value{ A list of merged data array. } \description{ The final step of the startR workflow after the data operation. It is used when -the parameter 'wait' of Compute() is FALSE, and the functionality includes -updating the job status shown on the EC-Flow GUI and collecting all the chunks -of results as one data array when the execution is done. See more details on -\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}. +the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the +results as one data array when the execution is done. See more details on +\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}. +Collect() calls Collect_ecflow() or Collect_autosubmit() according to the +chosen workflow manager. } \examples{ data_path <- system.file('extdata', package = 'startR') diff --git a/man/Compute.Rd b/man/Compute.Rd index 5b03abd18895ba0332f7245be2046384486745ae..96d063a361ce3302abac5a8040bbe8b71086d51e 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -7,11 +7,14 @@ Compute( workflow, chunks = "auto", + workflow_manager = "ecFlow", threads_load = 1, threads_compute = 1, cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + autosubmit_suite_dir = NULL, + autosubmit_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE @@ -28,11 +31,14 @@ those not required as the target dimension in function Step(). The default value is 'auto', which lists all the non-target dimensions and each one has one chunk.} -\item{threads_load}{An integer indicating the number of execution threads to -use for the data retrieval stage. 
The default value is 1.} +\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +'ecFlow'.} -\item{threads_compute}{An integer indicating the number of execution threads -to use for the computation. The default value is 1.} +\item{threads_load}{An integer indicating the number of parallel execution +cores to use for the data retrieval stage. The default value is 1.} + +\item{threads_compute}{An integer indicating the number of parallel execution +cores to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the machine to be run on. The comoponents vary from the different machines. @@ -48,7 +54,17 @@ remotely. The default value is NULL.} \item{ecflow_server}{A named vector indicating the host and port of the EC-Flow server. The vector form should be \code{c(host = 'hostname', port = port_number)}. Only needed when the -execution is run#' remotely. The default value is NULL.} +execution is run remotely. The default value is NULL.} + +\item{autosubmit_suite_dir}{A character string indicating the path to a folder +where to store temporary files generated for the automatic management of the +workflow manager. This path should be available in local workstation as well +as autosubmit machine. The default value is NULL, and a temporary folder +under the current working folder will be created.} + +\item{autosubmit_server}{A character vector indicating the login node of the +autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02". +The default value is NULL, and the node will be randomly chosen.} \item{silent}{A logical value deciding whether to print the computation progress (FALSE) on the R session or not (TRUE). It only works when the diff --git a/man/Start.Rd b/man/Start.Rd index 3bdae42cdd792b32c68cd45656ab47e1f20c2120..25eb8d744084a7d7da286206eb01d3d59acfa907 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -684,8 +684,8 @@ value is FALSE.} \item{num_procs}{An integer of number of processes to be created for the parallel execution of the retrieval/transformation/arrangement of the multiple involved files in a call to Start(). If set to NULL, -takes the number of available cores (as detected by detectCores() in -the package 'future'). The default value is 1 (no parallel execution).} +takes the number of available cores (as detected by future::availableCores). +The default value is 1 (no parallel execution).} \item{ObjectBigmemory}{a character string to be included as part of the bigmemory object name. 
This parameter is thought to be used internally by the diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 2fe6b3976c570c71356c459d3c3a6ec505ac296b..5e1fe9cb345c92b3329c4edacdd6c35244e88c46 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -1,9 +1,8 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Error with bad dimensions tests.") test_that("Single File - Local execution", { suppressWarnings( - data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', + data <- Start(dataset = '/esarchive/scratch/aho/startR_unittest_files/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', var = 'tas', sdate = '2000', month = indices(1), diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R index 28df2340a6883b72a9a859ca783791a7740c7ee3..fb31d00bc062368d0aa4ac642a2af2cf276c3168 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -1,8 +1,7 @@ -context("Compute use CDORemap") - test_that("ex2_3", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index 9e626e44fdac8790aecda70c00071fc25f17ab38..ffce880299e12b7043f5628b4114bdd2206327fe 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -1,10 +1,12 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Number of chunks tests.") test_that("Single File - Local execution", { +path <- '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', +data <- Start(dataset = path, var = 'tas', sdate = '2000', month = indices(1), @@ -45,7 +47,7 @@ res2 <- Compute(workflow = wf, expect_equal( res1, res2, -check.attributes = FALSE +ignore_attr = TRUE ) }) diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index ce92b94e39f5a0f2bcdde360518592adf07a7605..101bfb592ed5e21a975db603e167123b1b22dd90 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -6,12 +6,12 @@ # b. depended dim is list of values # Note that 2.b. doesn't work. -context("Chunk over dimensions that have dependency relationship") - path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/', 'HadGEM3-GC31-MM/dcppA-hindcast/', 'r1i1p1f2/Omon/tos/gn/v20200417/', '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + sdates <- c('2016', '2017', '2018') # retrieve = T for verification @@ -149,7 +149,9 @@ test_that("2.a. depending dim is indices(); depended dim is indices()", { suppressWarnings( data <- Start(dat = path, var = 'tos', - sdate = indices(57:59), # 2016, 2017, 2018 +#NOTE: sdate was indices(57:59) if path is /esarchive/. Now the path is under my scratch, +# the first sdate is 2016. 
+ sdate = indices(1:3), # 2016, 2017, 2018 chunk = indices(2:4), chunk_depends = 'sdate', time = 'all', @@ -204,7 +206,9 @@ expect_error( suppressWarnings( Start(dat = path, var = 'tos', - sdate = indices(57:59), # 2016, 2017, 2018 +#NOTE: sdate was indices(57:59) if path is /esarchive/. Now the path is under my scratch, +# the first sdate is 2016. + sdate = indices(1:3), # 2016, 2017, 2018 chunk = chunks, chunk_depends = 'sdate', time = 'all', diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 09da16096467e861dc5035840f346d3d46ce0275..0c1da4ac5dda36b01669bd566efa6d0563fb4e34 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -1,13 +1,12 @@ # This unit test is to check chunking over the split dim. It involves # how to arrange the chunks in a correct order even when chunking is happening. -context("Chunk over split dim") - test_that("1. The files are not repeated", { repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') +repos_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_exp) suppressWarnings( exp <- Start(dat = repos_exp, @@ -32,6 +31,8 @@ dates <- attr(exp, 'Variables')$common$time # 4 3 repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' +repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + suppressWarnings( obs <- Start(dat = repos_obs, var = 'tas', @@ -116,13 +117,16 @@ c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2) test_that("2. The files are repeated", { -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') -obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) +obs_path <- paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) + sdates.seq <- c("20161222","20161229", "20170105","20170112") suppressWarnings( hcst <- Start(dat = ecmwf_path_hc, - var = 'sfcWind', + var = 'tas', sdate = sdates.seq, syear = indices(1:2), #'all', time = 'all', @@ -207,15 +211,15 @@ aperm(res3, c(7, 2, 3, 4, 1, 5, 6)) ) expect_equal( dim(res1), -c(time = 47, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) +c(time = 46, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) ) expect_equal( dim(res2), -c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 47) +c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 46) ) expect_equal( dim(res3), -c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 47) +c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46) ) diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index 9b42e43c126fe53e0f3f015c9247a724ba1a02d8..f055e96b48853b8f9f79437fcf2004dd7f50f253 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -1,4 +1,3 @@ -context("Compute, extra function 
arguments") test_that("ex2_6", { @@ -7,6 +6,8 @@ test_that("ex2_6", { # Prepare sdates and paths #========================= dataset <- "/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc" + dataset <- paste0('/esarchive/scratch/aho/startR_unittest_files/', dataset) + sdates <- paste0(1981:1982, rep(10:12, 2)) #=================== # Get daily winds diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 13d2a44435799415b4091e0a8c6710ff05e78f1d..58f96a93af392b6ec7a4d3f3021370f4ede5256a 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -1,4 +1,3 @@ -context("Compute()/ByChunks(): dimension consistence check") # If dataset are more than 1 (e.g., exp and obs), ByChunks() checks if # they have consistent dimensions in favor of Apply() computation. However, # only margin dimensions need to be identical. Target dimensions can have @@ -6,7 +5,9 @@ context("Compute()/ByChunks(): dimension consistence check") test_that("ex2_11", { path.exp <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc' +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) var <- 'tos' y1 <- 1981 diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 00a5c1d7f1b0afc5b39dc9addd970202866a8ecb..7de1471811e0ce838b28368267eb418a5a230a20 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -1,12 +1,12 @@ library(s2dv) -context("Irregular regriding in the workflow") - test_that("1. ex2_13", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/', '$var$_*_s$sdate$-$member$_gn_$aux$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dataset = path, var = 'tos', diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index 80d96ff0bf2ade58ae1b30c9f2cadd1c6e6e8e36..fbc5af0464989ebb977d6ac7c69adde6aa868b4a 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -1,8 +1,7 @@ -context("Compute on time dimension") - test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index e6363f424629366284f1836f66dee47dde1d0b24..05d5de6639254346bea59cbfb16466e6f516699a 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -1,9 +1,10 @@ -context("Transform with 'all'") test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() path <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dat = path, var = 'tos', @@ -54,8 +55,11 @@ test_that("2. 
chunk along lon", { #NOTE: the results are not identical when exp has extra cells = 2 +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index 34ddf4854c8b7823eccb254098f398fd89273add..c2d3e35a65a99110996e19ddc6404c2b0f381747 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -1,4 +1,3 @@ -context("Transform with indices") # Using indinces() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. @@ -25,6 +24,7 @@ test_that("1. global", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) #----------------------------------- # crop = region @@ -149,6 +149,7 @@ test_that("2. regional, no border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( @@ -248,6 +249,7 @@ test_that("3. regional, at lon border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index e6b6c26c08711a6c73970df5330e33fce1aba80f..25a803f2b158048242a382dd880106181a34cfc8 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -1,4 +1,3 @@ -context("Compute: Transform and chunk values()") # Using values() to assign lat and lon, and transform the data. # Also test transform + chunk along lat/lon. @@ -17,8 +16,11 @@ lats.max <- 90 # crop = region #NOTE: res1 and res3 differ if extra_cells = 2. But if retrieve = T, extra_cells = 2 or 8 is equal. 
+path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -95,6 +97,8 @@ tolerance = 0.001 # crop = region, selector is values(c()) library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) + file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -106,8 +110,11 @@ lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -152,8 +159,12 @@ lons.min <- -180 lons.max <- 179.9 lats.min <- -90 lats.max <- 90 + +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -242,8 +253,11 @@ lats.min <- 20 lats.max <- 40 # crop = region +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', #paste0(2000:2001, '0101'), ensemble = indices(1), #'all', @@ -326,8 +340,11 @@ lats.min <- 21 lats.max <- 41 # crop = region +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', #paste0(2000:2001, '0101'), ensemble = indices(1), #'all', @@ -427,8 +444,11 @@ lats.max <- 40 #NOTE: transform_extra_cells = 8 the results are not equal # crop = region +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -520,8 +540,11 @@ tolerance = 0.001 # crop = region, CircularSort(-180, 180) #NOTE: transform_extra_cells = 8 the results are not equal +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Compute-two_data.R 
b/tests/testthat/test-Compute-two_data.R index 9cb7145dc05c5c41b7935342c5b84a5d88147391..dfa579abd9f4f201e848c446cfe5798151662e48 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -1,10 +1,10 @@ -context("Compute with two datasets") - test_that("ex2_7", { # exp data repos <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/', '$var$_f6h/$var$_$sdate$.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + sdates <- sapply(2013:2014, function(x) paste0(x, sprintf('%02d', 1:12), '01')) suppressWarnings( @@ -24,6 +24,8 @@ suppressWarnings( # obs data repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/monthly_mean/', '$var$_f6h/$var$_$sdate$.nc') + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + sdates_obs <- (sapply(2012:2013, function(x) paste0(x, sprintf('%02d', 1:12)))) suppressWarnings( obs <- Start(dat = repos_obs, diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 2ca73a770f040f19c650960e1be8e8c97b6e294e..6f218e664a37e1481f08efb18e9f4b50789125a8 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -1,8 +1,7 @@ -context("Compute use attributes") - test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index a3d95866a79d9fda53e2c6a594122eb8d7c6c743..bfe44b13727dd6cfa264efc4b5fdf144b3004ee0 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,6 +1,6 @@ -context("DCPP successfull retrieved for depends and across parameters.") test_that("Chunks of DCPP files- Local execution", { path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' + path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) sdates <- c('2017', '2018') suppressWarnings( @@ -19,24 +19,31 @@ suppressWarnings( ) # [sdate = 2, chunk = 3] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -dat_2018_chunk3 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc', +dat_2018_chunk3 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) expect_equal(dat[1,1,2,25:36,,], dat_2018_chunk3[1,1,,,]) # [sdate = 1, chunk = 2] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -dat_2017_chunk2 <- Start(dat = 
'/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc', +dat_2017_chunk2 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) expect_equal(dat[1,1,1,13:24,,], dat_2017_chunk2[1,1,,,]) # [sdate = 2, chunk = 1] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -dat_2018_chunk1 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc', +dat_2018_chunk1 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index da63e53a3ef0966bf17aab6ee7c9f8af46cf92f3..7dfbc2cfc6e3bb9724fa4445b36964aa4359bae4 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -1,10 +1,10 @@ -context("Start() different calendar") test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc') +path_hadgem3 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_hadgem3) sdate <- c('2000', '2001') fyear_hadgem3 <- indices(1) @@ -48,6 +48,7 @@ expect_equal( test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_bcc_csm2) suppressWarnings( data <- Start(dat = path_bcc_csm2, @@ -82,6 +83,7 @@ test_that("3. standard, daily, unit = 'days since 1850-1-1 00:00:00'", { 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc') + path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm) var <- 'tasmax' sdate <- '2000' @@ -122,6 +124,8 @@ test_that("4. standard, monthly, unit = 'days since 1850-1-1 00:00:00'", { 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/Amon/$var$/gn/v20200320/', '$var$_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_200011-201012.nc') + path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm) + sdate <- '2000' fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231') @@ -154,6 +158,7 @@ expect_equal( test_that("5. 
proleptic_gregorian, 6hrly, unit = 'hours since 2000-11-01 00:00:00'", { repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', '$var$/$var$_199405.nc') + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) date <- paste0('1994-05-', sprintf('%02d', 1:31), ' 00:00:00') date <- as.POSIXct(date, tz = 'UTC') # attr(date, 'tzone') <- 'UTC' @@ -189,6 +194,8 @@ expect_equal( test_that("6. standard, monthly, unit = 'months since 1850-01-01 00:00:00'", { repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + suppressWarnings( obs <- Start(dat = repos_obs, var = 'tos', @@ -246,6 +253,8 @@ c("1960-11-16 00:00:00", "1960-12-16 12:00:00", "1961-01-16 12:00:00", "1961-02- test_that("7. proleptic_gregorian, monthly, unit = 'days since 1850-1-1 00:00:00'", { repos <- '/esarchive/exp/mpi-esm-lr/cmip5-historical_i0p1/monthly_mean/$var$/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( data <- Start(dat = repos, var = 'tas', @@ -278,6 +287,7 @@ suppressWarnings( test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", { repos <- '/esarchive/exp/CMIP5/historical/ecearth/cmip5-historical_i0p1/$var$_3hr_EC-EARTH_historical_r6i1p1_$period$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'vas', diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 18d1b9f92024074ec756e2a938d18fe472efb5e8..9cccc2d24694d28fb76c8d853dcb8de96f8ea3e8 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -2,10 +2,8 @@ # and depending dimensions. The depending dimension should be a list with # names that are the values of depended dimensions. -context("Start() using values() to define dependency relations") - - path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) sdates <- c('2016', '2017', '2018') chunks <- array(dim = c(chunk = 3, sdate = 3)) diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index 392841aa0a44121807d74d27df6456615f252f97..fecbd7c05cbe19ca40dd9d4a058495893a49fd44 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -1,5 +1,3 @@ -context("Start() retrieves files that the first file is missing") - # When some of the files are missing, Start() still can retrieve the data and # put NA in those missing positions. However, when the first file is missing, # Start() returned error before because of failing to find metadata. The bug is @@ -7,6 +5,7 @@ context("Start() retrieves files that the first file is missing") # the data. The parameter 'metadata_dims' can also be used in this case. 
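A compact sketch of the behaviour described in the comment above, reusing the objects defined just below ('file', 'var'; sdates1 exists, sdates2 does not). The call is abbreviated to the relevant arguments, so treat the elided selectors as placeholders:

library(startR)
# With the non-existent 20130618 listed first, Start() still retrieves the
# existing 20130611 file, fills the missing positions with NA, and records
# the unfound path:
#   data <- Start(dat = file, var = var,
#                 file_date = c(sdates2, sdates1),  # missing file first
#                 ..., metadata_dims = 'dat', retrieve = TRUE)
#   attr(data, 'NotFoundFiles')  # the missing 20130618 path, then NA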
file <- "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/$var$_f24h/$var$_$file_date$.nc" +file <- paste0('/esarchive/scratch/aho/startR_unittest_files/', file) var <- 'tas' sdates1 <- c('20130611') #exists sdates2 <- c('20130618') #does not exist @@ -119,7 +118,7 @@ data <- Start(dat = file, ) expect_equal( as.vector(attr(data, 'NotFoundFiles')), - c("/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA) + c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA) ) }) @@ -149,7 +148,7 @@ data <- Start(dat = file, ) expect_equal( as.vector(attr(data, 'NotFoundFiles')), - c(NA, "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc") + c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc") ) }) diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 34c861f1cb568d9254e582c9a796d50a85dc0d94..921c3313d5c0d22b0d98344e06463faac6a5ee28 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -1,10 +1,9 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() across_meridia global lon length check") - test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- 'tas' lon.min <- 0 lon.max <- 359.723 #360 diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 10e5545fcb80f3af7eefed32a8f00dadd65feacc..d493a87ba41589d96988146d27d24afac3dc9406 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -6,80 +6,80 @@ # If assign a selector with an array that has file dim as dimension, Start() read # the values depending on the the file dim. #--------------------------------------------------- -context("Start() implicit dependency by selector dimension") - -test_that("1. region with different index between files", { - -path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', - 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', - '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') - -# two sdates have different index for Nino3. 
-region <- array('Nino3', dim = c(sdate = 2, region = 1)) - -suppressWarnings( -data <- Start(dat = path, - var = 'tosmean', - sdate = c('1993', '2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = region, - time = 'all', - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = 'sdate'), - retrieve = T) -) -suppressWarnings( -data1 <- Start(dat = path, - var = 'tosmean', - sdate = c('1993'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) -suppressWarnings( -data2 <- Start(dat = path, - var = 'tosmean', - sdate = c('2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) - -expect_equal( -dim(data), -c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) -) -expect_equal( -data[1, 1, 1, 1, ], -data1[1, 1, 1, 1, ] -) -expect_equal( -data[1, 1, 2, 1, ], -data2[1, 1, 1, 1, ] -) - - -}) +#NOTE: The files don't exist anymore. +#test_that("1. region with different index between files", { +# +#path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', +# 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', +# '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') +# +## two sdates have different index for Nino3. +#region <- array('Nino3', dim = c(sdate = 2, region = 1)) +# +#suppressWarnings( +#data <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993', '2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = region, +# time = 'all', +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = 'sdate'), +# retrieve = T) +#) +#suppressWarnings( +#data1 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +#suppressWarnings( +#data2 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +# +#expect_equal( +#dim(data), +#c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) +#) +#expect_equal( +#data[1, 1, 1, 1, ], +#data1[1, 1, 1, 1, ] +#) +#expect_equal( +#data[1, 1, 2, 1, ], +#data2[1, 1, 1, 1, ] +#) +# +# +#}) test_that("2. time depends on sdate", { repos <- '/esarchive/exp/ecmwf/system4_m1/daily_mean/$var$_f24h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) sdates <- paste0(2001:2003, '0501') tmp <- as.POSIXct(sapply(2001:2003, function(x) paste0(x, '-05-', sprintf('%02d', 1:31))), tz = 'UTC') tmp <- array(tmp, dim = c(time = 31, sdate = 3)) @@ -122,6 +122,7 @@ test_that("3. 
region depends on member and sdate", { reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +path_SR <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_SR) suppressWarnings( data <- Start(dat = path_SR, var = 'tosmean', diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 3788af0f48c41729cf56bcc2a00049bda3561c7e..7e0264c5a3905d6125c8b326fed548403d56dd03 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -1,4 +1,3 @@ -context("Start() implicit inner dimension") # The unit test is for the implicit inner dimension. If the inner dimension length is 1, # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. @@ -8,6 +7,7 @@ context("Start() implicit inner dimension") test_that("1. time = 1", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312") diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 82e5cb1dc1183e3092460c59c6499549d925abd0..2effede805887d40bef16cb1da1986e9c5eddd79 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -4,14 +4,15 @@ # 3. transform, indices reversed # 4. no transform, indices reversed -context("List of indices and vector of indices") +repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) test_that("1. transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -36,7 +37,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -74,7 +75,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -93,7 +94,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -194,7 +195,7 @@ test_that("4. 
no transform, indices reverse", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -215,7 +216,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index b448f895b44d150ea0bb519b288a8ee78d4c6ee1..211c1321ac8865914ea4ecdd51fdce53044629cd 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -1,4 +1,3 @@ -context("Start() largest_dims_length check") # When certain inner dim of files is not consistent, the parameter 'largest_dims_length' can # be used to ensure the returned array has the largest length of inner dimensions. @@ -7,9 +6,10 @@ test_that("1. inconsistent member length", { # system3: 40 members. repos <- list(list(name = 'system5c3s', - path = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), list(name = 'system3_m1-c3s', - path = "/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + # largest_dims_length = FALSE suppressWarnings( @@ -140,6 +140,7 @@ test_that("2. inconsistent time length, merge_across_dims = T", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/EC-Earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/', '$member$/Amon/$var$/gr/v20210309/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dataset = path, diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 6b797a89ead97863062c2fec52e7e8fce9f4930d..11be10996738dc56baf8fff4c6d248540c3e29e8 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -1,9 +1,8 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() line order consistency check") - variable <- "tas" obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_$file_date$.nc" + obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) dates_file <- "201702" lats.min <- -90 diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index ce30eec3100f661df7c99f3706af8a314597f366..2a2e7358824ca68b3240765e9b106ea0ff41c93c 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -1,7 +1,6 @@ -context("Start() metadata_dims check") - test_that("1. 
One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = list(list(name = 'system5_m1', path = repos)), var = 'tas', @@ -19,10 +18,6 @@ suppressWarnings( retrieve = T ) ) - expect_equal( - length(attr(data, 'Variables')), - 2 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system5_m1") @@ -45,12 +40,49 @@ suppressWarnings( tolerance = 0.0001 ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system5_m1', path = repos)), + var = 'tas', + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(2:3), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + retrieve = F + ) +) + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('time', 'tas') + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat") + ) + expect_equal( + length(attr(dataF, 'Variables')$common$tas), + 22 + ) + }) test_that("2. Two data sets, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -69,10 +101,6 @@ suppressWarnings( retrieve = T ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -107,12 +135,59 @@ suppressWarnings( c(247.2570, 248.5016), tolerance = 0.0001 ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = 'tas', + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(2:3), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas") + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$tas), + 22 + ) + + }) test_that("3. 
One data set, two vars", { repos <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- c('tas', 'clt') suppressWarnings( data <- Start(dat = repos, @@ -127,10 +202,6 @@ suppressWarnings( retrieve = TRUE ) ) - expect_equal( - length(attr(data, 'Variables')), - 2 - ) expect_equal( names(attr(data, 'Variables')), c("common", "dat1") @@ -161,11 +232,50 @@ suppressWarnings( c(250.00110, 25.04345), tolerance = 0.0001 ) + + +suppressWarnings( + dataF <- Start(dat = repos, + var = var, + time = indices(1), + lat = indices(9:10), + lon = indices(10:11), + return_vars = list(lat = NULL, lon = NULL), + metadata_dims = 'var', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "dat1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('lat', 'lon', 'tas', 'clt') + ) + expect_equal( + is.null(attr(dataF, 'Variables')$dat1), + TRUE + ) + expect_equal( + length(attr(dataF, 'Variables')$common$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$common$clt), + 22 + ) + }) test_that("4. Two data sets, two vars", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -184,10 +294,6 @@ suppressWarnings( retrieve = T ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -213,6 +319,14 @@ suppressWarnings( 11 ) expect_equal( + attr(data, 'Variables')$system4_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(data, 'Variables')$system5_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 1296) + ) + expect_equal( data[1, , 1, 1, 1, 2, 2], c(247.227219, 6.370782), tolerance = 0.0001 @@ -223,6 +337,57 @@ suppressWarnings( tolerance = 0.0001 ) +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(1:2), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', + retrieve = F + ) +) + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas") + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$tas), + 22 + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + 
attr(dataF, 'Variables')$system5_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 1296) + ) + #------------------------------------------------------------- suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), @@ -243,10 +408,6 @@ suppressWarnings( ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -289,11 +450,90 @@ suppressWarnings( c(248.781540, 5.794801), tolerance = 0.0001 ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = indices(1:2), + lon = indices(1:2), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + 'time' + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas", "sfcWind") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas", "sfcWind") + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system5_m1$sfcWind), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$tas), + 22 + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1$sfcWind), + 22 + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$tas$dim[[1]][1:2], + list(name = 'longitude', len = 1296) + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$sfcWind$dim[[1]][1:2], + list(name = 'longitude', len = 512) + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$sfcWind$dim[[1]][1:2], + list(name = 'lon', len = 1296) + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1$tas$name, + 'tas' + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1$sfcWind$name, + 'sfcWind' + ) + }) test_that("5. 
Specify metadata_dims with another file dimension", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -313,10 +553,6 @@ suppressWarnings( ) ) - expect_equal( - length(attr(data, 'Variables')), - 3 - ) expect_equal( names(attr(data, 'Variables')), c("common", "system4_m1", "system5_m1") @@ -337,12 +573,173 @@ suppressWarnings( length(attr(data, 'Variables')$common$tas), 12 ) + expect_equal( + attr(data, 'Variables')$common[[3]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'sdate', + retrieve = F + ) +) + + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('time', 'tas', 'tas', 'tas') + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat") + ) + expect_equal( + length(attr(dataF, 'Variables')$common[[2]]), + 22 + ) + expect_equal( + attr(data, 'Variables')$common$time, + attr(dataF, 'Variables')$common$time + ) + expect_equal( + attr(dataF, 'Variables')$common[[3]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) + +#------------------------------------------------------------------ +suppressWarnings( + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = c('dat', 'sdate'), + retrieve = T + ) +) + + expect_equal( + names(attr(data, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(data, 'Variables')$common), + c('time') + ) + expect_equal( + names(attr(data, 'Variables')$system4_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + names(attr(data, 'Variables')$system5_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1[[3]]), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system4_m1[[4]]), + 12 + ) + expect_equal( + length(attr(data, 'Variables')$system5_m1[[5]]), + 12 + ) + expect_equal( + attr(data, 'Variables')$system4_m1[[3]]$dim[[4]]$units, + "hours since 2015-01-01 00:00:00" + ) + expect_equal( + attr(data, 'Variables')$system5_m1[[4]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 
'system5_m1', path = repos)), + var = c('tas', 'sfcWind'), + sdate = c('20150101', '20160101', '20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = c('dat', 'sdate'), + retrieve = F + ) +) + expect_equal( + names(attr(dataF, 'Variables')), + c("common", "system4_m1", "system5_m1") + ) + expect_equal( + names(attr(dataF, 'Variables')$common), + c('time') + ) + expect_equal( + names(attr(dataF, 'Variables')$system4_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + names(attr(dataF, 'Variables')$system5_m1), + c("lon", "lat", "tas", "tas", "tas") + ) + expect_equal( + length(attr(dataF, 'Variables')$system4_m1[[3]]), + 22 + ) + expect_equal( + attr(data, 'Variables')$common$time, + attr(dataF, 'Variables')$common$time + ) + expect_equal( + attr(dataF, 'Variables')$system4_m1[[3]]$dim[[4]]$units, + "hours since 2015-01-01 00:00:00" + ) + expect_equal( + attr(dataF, 'Variables')$system5_m1[[4]]$dim[[4]]$units, + "hours since 2016-01-01 00:00:00" + ) }) test_that("6. One data set, two vars from one file", { mask_path <- '/esarchive/autosubmit/con_files/mask.regions.Ec3.0_O1L46.nc' +mask_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', mask_path) suppressWarnings( data <- Start(repos = mask_path, var = c('nav_lon', 'nav_lat'), @@ -374,13 +771,27 @@ data <- Start(repos = mask_path, length(attr(data, 'Variables')$common$nav_lat), 8 ) +#NOTE: The following code doesn't work, and since this netCDF file doesn't follow the convention (one var per file), we leave this development to the future. +#suppressWarnings( +#dataF <- Start(repos = mask_path, +# var = c('nav_lon', 'nav_lat'), +# t = 'first', +# z = 'first', +# x = 'all', +# y = 'all', +# return_vars = list(var_names = NULL), +# var_var = 'var_names', +# retrieve = F) +#) }) test_that("7. Two data sets, while one is missing", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) # incorrect path. 
Therefore repos2 doesn't have any valid files repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f2h/$var$_$sdate$.nc" # correct one is _f6h + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) var <- 'tas' suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), @@ -396,7 +807,7 @@ suppressWarnings( return_vars = list(time = 'sdate', lon = 'dat', lat = 'dat'), - metadata_dims = 'dat', # it can be omitted since it is automatically specified as 'dat' + metadata_dims = 'dat', retrieve = T ) ) @@ -409,6 +820,10 @@ suppressWarnings( list(lon = NULL, lat = NULL) ) expect_equal( + names(attr(data, "Variables")$system5_m1), + c('lon', 'lat', 'tas') + ) + expect_equal( length(attr(data, "Variables")$system5_m1$lon), 1296 ) @@ -418,20 +833,63 @@ suppressWarnings( ) expect_equal( attr(data, 'Files'), - array(c(NA, "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + dim = c(dat = 2, var = 1, sdate = 1)) + ) + +suppressWarnings( + dataF <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos)), + var = var, + sdate = '20170101', + ensemble = indices(1), + time = indices(1), + lat = 'all', + lon = 'all', + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + lon = 'dat', + lat = 'dat'), + metadata_dims = 'dat', + retrieve = F + ) +) + + expect_equal( + attr(dataF, "Variables")$system4_m1, + list(lon = NULL, lat = NULL) + ) + expect_equal( + length(attr(dataF, "Variables")$system5_m1$lon), + 1296 + ) + expect_equal( + names(attr(dataF, "Variables")$system5_m1), + c('lon', 'lat', 'tas') + ) + expect_equal( + length(attr(dataF, "Variables")$system5_m1$tas), + 22 + ) + expect_equal( + attr(dataF, 'ExpectedFiles'), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) + + }) test_that("8. 
Two data sets, both have files but the first file is missing", { path_list <- list( MPI = list(name = 'MPI_ESM', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc')), Had = list(name = 'HadGEM3', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc'))) @@ -457,10 +915,6 @@ data <- Start(dataset = path_list, 5500 ) expect_equal( - length(attr(data, "Variables")$MPI_ESM), - 3 - ) - expect_equal( length(attr(data, "Variables")$MPI_ESM$lon), 30 ) @@ -482,11 +936,50 @@ data <- Start(dataset = path_list, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", + array(c("/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", NA, NA, NA, NA, NA, NA, - "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), + "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), dim = c(dataset = 2, var = 1, member = 2, sdate = 1, chunk = 2)) ) +suppressWarnings( +dataF <- Start(dataset = path_list, + var = 'tasmin', + member = list(c('r1i1p1f1', 'r2i1p1f2')), + sdate = paste0(2018), + chunk = list(c('20181101-20281231', '20181101-20181230')), + time = indices(1), #'all', + lat = values(list(0, 14)), + lon = values(list(0, 28)), + synonims = list(time = c('fmonth','time'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset'), + lat_reorder = Sort(), + num_procs = 1, + retrieve = F) +) + + expect_equal( + length(attr(dataF, "Variables")$MPI_ESM$lon), + 30 + ) + expect_equal( + names(attr(dataF, "Variables")$MPI_ESM), + c('lat', 'lon', 'tasmin') + ) + expect_equal( + length(attr(dataF, "Variables")$MPI_ESM$tasmin), + 22 + ) + expect_equal( + names(attr(dataF, "Variables")$HadGEM3), + c('lat', 'lon') + ) + expect_equal( + length(attr(dataF, "Variables")$HadGEM3$lon), + 34 + ) + }) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index cfd7dfb1d3559a7b67ed0afdfe8633998412eae2..227383bd17d735bd7ded03c465d12b0a67be4160 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -1,4 +1,3 @@ -context("Start() metadata filedim dependency") # When inner dimension selector is an array with filedim dimension name 
(e.g., time = [sdate = 2, time = 4], # or *_across is used, the inner dim has dependency on file dim. In this case, return_vars must # specify this relationship, i.e., return_vars = list(time = 'sdate'). @@ -6,6 +5,7 @@ context("Start() metadata filedim dependency") # Preparation: Get the time values repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, @@ -83,7 +83,7 @@ expect_equal( expect_equal( test6, test4, - check.attributes = FALSE + ignore_attr = TRUE ) #---------------------------------------- @@ -119,7 +119,7 @@ expect_equal( expect_equal( test6, test6a, - check.attributes = FALSE + ignore_attr = TRUE ) #---------------------------------------- @@ -157,7 +157,7 @@ expect_equal( expect_equal( test14a, test6a, - check.attributes = FALSE + ignore_attr = TRUE ) #------------------------------------------------- @@ -193,7 +193,7 @@ expect_equal( expect_equal( test15a, test6a, - check.attributes = FALSE + ignore_attr = TRUE ) }) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index ac332cdc95e0608fa320706bf26e8ad4bddfcc5d..b14326894d512bb2b003d55cf422c6b413d54552 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -1,10 +1,12 @@ -context("Start() metadata reshaping") # When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. test_that("1. time across fyear, fyear depends on sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -54,7 +56,7 @@ as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12 # retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -82,8 +84,11 @@ dates test_that("2. 
time across fyear, only one sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -118,7 +123,7 @@ as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12 #retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -145,8 +150,11 @@ dates test_that("3. time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -193,7 +201,7 @@ as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03 suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -228,9 +236,10 @@ datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), substr, 1, 7)))) +repos <- "/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( - data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + data <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -277,8 +286,7 @@ as.vector(datess) ) suppressWarnings( - dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + dataF <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -310,8 +318,12 @@ dates test_that("5. 
test 1 but merge_across_dims_narm = F", { + +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -361,7 +373,7 @@ c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962- ) suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -392,9 +404,11 @@ test_that("6. split time dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_199407.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +data <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -439,8 +453,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +dataF <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -470,9 +483,10 @@ datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -518,8 +532,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -549,9 +562,10 @@ test_that("8. 
split sdate dim", { file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), dim = c(syear = 3, smonth = 2)) +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -596,8 +610,7 @@ seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00: ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -617,8 +630,7 @@ dates # no return_vars suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -636,8 +648,7 @@ names(attributes(data)$Variables$common), ) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -651,7 +662,7 @@ data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', ) expect_equal( names(attributes(data)$Variables$common), -NULL +"tas" ) }) @@ -660,8 +671,11 @@ test_that("9. split file dim that contains 'time', and 'time' inner dim is impli dates_arr <- array(c(paste0(1961, '0', 1:5), paste0(1962, '0', 1:5)), dim = c(time = 5, syear = 2)) +repos <- "/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +data <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -704,7 +718,7 @@ as.vector(dates_arr) suppressWarnings( -dataF <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +dataF <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -735,8 +749,10 @@ y2 <- seq(a, b, by = 'days') y2 <- y2[-3] # remove 28 Feb time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +repos <- "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +data <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] @@ -772,7 +788,7 @@ as.vector(time_array) ) suppressWarnings( -dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +dataF <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index 
d0c4bd38b55050d51b0c2f25487ed7a168ce5529..e16f2bf4eb647ba103a7b2de612ab3828ab118e0 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -1,16 +1,16 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() multiple sdate with split + merge dim") - # When certain values in one observation file are required more than once, # and 'merge_across_dims' + 'split_multiselected_dims' are used, the values may be misplaced. # It might happen when reading experimental data with many start dates, # and the corresponding observations are required to have the same data structure. -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) -var_name <- 'sfcWind' +var_name <- 'tas' var100_name <- 'windagl100' sdates.seq <- c("20161222","20161229","20170105","20170112") @@ -55,7 +55,7 @@ obs <- Start(dat = obs_path, ) expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], @@ -131,7 +131,7 @@ obs <- Start(dat = obs_path, expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index ddd69be18d24b30380ecfd161bb3a7c63d78aa8d..75f28d43c7e751dc5da5d6bfefce88477adeed85 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -1,5 +1,3 @@ -context("Start() path_glob_permissive check") - test_that("1. 
expid/member/version", { years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') @@ -8,6 +6,7 @@ years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/*/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -30,10 +29,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. 
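As context for the glob checks above: Start() resolves the '*' segments against the file system for every tag combination, and the expect_equal() calls assert the fully expanded paths under the rerouted prefix. A minimal sketch of that pattern follows; it is not part of the patch, and the inner dimensions and the path_glob_permissive setting are assumptions for illustration:

```r
# Sketch only, not part of the patch: the glob-expansion pattern these tests
# exercise. The inner dimensions ('time', 'region') and the
# 'path_glob_permissive' setting are assumptions for illustration.
library(startR)

repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/',
                'EC-Earth3/historical/*/Omon/$var$/gn/v*/',
                '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc')
years <- paste0(c(1960:1961), '01-', c(1960:1961), '12')

data <- Start(dat = repos,
              var = 'tosmean',
              expid = c('a1st', 'a1sx'),
              year = years,
              time = 'all',
              region = 'all',
              path_glob_permissive = FALSE,  # resolve every '*' in the returned paths
              retrieve = FALSE)
attr(data, 'Files')  # the fully expanded paths asserted by expect_equal() above
```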
@@ -48,6 +47,7 @@ data <- Start(dat = repos, repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/$member$/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -72,10 +72,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2, member = 1)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. @@ -138,6 +138,7 @@ test_that("2. 
tag at the end", { path <- "/esarchive/exp/ecmwf/system4_m1/6hourly/$var$/$var$_$year$0*.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dat = path, var = "tas", @@ -152,8 +153,8 @@ data <- Start(dat = path, expect_equal( as.list(attr(data, 'ExpectedFiles')), -list("/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", - "/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") +list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") ) }) diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index 2fe5de95b5f1c1dded90ad24f868e864c1704c92..c87792e972259100dae6ba604aa1851b95a2d3fc 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lat Reorder test") - #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] #3 resolution 1-1 2-<1 3->1 @@ -13,6 +11,7 @@ context("Start() lat Reorder test") ############################################## path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:360] @@ -884,8 +883,10 @@ test_that("4-x-2-12-123-2-1-x", { # 1-1. no Sort(), NULL ## lat should be descending +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -904,7 +905,7 @@ exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 1-2. Sort(), NULL ## lat should be ascending suppressWarnings( -exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -923,7 +924,7 @@ exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 1-3. Sort(drcreasing = T), NULL ## lat should be descending suppressWarnings( -exp1_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_3 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -971,7 +972,7 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude) # 2-1. no Sort(), 'dat' ## lat should be descending suppressWarnings( -exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -990,7 +991,7 @@ exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 2-2. Sort(), 'dat' ## lat should be ascending suppressWarnings( -exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -1009,7 +1010,7 @@ exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 2-3. 
Sort(decreasing = T), NULL ## lat should be descending suppressWarnings( -exp2_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_3 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 6ca7b15c3c006db1137295003ce69ff8bc001bea..34a766f78653209266c493823d460b2cbe89d842 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -1,7 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lat Reorder test") - #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] #3 resolution 1-1 2-<1 3->1 4-> mixed @@ -16,6 +14,7 @@ context("Start() lat Reorder test") ############################################## #path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values ## latitude: -90 to 90 {-90, -89.05759 ...} #192 values ############################################## diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index aa209b8353300d849203c37594e5671fec76b1e9..0f71f0a0feb28e2cfa6ae7f7a4fcd1b1849c6428 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform -180to180 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[170, 190] #3 resolution 1-1 2-<1 3->1 @@ -15,6 +14,7 @@ context("Start() lon Reorder non-transform -180to180 test") ## Origin longitude in file: [-179.71875:180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 4351aa438689ac88f387b95346cd05d891168362..5e7701ab41f74ccee5350f04ee88ccb140ca3116 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder transform -180to180 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[170, 190] #3 resolution 1-1 2-<1 3->1 @@ -14,6 +13,7 @@ context("Start() lon Reorder transform -180to180 test") # 3-2 ## Origin longitude in file: [-179.71875:180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R
b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 3d2047ea27f31f98e84928e0cbcb7ecfef186df2..86ad5e737b30c81668e5b57dbce8f5953a930b66 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 @@ -14,6 +13,8 @@ context("Start() lon Reorder transform 0to360 test") # 3-2 ## Origin longitude in file: [0:360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) + variable <- 'psl' sdate <- '19821201' diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index d4629af99d396bbc65a34824542c1b78181bc9d5..c18d34a241b47843d04ed45a881fa832fb56b786 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 4 -> mixed diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 340860aa8f3a9d47fbbb974a741eb368aa2cdd9c..1e946d994a38fdfe97c061e02e53d4b2804359fa 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 @@ -13,6 +12,7 @@ context("Start() lon Reorder non-transform 0to360 test") ############################################## # 3-2 path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:359.722222222222] diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index e093a8870cd2640057c2cb025d730488cc9295af..71361d95896f50b465789e7186d43a43bc773be9 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -1,6 +1,5 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform 0to360 test") #1 selector type 1-values(list) 2-values(vector) 3-indices 4-'all' 5-mix #2 selector range 1-[10, 20] 2-[20, 10] 3-[-10, -20] 4-[-20, -10] 5-[-10, 10] 6-[10, -10] 7-[300, 350] 8-[350, 370] #3 resolution 1-1 2-<1 3->1 4-> mixed @@ -13,6 +12,7 @@ context("Start() lon Reorder non-transform 0to360 test") ############################################## # 3-2 path_exp 
<- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values ## latitude: -90 to 90 {-90, -89.05759 ...} #192 values diff --git a/tests/testthat/test-Start-reorder-metadata.R b/tests/testthat/test-Start-reorder-metadata.R index 4b6f909176214091a55866245e44e59009ae480c..ea727e5c76b9dba815f6c408cfd91f993d147dc4 100644 --- a/tests/testthat/test-Start-reorder-metadata.R +++ b/tests/testthat/test-Start-reorder-metadata.R @@ -1,4 +1,3 @@ -context("Start() reorder metadata check") # Ensure return_vars = NULL or 'dat' have the same metadata test_that("1. Sort() and CircularSort(0, 360)", { diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 42a79ce343a0dad629e0b6a5f1577cc98ad03e47..25efcfc91b22f6126afbac26864461107f3f2f5e 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -1,12 +1,10 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() lon Reorder non-transform retrieve test") - - ############################################## test_that("original range 0to360", { ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -2 lons.max <- 2 @@ -86,6 +84,7 @@ res2 <- Start(dat = path_exp, test_that("original range -180to180", { ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index b8279de2fdf1c2d8c7622da51ce19bb239ad9824..87a4416e5edef0b51e2b2f06304e4a2ad4e1f2e4 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -1,12 +1,10 @@ # No transform, test reorder function Sort() and CircularSort() with selector 'all'. - -context("No transform, reorder test: 'all'") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -23,6 +21,7 @@ NcClose(file) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. lat", { # lon_reorder = CircularSort(0, 360) diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index b2ca0ace687be4d7ff157cc64e4e357d34b0445e..59d00d4accf229fb7b9086060147b071a029bd70 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -1,11 +1,10 @@ # No transform, test reorder function Sort() and CircularSort() with selector indices().
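The reorder files above and below all follow one calling pattern; the sketch below (not part of the patch; index ranges and the argument set are assumptions, while the path reroute matches the patch) shows how Sort() and CircularSort() plug into Start():

```r
# Sketch only, not part of the patch: how Sort() and CircularSort() plug into
# Start(). Index ranges are assumptions; the path reroute matches the patch.
library(startR)

path <- paste0('/esarchive/scratch/aho/startR_unittest_files/',
               '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')

data <- Start(dat = path,
              var = 'tas',
              sdate = '20000101',
              ensemble = indices(1),
              time = indices(1),
              latitude = indices(1:10),                  # the selector form under test
              latitude_reorder = Sort(),                 # ascending coordinate output
              longitude = indices(1:10),
              longitude_reorder = CircularSort(0, 360),  # wrap longitudes into [0, 360)
              return_vars = list(latitude = NULL, longitude = NULL),
              retrieve = FALSE)
as.vector(attr(data, 'Variables')$common$latitude)  # reordered coordinate values
```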
-context("No transform, reorder test: indices()") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -22,6 +21,7 @@ NcClose(file) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. lat", { # lon_reorder = CircularSort(0, 360) diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index 3d576d806115cf4ce0caec983b475f45ad0a37be..480a3bc64c24c2e74765c289d8179f739602fe40 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -1,8 +1,9 @@ -context("Start() reshape parameters check") # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) path_obs <- '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h-r360x181/$var$_$date$.nc' +path_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_obs) var <- 'tas' sdate <- paste0(1993:1995, '1201') @@ -31,7 +32,7 @@ easy_sdate <- c('199312', paste0(rep(1994:1995, each = 3), c('01', '02', '12')), easy_array <- c() for (i in 1:length(easy_sdate)) { - easy_file <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', + easy_file <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', easy_sdate[i], '.nc')) if (substr(easy_sdate[i], 5, 6) == '02') { sub_time <- 1:28 @@ -400,7 +401,7 @@ exp1 <- Start(dat = path_exp, ) # easyNCDF easy_sdate_exp <- '19931201' -easy_file_exp <- NcOpen(paste0('/esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_', +easy_file_exp <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_', easy_sdate_exp, '.nc')) easy_exp <- NcToArray(easy_file_exp, vars_to_read = 'tas', dim_indices = list(longitude = c(1), latitude = c(1), ensemble = c(1), @@ -490,7 +491,7 @@ obs2 <- Start(dat = path_obs, retrieve = TRUE) ) # easyNCDF -easy_file_199311 <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', +easy_file_199311 <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', '199311', '.nc')) easy_obs_199311 <- NcToArray(easy_file_199311, vars_to_read = 'tas', dim_indices = list(lon = c(1), lat = c(1), time = 1:30)) diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index 4bf83c66f1be76cd5103d63a3a8d50bdee545695..e3ff876e4dc2dc3f8e5018652488590f4b601aa2 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -1,8 +1,8 @@ -context("Start() return_vars name") # The name of return_vars should be one of the inner dimension names. The synonims can # be used but will be changed back to the inner dim names. 
repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' +repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) #--------------------------------------------------------------- diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 8793296fd06001424a32a0a8369803a0a1831243..699c01cc145214b33d2aa8198f3f32956c42e03f 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,7 +1,7 @@ -context("Start() split + merge dim and value check") -var_name <- 'sfcWind' -path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc' +var_name <- 'tas' +path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) suppressWarnings( hcst <- Start(dat = path.exp, @@ -24,7 +24,8 @@ file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7)))) path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' - +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) +var_name <- "sfcWind" test_that("1. split + merge + narm", { suppressWarnings( @@ -148,6 +149,7 @@ obs <- Start(dat = path.obs, test_that("4. split only", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312","201411","201412") dim(dates_file) <- c(smonth = 2, syear = 2) diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index 3aa193042caec24d0065c1eb60d987a3fb96aa0a..0c499d31398189a840eedd0fb5a6f07cafcddba7 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -1,10 +1,9 @@ -context("To detect the variable with time format and adjust the units") test_that("1. The data has units like time", { suppressWarnings( -FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', +FD <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', var = 'FD', # units: days time = indices(1), longitude = indices(1), @@ -14,7 +13,7 @@ FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var retrieve = TRUE) ) suppressWarnings( -FD2 <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', +FD2 <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', var = 'FD', # units: days time = indices(1), longitude = indices(1), @@ -39,7 +38,7 @@ test_that("2. 
The metadata variable name is not time", { # VITIGEOOS vari <- "rsds" -anlgs <- paste0("/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", +anlgs <- paste0("/esarchive/scratch/aho/startR_unittest_files/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", "$var$/$var$-vitigeoss-cat","_1999-2018_", "$file_date$.nc") file_date_array <- array(dim = c(sweek = 2, sday = 3)) diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index 8a9ca657f61e7cf32a1f55d4531cdc515c781479..a8290a6860ba659d9576b6d94c4ce61de65db6a6 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -3,14 +3,12 @@ # The test contains three calls with different target grids: # two with 'r128x64' (from different original grid) and one with 'r100x50'. -context("Transform test target grid: lon and lat = 'all'") - #--------------------------------------------------------------- # cdo is used to verify the data values # Test 1: original grid 'r360x180' library(easyNCDF) -grid1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64' -path <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180' +grid1 <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64' +path <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180' file <- NcOpen(path) arr <- NcToArray(file, @@ -36,6 +34,8 @@ suppressWarnings( #--------------------------------------------------------------- # Test 2: Original grid 'r432x324' path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s2009-r1i1p1f2_gn_201501-201512.nc' # 'r432x324' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(lat = 1:324, lon = 1:432, time = 1:2), @@ -53,6 +53,7 @@ suppressWarnings( #--------------------------------------------------------------- path1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/$var$/gr/v20191016/$var$_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s$sdate$-r1i1p1f1_gr_$sdate$11-202512.nc' # 'r360x180' +path1 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path1) test_that("1. 'all'", { suppressWarnings( @@ -108,6 +109,7 @@ test_that("1. 'all'", { #--------------------------------------------------------------- path2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/$var$/gn/v20200417/$var$_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$sdate$11-201512.nc' # 'r432x324' +path2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path2) test_that("2. 
test path 2", { suppressWarnings( diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index dee8b4e0582e007057df58d7deeeafbc6d66cd84..9b3cc6a6828a40f0f99577486a383a44a730e054 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -1,4 +1,3 @@ -context("Transform: check with cdo") ############################################## # This unit test checks different border situations: normal regional that doesn't touch the borders, @@ -26,6 +25,9 @@ context("Transform: check with cdo") # The result of cdo is from CDO version 1.9.8. +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + ############################################## test_that("1. normal regional situation", { @@ -35,7 +37,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -99,7 +101,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -173,7 +175,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -251,7 +253,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -308,7 +310,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -386,7 +388,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -456,7 +458,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -517,7 +519,7 @@ lons.min <- 0 lons.max <- 359 suppressWarnings( - exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -617,7 +619,7 @@ lons.min <- 0.5 lons.max <- 359.9 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -709,4 +711,4 @@ expect_equal( # [8,] 299.2109 300.3170 300.1524 299.6214 298.8563 # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644 -############################################## \ No newline at end of file +############################################## diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index 
b41ec0a106162306061b268a0d6e9e66e6a5b60d..d7d895e3a3a66e9aff7f3f771c2c299ba1295d94 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -4,12 +4,10 @@ # The test contains three calls: lat_reorder = Sort(), no lat_reorder, and lat_reorder = Sort(decreasing = T). # Note that the original latitude is descending [90:-90]. cdo result is ascending [-90:90]. -context("Transform and lat_reorder test: 'all'") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) -path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -27,6 +25,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. 'all'", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 6c3a7976f50b9e3f68dbc072f0321ac0a6d60fc9..16daa79309116ef9502a9fdf9988ea43cbff2995 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -9,12 +9,11 @@ #!!!!!!!!!!!!!!!!!!!!!PROBLEM in test 2, indices(640:1)!!!!!!!!!!!!!!!!!!!! #TODO: Add regional test -context("Transform and lat_reorder test: indices") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -32,6 +31,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices(1:640)", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index 92490ae6fb03f264c4f04ee0d3a4a503cfa995a6..b70b637cc140c51c37b5e7ab5514df330d5ec85d 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -7,12 +7,11 @@ # Each of them contains three calls: lat_reorder = Sort(), no lat_reorder, and lat_reorder = Sort(decreasing = T). # Note that the original latitude is descending [90:-90]. cdo result is ascending [-90:90]. 
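These transform tests share one shape; roughly, the pattern looks like the sketch below (not part of the patch; the target grid is taken from the neighboring transform tests and everything else is assumed):

```r
# Sketch only, not part of the patch: the transform-plus-reorder pattern these
# files test. CDORemapper interpolates to the target grid and
# 'latitude_reorder = Sort()' returns ascending latitudes, matching the cdo
# reference built with s2dv::CDORemap() above. Arguments are assumptions.
library(startR)

path <- paste0('/esarchive/scratch/aho/startR_unittest_files/',
               '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')

exp <- Start(dat = path,
             var = 'tas',
             sdate = '20000101',
             ensemble = indices(1),
             time = indices(1),
             latitude = values(list(-90, 90)),
             latitude_reorder = Sort(),
             longitude = 'all',
             transform = CDORemapper,
             transform_params = list(grid = 'r100x50', method = 'con'),
             transform_vars = c('latitude', 'longitude'),
             retrieve = TRUE)
```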
-context("Transform and lat_reorder test: values") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -30,6 +29,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. values(list(-90, 90))", { diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index f16404671bb44b0080c851f27b7388e55eeb1595..d07388ea13697deebfacacc3d00c9cb72822a1dd 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -1,10 +1,10 @@ #if (identical(Sys.getenv("NOT_CRAN"), "")) Sys.setenv(NOT_CRAN='true') -context("Start() transform across_meridian lon order check") - test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + var <- 'tas' lon.min <- 170 lon.max <- 185 #359.723 #360 diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index ede3c959c22c258b384dc7789ab68fb299d7040c..227f09dea14bca0dfdeb478fd06b541ba2dd416f 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -1,10 +1,10 @@ -context("Start() transform metadata check") # Ensure returns_vars = NULL or 'dat' have the same metadata test_that("1. Sort() and CircularSort(0, 360)", { # Original lon is [-180, 180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -140,6 +140,7 @@ test_that("2. Sort(decreasing = TRUE) and CircularSort(-180, 180)", { # Original lon is [0, 360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- 190 lons.max <- 200 lats.min <- 10 diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 657cca3589991dc3ad485090ffdc66cf4b1581b7..95e7c2b88ecdb53f7ddf8d6c0b6f2ce36f57a7d3 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -8,12 +8,11 @@ # Note that the original latitude is descending [90:-90]. 
-context("Transform: three selector forms") - #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -31,6 +30,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices", { diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index ff83441a05299f9db2b804a0d9f8234ae1ecaa12..46b57d84d29f6fe357f9246a241a940257b89076 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -1,14 +1,15 @@ # ex1_8 -context("Start() two dats and two vars in one call") test_that("1. ex1_8, case 1", { path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tas <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tas) path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tos) suppressWarnings( data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), var = c('tas', 'tos'), diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 76c4f91d2758665a32cf49098353bc6c98dcf703..1a6288be8d6f98df37326d33a4908253df1cabac 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -4,12 +4,12 @@ # 3. transform, indices reversed # 4. no transform, indices reversed -context("List of values and vector of values") #----------------------------------------------------------------- # To get lat and lon vectors library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) lats <- NcToArray(file, dim_indices = list(latitude = 1:35), vars_to_read = 'latitude') @@ -18,11 +18,14 @@ lons <- NcToArray(file, NcClose(file) #------------------------------------------------------------------ +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + test_that("1. transform", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -47,7 +50,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values. This one is a weird usage though... 
suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -85,7 +88,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -104,7 +107,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -137,7 +140,7 @@ test_that("3. transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -163,7 +166,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var #WRONG!!!!!!!!!! # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -201,7 +204,7 @@ test_that("4. no transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -220,7 +223,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1),