From 791377e8508f48134032b57a0cde9a4ccb0c43cd Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 27 Jul 2023 17:19:25 +0200 Subject: [PATCH 01/32] Autosubmit as workflow manager --- R/ByChunks.R | 20 +- R/ByChunks_autosubmit.R | 741 ++++++++++++++++++ R/Collect.R | 72 +- R/Compute.R | 49 +- R/Utils.R | 141 ++++ inst/chunking/Autosubmit/autosubmit.yml | 16 + inst/chunking/Autosubmit/expdef.yml | 33 + inst/chunking/Autosubmit/jobs.yml | 9 + .../Autosubmit/load_process_save_chunk_AS.R | 132 ++++ inst/chunking/Autosubmit/platforms.yml | 14 + inst/chunking/Autosubmit/startR_autosubmit.sh | 25 + 11 files changed, 1231 insertions(+), 21 deletions(-) create mode 100644 R/ByChunks_autosubmit.R create mode 100644 inst/chunking/Autosubmit/autosubmit.yml create mode 100644 inst/chunking/Autosubmit/expdef.yml create mode 100644 inst/chunking/Autosubmit/jobs.yml create mode 100644 inst/chunking/Autosubmit/load_process_save_chunk_AS.R create mode 100644 inst/chunking/Autosubmit/platforms.yml create mode 100644 inst/chunking/Autosubmit/startR_autosubmit.sh diff --git a/R/ByChunks.R b/R/ByChunks.R index 37a554c..547b6ea 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -181,15 +181,23 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', if (is.null(names(cluster))) { stop("Parameter 'cluster' must be a named list.") } - if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', - 'temp_dir', 'lib_dir', 'init_commands', - 'r_module', 'CDO_module', - 'ecflow_module', 'node_memory', - 'cores_per_job', 'job_wallclock', 'max_jobs', + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup')))) { + 'polling_period', 'special_setup', 'expid', 'hpc_user')))) { + stop("Found invalid component names in parameter 'cluster'.") } + # Remove ecFlow components + redundant_components <- c('autosubmit_module', 'expid', 'hpc_user') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + warning("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager.") + cluster[[tmp]] <- NULL + } default_cluster[names(cluster)] <- cluster } localhost_name <- NULL diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R new file mode 100644 index 0000000..a246676 --- /dev/null +++ b/R/ByChunks_autosubmit.R @@ -0,0 +1,741 @@ +#'Execute the operation by chunks +#' +#'This is an internal function used in Compute(), executing the operation by +#'the chunks specified in Compute(). It also returns the configuration details +#'and profiling information. +#' +#'@param step_fun A function with the class 'startR_step_fun' containing the +#' details of operation. +#'@param cube_headers A list with the class 'startR_cube' returned by Start(). +#' It contains the details of data to be operated. +#'@param \dots Additional parameters for the inputs of 'step_fun'. +#'@param chunks A named list of dimensions which to split the data along and +#' the number of chunks to make for each. The chunked dimension can only be +#' those not required as the target dimension in function Step(). The default +#' value is 'auto', which lists all the non-target dimensions and each one has +#' one chunk. +#'@param threads_load An integer indicating the number of execution threads to +#' use for the data retrieval stage. The default value is 1. +#'@param threads_compute An integer indicating the number of execution threads +#' to use for the computation. The default value is 1. +#'@param cluster A list of components that define the configuration of the +#' machine to be run on. The comoponents vary from the different machines. +#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more +#' details and examples. +#' Only needed when the computation is not run locally. The default value is +#' NULL. +#'@param ecflow_suite_dir A character string indicating the path to a folder in +#' the local workstation where to store temporary files generated for the +#' automatic management of the workflow. Only needed when the execution is run +#' remotely. The default value is NULL. +#'@param ecflow_server A named vector indicating the host and port of the +#' EC-Flow server. The vector form should be +#' \code{c(host = 'hostname', port = port_number)}. Only needed when the +#' execution is run remotely. The default value is NULL. +#'@param silent A logical value deciding whether to print the computation +#' progress (FALSE) on the R session or not (TRUE). It only works when the +#' execution runs locally or the parameter 'wait' is TRUE. The default value +#' is FALSE. +#'@param debug A logical value deciding whether to return detailed messages on +#' the progress and operations in a Compute() call (TRUE) or not (FALSE). +#' Automatically changed to FALSE if parameter 'silent' is TRUE. The default +#' value is FALSE. +#'@param wait A logical value deciding whether the R session waits for the +#' Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an +#' object with all the information of the startR execution that can be stored +#' in your disk. After that, the R session can be closed and the results can +#' be collected later with the Collect() function. The default value is TRUE. +#' +#'@return A list of data arrays for the output returned by the last step in the +#' specified workflow. The configuration details and profiling information are +#' attached as attributes to the returned list of arrays. +#' +#'@examples +#' # ByChunks() is internally used in Compute(), not intended to be used by +#' # users. The example just illustrates the inputs of ByChunks(). +#' # data_path <- system.file('extdata', package = 'startR') +#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' # sdates <- c('200011', '200012') +#' # data <- Start(dat = list(list(path = path_obs)), +#' # var = 'tos', +#' # sdate = sdates, +#' # time = 'all', +#' # latitude = 'all', +#' # longitude = 'all', +#' # return_vars = list(latitude = 'dat', +#' # longitude = 'dat', +#' # time = 'sdate'), +#' # retrieve = FALSE) +#' # fun <- function(x) { +#' # lat = attributes(x)$Variables$dat1$latitude +#' # weight = sqrt(cos(lat * pi / 180)) +#' # corrected = Apply(list(x), target_dims = "latitude", +#' # fun = function(x) {x * weight}) +#' # } +#' # step <- Step(fun = fun, +#' # target_dims = 'latitude', +#' # output_dims = 'latitude', +#' # use_libraries = c('multiApply'), +#' # use_attributes = list(data = "Variables")) +#' #ByChunks(step, data) +#' +#'@import multiApply +#'@importFrom methods is +#'@noRd +ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { + + # Build object to store profiling timings + t_begin_total <- Sys.time() + t_begin_bychunks_setup <- t_begin_total + timings <- list(nchunks = NULL, + concurrent_chunks = NULL, + cores_per_job = NULL, + threads_load = NULL, + threads_compute = NULL, + bychunks_setup = NULL, + transfer = NULL, + queue = NULL, + job_setup = NULL, + load = NULL, + compute = NULL, + transfer_back = NULL, + merge = NULL, + total = NULL) + + MergeArrays <- .MergeArrays + + # Check input headers + if (is(cube_headers, 'startR_cube')) { + cube_headers <- list(cube_headers) + } + if (!all(sapply(lapply(cube_headers, class), + function(x) 'startR_cube' %in% x))) { + stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", + "as returned by Start().") + } + + # Check step_fun + if (!is.function(step_fun)) { + stop("Parameter 'step_fun' must be a function.") + } + + # Check cores + if (!is.numeric(threads_load)) { + stop("Parameter 'threads_load' must be a numeric value.") + } + threads_load <- round(threads_load) + if (!is.numeric(threads_compute)) { + stop("Parameter 'threads_compute' must be a numeric value.") + } + threads_compute <- round(threads_compute) + timings[['threads_load']] <- threads_load + timings[['threads_compute']] <- threads_compute + + on_cluster <- !is.null(cluster) + + # Check autosubmit_suite_dir + suite_id <- cluster[['expid']] + + #NOTE: + #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ + #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ + #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ + #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + + autosubmit_suite_dir_suite <- '' + if (on_cluster) { + if (is.null(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") + } + if (!is.character(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be a character string.") + } +#----------NEW----------- + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + if (!dir.exists(autosubmit_suite_dir_suite)) { + dir.create(autosubmit_suite_dir_suite, recursive = TRUE) + } +#--------NEW_END---------- + if (!dir.exists(autosubmit_suite_dir_suite)) { + stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") + } + if (!is.null(autosubmit_server)) { + if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { + stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") + } + } else { + autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) + } + } + +#----------NEW----------- + # Check cluster + default_cluster <- list(queue_host = NULL, +# queue_type = 'slurm', + data_dir = NULL, +# temp_dir = NULL, + lib_dir = NULL, + init_commands = list(''), + r_module = 'R', + CDO_module = NULL, + autosubmit_module = 'autosubmit', + node_memory = NULL, # not used + cores_per_job = NULL, + job_wallclock = '01:00:00', + max_jobs = 6, + extra_queue_params = list(''), +# bidirectional = TRUE, + polling_period = 10, + special_setup = 'none', + expid = NULL, + hpc_user = NULL) + if (on_cluster) { + if (!is.list(cluster)) { + stop("Parameter 'cluster' must be a named list.") + } + if (is.null(names(cluster))) { + stop("Parameter 'cluster' must be a named list.") + } + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', + 'extra_queue_params', 'bidirectional', + 'polling_period', 'special_setup', 'expid', 'hpc_user')))) { + stop("Found invalid component names in parameter 'cluster'.") + } + # Remove ecFlow components + redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + warning("Cluster component ", paste(tmp, collapse = ','), " not used when Autosubmit is the workflow manager.") + cluster[[tmp]] <- NULL + } + default_cluster[names(cluster)] <- cluster + } + cluster <- default_cluster + is_data_dir_shared <- FALSE + + # Cluster compoment check + if (on_cluster) { + # queue_host + support_hpcs <- c('local', 'nord3') # names in platforms.yml + if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) { + stop("Component 'queue_host' in parameter 'cluster' must be one of the follows: ", paste(support_hpcs, collapse = ','), '.') + } + # data_dir + if (is.null(cluster[['data_dir']])) { + is_data_dir_shared <- TRUE + } else { + if (!is.character(cluster[['data_dir']])) { + stop("The component 'data_dir' of the parameter 'cluster' must be a character string.") + } + remote_data_dir <- cluster[['data_dir']] + } + # lib_dir + if (!is.null(cluster[['lib_dir']])) { + if (!is.character(cluster[['lib_dir']])) { + stop("The component 'lib_dir', of the parameter 'cluster' must be NULL or ", + "a character string.") + } + } + # init_commands + if (!is.list(cluster[['init_commands']]) || + !all(sapply(cluster[['init_commands']], is.character))) { + stop("The component 'init_commands' of the parameter 'cluster' must be a list of ", + "character strings.") + } + # r_module + if (!is.character(cluster[['r_module']])) { + stop("The component 'r_module' of the parameter 'cluster' must be a character string.") + } + if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) { + stop("The component 'r_module' of the parameter 'cluster' must have at least one character ", + "and contain no blank spaces.") + } + # CDO_module + if (!is.null(cluster[['CDO_module']])) { + if (!is.character(cluster[['CDO_module']])) { + stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") + } + if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { + warning("The component 'CDO_module' of parameter 'cluster' must have ", + " than 1 and only the first element will be used.") + } + cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) + } + # autosubmit_module + if (!is.character(cluster[['autosubmit_module']])) { + stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") + } + # cores_per_job + if (is.null(cluster[['cores_per_job']])) { + cluster[['cores_per_job']] <- threads_compute + } + if (!is.numeric(cluster[['cores_per_job']])) { + stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") + } + cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) + if (cluster[['cores_per_job']] > threads_compute) { + .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") + } + # job_wallclock + tmp <- strsplit( '01:00:00', ':')[[1]] + if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) { + stop("The compoment 'job_wallclock' should be the format of HH:MM or HH:MM:SS.") + } + # max_jobs + if (!is.numeric(cluster[['max_jobs']])) { + stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.") + } + cluster[['max_jobs']] <- round(cluster[['max_jobs']]) + # extra_queue_params + if (!is.list(cluster[['extra_queue_params']]) || + !all(sapply(cluster[['extra_queue_params']], is.character))) { + stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ", + "character strings.") + } + # polling_period + if (!is.numeric(cluster[['polling_period']])) { + stop("The component 'polling_period' of the parameter 'cluster' must be numeric.") + } + cluster[['polling_period']] <- round(cluster[['polling_period']]) + # special_setup + if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) { + stop("The value provided for the component 'special_setup' of the parameter ", + "'cluster' is not recognized.") + } + # expid + if (!is.character(cluster[['expid']]) | length(cluster[['expid']]) != 1) { + stop("The component 'expid' of the parameter 'cluster' must be a character string.") + } + # hpc_user + if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) { + stop("The component 'hpc_user' of the parameter 'cluster' must be a character string.") + } + + } + + if (on_cluster) { + remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", cluster[['expid']], 'proj') + remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', cluster[['expid']], '/') + } + + # Check silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + # Check debug + if (!is.logical(debug)) { + stop("Parameter 'debug' must be logical.") + } + if (silent) { + debug <- FALSE + } + + # Check wait + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + + # Work out chunked dimensions and target dimensions + all_dims <- lapply(cube_headers, attr, 'Dimensions') + all_dims_merged <- NULL + for (i in all_dims) { + if (is.null(all_dims_merged)) { + all_dims_merged <- i + } else { + all_dims_merged <- .MergeArrayDims(all_dims_merged, i)[[3]] + } + } + all_dimnames <- names(all_dims_merged) + + target_dims_indices <- which(all_dimnames %in% unlist(attr(step_fun, 'TargetDims'))) + target_dims <- NULL + if (length(target_dims_indices) > 0) { + target_dims <- all_dimnames[target_dims_indices] + } + + chunked_dims <- all_dimnames + if (length(target_dims_indices) > 0) { + chunked_dims <- chunked_dims[-target_dims_indices] + } + if (length(chunked_dims) < 1) { + stop("Not possible to process input by chunks. All input dimensions are ", + "target dimensions.") + } + + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs expected by the function 'step_fun'.") + } + # Check all input headers have matching dimensions + cube_index <- 1 + for (cube_header in cube_headers) { + + # Check if all the margin dims are consistent among datasets + if (!all(chunked_dims %in% names(attr(cube_header, "Dimensions")))) { + trouble_dim_name <- chunked_dims[which(!chunked_dims %in% + names(attr(cube_header, "Dimensions")))] + stop(paste0("Found margin dimension, ", toString(trouble_dim_name), + ", is not in input data ", cube_index, ".")) + } + + # Only check margin dimensions (i.e., chunked_dims) + if (!all(attr(cube_header, 'Dimensions')[chunked_dims] == all_dims_merged[names(attr(cube_header, 'Dimensions'))][chunked_dims])) { + stop("All provided 'cube_headers' must have matching dimension lengths ", + "with each other.") + } + if (!all(attr(step_fun, 'TargetDims')[[cube_index]] %in% names(attr(cube_header, 'Dimensions')))) { + stop("All provided 'cube_headers' must contain at least the target dimensions ", + "expected by 'step_fun'.") + } + cube_index <- cube_index + 1 + # work out expected result dimensions + } + + # Check chunks + default_chunks <- as.list(rep(1, length(chunked_dims))) + names(default_chunks) <- chunked_dims + if (length(chunks) == 1 && chunks == 'auto') { + chunks <- default_chunks + } + if (!is.list(chunks)) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (is.null(names(chunks))) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (any(!(names(chunks) %in% chunked_dims))) { + stop("All names in parameter 'chunks' must be one of the non-target dimensions ", + "present in the cubes in 'cube_headers'. The target dimensions are ", + paste(paste0("'", target_dims, "'"), collapse = ', '), ". The non-target ", + "dimensions (margins) are ", paste(paste0("'", chunked_dims, "'"), collapse = ', '), ".") + } + if (any(!(((unlist(chunks) %% 1) == 0) | (unlist(chunks) == 'all')))) { + stop("All values in parameter 'chunks' must take a numeric value or 'all'.") + } + if (any(unlist(chunks) < 1)) { + stop("All values in parameter 'chunks' must be >= 1.") + } + for (chunk_spec in 1:length(chunks)) { + if (chunks[[chunk_spec]] > all_dims_merged[names(chunks)[chunk_spec]]) { + stop("Too many chunks requested for the dimension ", names(chunks)[chunk_spec], + ". Maximum allowed is ", all_dims_merged[names(chunks)[chunk_spec]]) + } + } + default_chunks[names(chunks)] <- chunks + #NOTE: chunks here has all the margin dims, not only the chunked ones + chunks <- default_chunks + timings[['nchunks']] <- prod(unlist(chunks)) + + # Check step_fun + if (!is(step_fun, 'startR_step_fun')) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + # Replace 'all's + chunks_all <- which(unlist(chunks) == 'all') + if (length(chunks_all) > 0) { + chunks[chunks_all] <- all_dims[names(chunks)[chunks_all]] + } + + if (on_cluster) { + # Copy load_process_save_chunk_AS.R into local folder +#TODO: Change the following line to read from package + chunk_script <- file("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/load_process_save_chunk_AS.R") +# chunk_script <- file(system.file('chunking/autosubmit/load_process_save_chunk_AS.R', +# package = 'startR')) + chunk_script_lines <- readLines(chunk_script) + close(chunk_script) + chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', + paste(deparse(cluster[['lib_dir']]), collapse = '\n')), + chunk_script_lines) + #TODO: Change out_dir to somewhere else like expid/outputs/ + chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', + paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) + chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), + chunk_script_lines) + deparsed_calls <- paste0('start_calls <- list(') + extra_path <- '' + if (cluster[['special_setup']] == 'marenostrum4') { + extra_path <- '/gpfs/archive/bsc32/' + } + for (cube_header in 1:length(cube_headers)) { + pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') + bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] + bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + if (!is_data_dir_shared) { + cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', + extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) + for (file_n in 1:length(bk_expected_files)) { + attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', + extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) + } + } + deparsed_calls <- paste0(deparsed_calls, '\nquote(', + paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), + ')') + cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim + attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files + if (cube_header < length(cube_headers)) { + deparsed_calls <- paste0(deparsed_calls, ', ') + } + } + deparsed_calls <- paste0(deparsed_calls, '\n)') + chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) + chunk_script_lines <- gsub('^start_calls_attrs <- *', paste0('start_calls_attrs <- ', paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^param_dimnames <- *', paste0('param_dimnames <- ', paste(deparse(chunked_dims), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), + chunk_script_lines) + chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), + chunk_script_lines) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_AS.R')) + + # Write and copy startR_autosubmit.sh into local folder + write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) + + # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ + #TODO: remove source() and put function under R/ or just below ByChunks_autosubmit() + source("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/write_autosubmit_conf.R") + write_autosubmit_conf(chunks, cluster, autosubmit_suite_dir) + + } # if on_cluster + + + # Iterate through chunks + chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) + arrays_of_results <- vector('list', length(attr(step_fun, 'OutputDims'))) + names(arrays_of_results) <- names(attr(step_fun, 'OutputDims')) + for (component in 1:length(arrays_of_results)) { + arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) + dim(arrays_of_results[[component]]) <- (unlist(chunks)) + } + if (!on_cluster) { + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + timings[['transfer']] <- 0 + timings[['queue']] <- 0 + timings[['job_setup']] <- 0 + timings[['transfer_back']] <- 0 + if (!silent) { + .message(paste0("Processing chunks... ", + "remaining time estimate soon...")) + } + time_before_first_chunk <- Sys.time() + time_after_first_chunk <- NULL + } + found_first_result <- FALSE + for (i in 1:length(chunk_array)) { + chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] + names(chunk_indices) <- names(dim(chunk_array)) + + if (!on_cluster) { + if (!silent) { + .message(paste("Loading chunk", i, + "out of", length(chunk_array), "...")) + } + data <- vector('list', length(cube_headers)) + t_begin_load <- Sys.time() + for (input in 1:length(data)) { + start_call <- cube_headers[[input]] + dims_to_alter <- which(names(attr(start_call, 'Dimensions')) %in% names(chunks)) + names_dims_to_alter <- names(attr(start_call, 'Dimensions'))[dims_to_alter] + # If any dimension comes from split dimensions + split_dims <- attr(start_call, 'SplitDims') + + if (length(split_dims) != 0){ + + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] + start_call[[names(split_dims)[k]]] <- .chunk(chunks_indices_split_dims, chunks_split_dims, + eval(start_call[[names(split_dims)[k]]])) + dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) + if (length(dims_to_alter_to_remove) > 0) { + dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] + names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] + } + } + } + } + + if (length(dims_to_alter) > 0) { + for (call_dim in names(attr(start_call, 'Dimensions'))[dims_to_alter]) { + start_call[[call_dim]] <- .chunk(chunk_indices[call_dim], chunks[[call_dim]], + eval(start_call[[call_dim]])) + } + } + start_call[['silent']] <- !debug + if (!('num_procs' %in% names(start_call))) { + start_call[['num_procs']] <- threads_load + } + data[[input]] <- eval(start_call) + } + t_end_load <- Sys.time() + timings[['load']] <- c(timings[['load']], + as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))) + if (!silent) { + .message(paste("Processing...")) + } + #TODO: Find a better way to assign the names of data. When multiple steps for Compute is available, this way may fail. + names(data) <- names(cube_headers) + t_begin_compute <- Sys.time() + result <- multiApply::Apply(data, + target_dims = attr(step_fun, 'TargetDims'), + fun = step_fun, ..., + output_dims = attr(step_fun, 'OutputDims'), + use_attributes = attr(step_fun, 'UseAttributes'), + ncores = threads_compute) + if (!found_first_result) { + names(arrays_of_results) <- names(result) + found_first_result <- TRUE + } + for (component in 1:length(result)) { + arrays_of_results[[component]][[i]] <- result[[component]] + } + rm(data) + gc() + t_end_compute <- Sys.time() + timings[['compute']] <- c(timings[['compute']], + as.numeric(difftime(t_end_compute, + t_begin_compute, units = 'secs'))) + } + + # Time estimate + if (!on_cluster) { + if (is.null(time_after_first_chunk)) { + time_after_first_chunk <- Sys.time() + if (!silent) { + estimate <- (time_after_first_chunk - + time_before_first_chunk) * + (length(chunk_array) - 1) + units(estimate) <- 'mins' + .message( + paste0("Remaining time estimate (at ", format(time_after_first_chunk), ") ", + "(neglecting merge time): ", format(estimate)) + ) + } + } + } + } + + + if (on_cluster) { + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- cluster[['max_jobs']] + + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { + #NOTE: Not consider this part yet + t_begin_transfer <- Sys.time() + .message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! $(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) + } else { + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + .message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + .message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() + + as_module <- cluster[['autosubmit_module']] + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit create ", suite_id, " -np; ", + "autosubmit refresh ", suite_id, "; ") + if (wait) { + sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + } else { + sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? + } + + if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { + #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here + system(sys_commands) + + } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + # ssh from WS to AS VM to run exp + as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') + sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') + system(sys_commands) + + } else { + stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + } + + timings[['total']] <- t_begin_total + startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', + suite_id = suite_id, chunks = chunks, + num_outputs = length(arrays_of_results), + autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, + timings = timings) + class(startr_exec) <- 'startR_exec' + + if (wait) { + result <- Collect(startr_exec, wait = TRUE, remove = T) + .message("Computation ended successfully.") + return(result) + + } else { + # if wait = F, return startr_exec and merge chunks in Collect(). + return(startr_exec) + } + } +} + diff --git a/R/Collect.R b/R/Collect.R index 4c80b03..73e06b9 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -72,10 +72,34 @@ #' #'@export Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { + + # Parameter checks if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", - "'startR_exec', as returned by Collect(..., wait = FALSE).") + "'startR_exec', as returned by Compute(..., wait = FALSE).") } + if (!tolower(startr_exec$workflow_manager) %in% c('ecflow', 'autosubmit')) { + stop("Cannot identify the workflow manager. Check the value of 'startr_exec$workflow_manager', which should be 'ecFlow' or 'Autosubmit'.") + } + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + if (!is.logical(remove)) { + stop("Parameter 'remove' must be logical.") + } + + if (tolower(startr_exec$workflow_manager) == 'ecflow') { + res <- Collect_ecflow(startr_exec, wait = wait, remove = remove) + } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') { + res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove) + } + + return(res) +} + + +Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) { + if (Sys.which('ecflow_client') == '') { stop("ecFlow must be installed in order to collect results from a ", "Compute() execution.") @@ -346,3 +370,49 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { attr(result, 'startR_compute_profiling') <- timings result } + + + +Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { + + suite_id <- startr_exec[['suite_id']] + chunks <- startr_exec[['chunks']] + num_outputs <- startr_exec[['num_outputs']] + autosubmit_suite_dir <- startr_exec[['autosubmit_suite_dir']] + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') + remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + + + done <- FALSE + sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) + + while (!done) { # If wait, try until it is done + if (sum_received_chunks / num_outputs == prod(unlist(chunks))) { + done <- TRUE + + } else if (!wait) { + stop("Computation in progress...") + } else { + Sys.sleep(cluster[['polling_period']]) + message("Computation in progress, ", sum_received_chunks, " of ", num_outputs, " chunks are done...\n", + "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) +# Sys.sleep(min(sqrt(attempt), 5)) + } + + } # while !done + + result <- .MergeChunks(remote_autosubmit_suite_dir, suite_id, remove = remove) + if (remove) { + .warning("ATTENTION: The source chunks will be removed from the ", + "system. Store the result after Collect() ends if needed.") + } + + # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + + return(result) +} diff --git a/R/Compute.R b/R/Compute.R index 1450b01..c9f7426 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -84,11 +84,11 @@ #' #'@importFrom methods is #'@export -Compute <- function(workflow, chunks = 'auto', - threads_load = 1, threads_compute = 1, - cluster = NULL, ecflow_suite_dir = NULL, - ecflow_server = NULL, silent = FALSE, debug = FALSE, - wait = TRUE) { +Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', + threads_load = 1, threads_compute = 1, + cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { # Check workflow if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) { stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", @@ -144,16 +144,37 @@ Compute <- function(workflow, chunks = 'auto', if (!all(sapply(workflow$inputs, class) == 'startR_cube')) { stop("Workflows with only one step supported by now.") } + # Run ByChunks with the combined operation - res <- ByChunks(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - ecflow_suite_dir = ecflow_suite_dir, - ecflow_server = ecflow_server, - silent = silent, debug = debug, wait = wait) + if (!is.null(cluster)) { + if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) { + stop("Parameter 'workflow_manager' can only be 'ecFlow' or 'Autosubmit'.") + } + } + if (tolower(workflow_manager) == 'autosubmit') { + res <- ByChunks_autosubmit(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = autosubmit_server, + silent = silent, debug = debug, wait = wait) + + } else { + # ecFlow or run locally + res <- ByChunks(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) + } + # TODO: carry out remaining steps locally, using multiApply # Return results res diff --git a/R/Utils.R b/R/Utils.R index 3d4d864..5bf8988 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -869,3 +869,144 @@ val <- withCallingHandlers(expr, warning = wHandler) list(value = val, warnings = myWarnings) } + +# This function writes startR_autosubmit.sh to local startR_autosubmit folder, under expID/ +write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { + # "chunks" should be the argument "chunks" in Compute() plus the redundant margin dims, + # e.g., list(dat = 1, var = 1, sdate = 1, time = 1, lat = 2, lon = 3) + + # Loop through chunks to create load script for each + for (n_chunk in 0:(prod(unlist(chunks)) - 1)) { + + # Create chunk args + chunk_names <- names(chunks) + chunk_args <- matrix(NA, 2, length(chunks)) + chunk_args[1, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '%') + chunk_args[2, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '_N%') + chunk_args <- paste0('(', paste(c(chunk_args), collapse = ' '), ')') + + #TODO: Change to the following line getting .sh template from package + # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', + # package = 'startR')) + bash_script_template <- file("/home/Earth/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_lines <- readLines(bash_script_template) + close(bash_script_template) + + # Rewrite chunk_args= + bash_script_lines <- gsub('^chunk_args=*', paste0('chunk_args=', chunk_args), + bash_script_lines) + # Include init commands + bash_script_lines <- gsub('^include_init_commands', + paste0(paste0(cluster[['init_commands']], collapse = '\n'), '\n'), + + bash_script_lines) + # Rewrite include_module_load + bash_script_lines <- gsub('^include_module_load', + paste0('module load ', cluster[['r_module']]), + bash_script_lines) + + # Save modified .sh file under local$PROJECT_PATH in expdef.yml + #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit() + #NOTE: the file will be copied to proj/ by "autosubmit create" + dest_dir <- file.path(autosubmit_suite_dir, paste0("/STARTR_CHUNKING_", cluster$expid)) + + if (!file.exists(dest_dir)) { + dir.create(savefile_path, recursive = TRUE) + } + writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh')) + } +} + +# This function generates the .yml files under autosubmit conf/ +write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { + #TODO: Remove this + library(configr) + # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) + # "cluster" is the argument "cluster" in Compute(), to set machine configuration + # "autosubmit_suite_dir" should be the local folder that has R script, like ecflow_suite_dir in Compute() + + # Get config template files from package + #TODO: Change to package path +# template_dir <- system.file('chunking/Autosubmit/', package = 'startR') + template_dir <- "/home/Earth/aho/startR/inst/chunking/Autosubmit/" + config_files <- list.files(template_dir, pattern = "*\\.yml") + + for (i_file in config_files) { + + conf <- configr::read.config(file.path(template_dir, i_file)) + conf_type <- strsplit(i_file, split = "[.]")[[1]][1] + +############################################################ + if (conf_type == "autosubmit") { + + #Q: Should it be the total amount of chunk? + conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks))) # total amount of chunk + conf$config$TOTALJOBS <- as.integer(cluster$max_jobs) + +############################################################ + } else if (conf_type == "expdef") { + conf$default$EXPID <- cluster$expid + conf$default$HPCARCH <- cluster$queue_host + # PROJECT_PATH should be where submit.sh and load....R stored --> local startR_autosubmit folder, under expID/ + conf$local$PROJECT_PATH <- file.path(autosubmit_suite_dir, paste0("STARTR_CHUNKING_", cluster$expid)) + +############################################################ + } else if (conf_type == "jobs") { + + chunks_vec <- lapply(lapply(chunks, seq, 1), rev) # list(lat = 1:2, lon = 1:3) + chunk_df <- expand.grid(chunks_vec) + nchunks <- nrow(chunk_df) + chunk_name <- paste0("CHUNK_", 0:(nchunks - 1)) + + # Fill in common configurations + jobs <- conf$JOBS + # wallclock from '01:00:00' to '01:00' + jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) + jobs[[1]]$PLATFORM <- cluster$queue_host + #Q: Is it cores_per_job? + jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) + jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) + jobs[[1]][names(chunks)] <- "" + + # Create chunks and fill in info for each chunk + if (nchunks > 1) { + jobs <- c(jobs, rep(jobs, nchunks - 1)) + names(jobs) <- chunk_name + } + for (i_chunk in 1:nchunks) { + jobs[[i_chunk]][names(chunks)] <- chunk_df[i_chunk, ] + jobs[[i_chunk]]$FILE <- paste0('startR_autosubmit_', i_chunk - 1, '.sh') + } + + conf$JOBS <- jobs + +############################################################ + } else if (conf_type == "platforms") { + if (tolower(cluster$queue_host) != "local") { + conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) + if (!is.null(cluster$extra_queue_params)) { + tmp <- unlist(cluster$extra_queue_params) + for (ii in 1:length(tmp)) { + tmp[ii] <- paste0('\"', tmp[ii], '\"') + } + conf$Platforms[[cluster$queue_host]]$CUSTOM_DIRECTIVES <- paste0('[ ', paste(tmp, collapse = ','), ' ]') + } + } + +############################################################ + } else { + stop("File ", i_file, " is not considered in this function.") + } + +############################################################ + # Output directory + dest_dir <- paste0("/esarchive/autosubmit/", cluster$expid, "/conf/") + dest_file <- paste0(conf_type, "_", cluster$expid, ".yml") + + # Write config file inside autosubmit dir + write.config(conf, paste0(dest_dir, dest_file), write.type = "yaml") + Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) + + } # for loop each file +} diff --git a/inst/chunking/Autosubmit/autosubmit.yml b/inst/chunking/Autosubmit/autosubmit.yml new file mode 100644 index 0000000..8b129a0 --- /dev/null +++ b/inst/chunking/Autosubmit/autosubmit.yml @@ -0,0 +1,16 @@ +config: + AUTOSUBMIT_VERSION: 4.0.0b0 + MAXWAITINGJOBS: # Should it be the total amount of chunk? + TOTALJOBS: + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +#wrappers: +# wrapper_sim: +# TYPE: "vertical" +# JOBS_IN_WRAPPER: "SIM" +mail: + NOTIFICATIONS: False + TO: +storage: + TYPE: "pkl" + COPY_REMOTE_LOGS: True diff --git a/inst/chunking/Autosubmit/expdef.yml b/inst/chunking/Autosubmit/expdef.yml new file mode 100644 index 0000000..a97141c --- /dev/null +++ b/inst/chunking/Autosubmit/expdef.yml @@ -0,0 +1,33 @@ +default: + EXPID: #a659 + HPCARCH: #nord3v2, local +experiment: + DATELIST: 20220401 + MEMBERS: "fc0" + CHUNKSIZEUNIT: month + CHUNKSIZE: 4 + NUMCHUNKS: 2 + CHUNKINI: '' + CALENDAR: standard +project: + PROJECT_TYPE: local + PROJECT_DESTINATION: '' +git: + PROJECT_ORIGIN: '' #https://xxx + PROJECT_BRANCH: '' #master + PROJECT_COMMIT: '' + PROJECT_SUBMODULES: '' + FETCH_SINGLE_BRANCH: True +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +local: + PROJECT_PATH: #'/esarchive/scratch/aho/tmp/startR_as/my_project' +project_files: + FILE_PROJECT_CONF: '' + FILE_JOBS_CONF: '' + JOB_SCRIPTS_TYPE: '' +rerun: + RERUN: FALSE + RERUN_JOBLIST: '' +#Q: Are these all needed and correct? diff --git a/inst/chunking/Autosubmit/jobs.yml b/inst/chunking/Autosubmit/jobs.yml new file mode 100644 index 0000000..3ff4d0b --- /dev/null +++ b/inst/chunking/Autosubmit/jobs.yml @@ -0,0 +1,9 @@ +JOBS: + CHUNK_0: + PLATFORM: #LOCAL + RUNNING: once + WALLCLOCK: #00:05 + THREADS: + FILE: startR_autosubmit.sh #templates/sleep_5.sh +# DIM: +# DIM_N: diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_AS.R b/inst/chunking/Autosubmit/load_process_save_chunk_AS.R new file mode 100644 index 0000000..f7211d5 --- /dev/null +++ b/inst/chunking/Autosubmit/load_process_save_chunk_AS.R @@ -0,0 +1,132 @@ +lib_dir <- +if (!is.null(lib_dir)) { + if (!dir.exists(lib_dir)) { + stop("The specified 'lib_dir' does not exist.") + } + .libPaths(new = lib_dir) +} +library(startR) + +out_dir <- + +debug <- +start_calls <- +start_calls_attrs <- +param_dimnames <- +fun <- +params <- +threads_load <- +threads_compute <- + +task_path <- commandArgs(TRUE)[2] + +args <- as.integer(commandArgs(TRUE)[-c(1, 2)]) + +total_specified_dims <- length(args) / 2 +chunk_indices <- args[((1:total_specified_dims) - 1) * 2 + 1] +names(chunk_indices) <- param_dimnames +chunks <- as.list(args[((1:total_specified_dims) - 1) * 2 + 2]) +names(chunks) <- param_dimnames + +t_begin_load <- Sys.time() +data <- vector('list', length(start_calls)) +for (input in 1:length(data)) { + start_call <- start_calls[[input]] + call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) + dims_to_alter <- which(call_dims %in% param_dimnames) + names_dims_to_alter <- call_dims[dims_to_alter] + # If any dimension comes from split dimensions + split_dims <- start_calls_attrs[[input]][['SplitDims']] + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] + start_call[[names(split_dims)[k]]] <- startR:::.chunk(chunks_indices_split_dims, chunks_split_dims, + eval(start_call[[names(split_dims)[k]]])) + dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) + if (length(dims_to_alter_to_remove) > 0) { + dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] + names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] + } + } + } + if (length(dims_to_alter) > 0) { + for (call_dim in names_dims_to_alter) { + start_call[[call_dim]] <- startR:::.chunk(chunk_indices[call_dim], chunks[[call_dim]], + eval(start_call[[call_dim]])) + } + } + if (!('num_procs' %in% names(start_call))) { + start_call[['num_procs']] <- threads_load + } + # Creates a name for the temporal file using the chunks numbers: + ## ecFlow should be like "_4737920362_1_1_1_1_1_1_" + ## autosubmit should be like "a659_1_1_1_1_1_1" + + nameMemoryObject <- paste0(task_path, '_', paste(chunk_indices, collapse='_')) #task_path is EXPID actually + + start_call[['ObjectBigmemory']] <- nameMemoryObject + data[[input]] <- tryCatch(eval(start_call), + # Handler when an error occurs: + error = function(e) { + message(paste("The data cannot be loaded.")) + message("See the original error message:") + message(e) + message("\n Current files in /dev/shm:") + noreturn <- lapply(list.files("/dev/shm"), function (x) { + info <- file.info(paste0("/dev/shm/", x)) + message(paste("file:", rownames(info), + "size:", info$size, + "uname:", info$uname))}) + message(getwd()) + file.remove(nameMemoryObject) + file.remove(paste0(nameMemoryObject, ".desc")) + message(paste("Files", nameMemoryObject, "has been removed.")) + stop("The job has failed while loading data. See original error reported above.") + }) + warning(attributes(data[[input]])$ObjectBigmemory) +} +t_end_load <- Sys.time() +t_load <- as.numeric(difftime(t_end_load, t_begin_load, units = 'secs')) + +t_begin_compute <- Sys.time() +if (!is.null(attr(fun, 'UseLibraries'))) { + for (i in seq_along(attr(fun, 'UseLibraries'))) { + require(attr(fun, 'UseLibraries')[i], character.only = TRUE) + } +} +chunk_indices_apply <- setNames(as.integer(chunk_indices), names(chunk_indices)) +chunk_indices_apply <- chunk_indices_apply[names(chunks)[which(chunks > 1)]] +Apply <- multiApply::Apply +res <- do.call("Apply", + c( + list(data, + target_dims = attr(fun, 'TargetDims'), + fun = fun, + output_dims = attr(fun, 'OutputDims'), + use_attributes = attr(fun, 'UseAttributes'), + extra_info = list(chunk_indices = chunk_indices_apply), + ncores = threads_compute), + params + ) + ) +rm(data) +gc() + +for (component in names(res)) { + filename <- paste0(component, '__') + for (i in 1:total_specified_dims) { + filename <- paste0(filename, param_dimnames[i], '_', chunk_indices[i], '__') + } + # Saving in a temporary file, then renaming. This way, the polling mechanism + # won't transfer back results before the save is completed. + saveRDS(res[[component]], file = paste0(out_dir, '/', filename, '.Rds.tmp')) + file.rename(paste0(out_dir, '/', filename, '.Rds.tmp'), + paste0(out_dir, '/', filename, '.Rds')) +} +rm(res) +gc() diff --git a/inst/chunking/Autosubmit/platforms.yml b/inst/chunking/Autosubmit/platforms.yml new file mode 100644 index 0000000..f8d8f70 --- /dev/null +++ b/inst/chunking/Autosubmit/platforms.yml @@ -0,0 +1,14 @@ +Platforms: + nord3: + TYPE: SLURM + HOST: nord4.bsc.es #Q: Should we have more login nodes? + PROJECT: bsc32 + ADD_PROJECT_TO_HOST: "false" + USER: #bsc32734 + PROCESSORS_PER_NODE: #16 + SERIAL_QUEUE: debug + QUEUE: bsc_es + SCRATCH_DIR: /gpfs/scratch + CUSTOM_DIRECTIVES: # "['#SBATCH --exclusive']" "['#SBATCH --constraint=medmem']" +# MAX_WALLCLOCK: '48:00' +#Q: ARE THESE SETTING CORRECT? diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh new file mode 100644 index 0000000..99347ea --- /dev/null +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +######## AUTOSUBMIT INPUTS ####### +proj_dir=%PROJDIR% +task_path=%DEFAULT.EXPID% +chunknum=%JOBNAME% #e.g., a68h_CHUNK_0 +chunknum="${chunknum:5}" #e.g., CHUNK_0 +################################## + +# Modified by write_bash.R +# e.g., chunk_args=(%JOBS."${chunknum}".dat% %JOBS."${chunknum}".dat_N% %JOBS."${chunknum}".var% %JOBS."${chunknum}".var_N% %JOBS."${chunknum}".sdate% %JOBS."${chunknum}".sdate_N%) +chunk_args= + +include_init_commands +include_module_load + +#Should move to the path that has load_process_save_chunk_AS.R +cd ${proj_dir} +#cd /esarchive/autosubmit/%EXPID%/proj/STARTR_CHUNKING_${task_path} + +#Q: Which path under /autosubmit/ can save random things? + +#e.g., Rscript load_process_save_chunk.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 +Rscript load_process_save_chunk_AS.R --args ${task_path} ${chunk_args[@]} + -- GitLab From 79c6b357d9c387c48c1f425bae1e18df26c5e9d1 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 27 Jul 2023 17:30:03 +0200 Subject: [PATCH 02/32] Change testing phase path to /esarchive/scratch/ --- R/ByChunks_autosubmit.R | 6 ++---- R/Utils.R | 4 ++-- inst/chunking/Autosubmit/expdef.yml | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index a246676..82773e6 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -450,8 +450,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (on_cluster) { # Copy load_process_save_chunk_AS.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/load_process_save_chunk_AS.R") -# chunk_script <- file(system.file('chunking/autosubmit/load_process_save_chunk_AS.R', + chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_AS.R") +# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_AS.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) @@ -509,8 +509,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - #TODO: remove source() and put function under R/ or just below ByChunks_autosubmit() - source("/esarchive/scratch/aho/git/aho-testtest/startR/autosubmit/write_autosubmit_conf.R") write_autosubmit_conf(chunks, cluster, autosubmit_suite_dir) } # if on_cluster diff --git a/R/Utils.R b/R/Utils.R index 5bf8988..fe49148 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -888,7 +888,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to the following line getting .sh template from package # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', # package = 'startR')) - bash_script_template <- file("/home/Earth/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file("/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -928,7 +928,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { # Get config template files from package #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/home/Earth/aho/startR/inst/chunking/Autosubmit/" + template_dir <- "/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/" config_files <- list.files(template_dir, pattern = "*\\.yml") for (i_file in config_files) { diff --git a/inst/chunking/Autosubmit/expdef.yml b/inst/chunking/Autosubmit/expdef.yml index a97141c..624040d 100644 --- a/inst/chunking/Autosubmit/expdef.yml +++ b/inst/chunking/Autosubmit/expdef.yml @@ -22,7 +22,7 @@ svn: PROJECT_URL: '' PROJECT_REVISION: '' local: - PROJECT_PATH: #'/esarchive/scratch/aho/tmp/startR_as/my_project' + PROJECT_PATH: #'/home/Earth/aho/startR_local_autosubmit/' project_files: FILE_PROJECT_CONF: '' FILE_JOBS_CONF: '' -- GitLab From 3679ac082e94ce5f618561aea2b6059a9eb10276 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 27 Jul 2023 17:35:00 +0200 Subject: [PATCH 03/32] Correct path --- R/Utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Utils.R b/R/Utils.R index fe49148..31f4ed7 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -888,7 +888,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to the following line getting .sh template from package # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', # package = 'startR')) - bash_script_template <- file("/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -928,7 +928,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { # Get config template files from package #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/esarchive/scratch/aho/startR/inst/chunking/Autosubmit/" + template_dir <- "/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/" config_files <- list.files(template_dir, pattern = "*\\.yml") for (i_file in config_files) { -- GitLab From 860a1fce225a3869d7d359f7ab369fbadc5d930b Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 28 Jul 2023 15:21:22 +0200 Subject: [PATCH 04/32] Reorganize chunking folder; rename ByChunks() and load_process_save_chunk.R --- R/ByChunks_autosubmit.R | 8 +-- R/{ByChunks.R => ByChunks_ecflow.R} | 61 +++++++++++++------ R/Compute.R | 18 +++--- ...R => load_process_save_chunk_autosubmit.R} | 0 inst/chunking/Autosubmit/startR_autosubmit.sh | 6 +- inst/chunking/{ => ecFlow}/Chunk.ecf | 2 +- inst/chunking/{ => ecFlow}/clean_devshm.sh | 0 inst/chunking/{ => ecFlow}/head.h | 0 .../load_process_save_chunk_ecflow.R} | 0 inst/chunking/{ => ecFlow}/lsf.h | 0 inst/chunking/{ => ecFlow}/pbs.h | 0 inst/chunking/{ => ecFlow}/slurm.h | 0 inst/chunking/{ => ecFlow}/tail.h | 0 13 files changed, 58 insertions(+), 37 deletions(-) rename R/{ByChunks.R => ByChunks_ecflow.R} (93%) rename inst/chunking/Autosubmit/{load_process_save_chunk_AS.R => load_process_save_chunk_autosubmit.R} (100%) rename inst/chunking/{ => ecFlow}/Chunk.ecf (82%) rename inst/chunking/{ => ecFlow}/clean_devshm.sh (100%) rename inst/chunking/{ => ecFlow}/head.h (100%) rename inst/chunking/{load_process_save_chunk.R => ecFlow/load_process_save_chunk_ecflow.R} (100%) rename inst/chunking/{ => ecFlow}/lsf.h (100%) rename inst/chunking/{ => ecFlow}/pbs.h (100%) rename inst/chunking/{ => ecFlow}/slurm.h (100%) rename inst/chunking/{ => ecFlow}/tail.h (100%) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 82773e6..1d0a84b 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -448,10 +448,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } if (on_cluster) { - # Copy load_process_save_chunk_AS.R into local folder + # Copy load_process_save_chunk_autosubmit.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_AS.R") -# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_AS.R', + chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") +# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) @@ -503,7 +503,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', chunk_script_lines) chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), chunk_script_lines) - writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_AS.R')) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) # Write and copy startR_autosubmit.sh into local folder write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) diff --git a/R/ByChunks.R b/R/ByChunks_ecflow.R similarity index 93% rename from R/ByChunks.R rename to R/ByChunks_ecflow.R index 547b6ea..5dc306d 100644 --- a/R/ByChunks.R +++ b/R/ByChunks_ecflow.R @@ -82,13 +82,13 @@ #'@import multiApply #'@importFrom methods is #'@noRd -ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', - threads_load = 2, threads_compute = 1, - cluster = NULL, - ecflow_suite_dir = NULL, - ecflow_server = NULL, - silent = FALSE, debug = FALSE, - wait = TRUE) { +ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + ecflow_suite_dir = NULL, + ecflow_server = NULL, + silent = FALSE, debug = FALSE, + wait = TRUE) { # Build object to store profiling timings t_begin_total <- Sys.time() t_begin_bychunks_setup <- t_begin_total @@ -436,9 +436,11 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', ". Make sure passwordless ", "access is properly set in both directions.")) - # Copy load_process_save_chunk.R into shared folder - chunk_script <- file(system.file('chunking/load_process_save_chunk.R', - package = 'startR')) + # Copy load_process_save_chunk_ecflow.R into shared folder + #TODO: Change to package file + chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R") +# chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', +# package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', @@ -488,11 +490,13 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', chunk_script_lines) chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), chunk_script_lines) - writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk.R')) + writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk_ecflow.R')) # Copy Chunk.ecf into shared folder - chunk_ecf_script <- file(system.file('chunking/Chunk.ecf', - package = 'startR')) + #TODO: Change to package file + chunk_ecf_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/Chunk.ecf") +# chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', +# package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) close(chunk_ecf_script) if (cluster[['queue_type']] == 'host') { @@ -530,8 +534,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # } else { # transfer_back_line <- '' # } - chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk.R --args \\$task_path insert_indices', - paste0('Rscript load_process_save_chunk.R --args $task_path ', paste(ecf_vars, collapse = ' ')), + chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk_ecflow.R --args \\$task_path insert_indices', + paste0('Rscript load_process_save_chunk_ecflow.R --args $task_path ', paste(ecf_vars, collapse = ' ')), chunk_ecf_script_lines) #chunk_ecf_script_lines <- gsub('^include_transfer_back_and_rm', transfer_back_line, chunk_ecf_script_lines) writeLines(chunk_ecf_script_lines, paste0(ecflow_suite_dir_suite, '/Chunk.ecf')) @@ -557,7 +561,9 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy queue header into shared folder #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) - chunk_queue_header <- file(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR')) + #TODO: Change to package file + chunk_queue_header <- file(paste0("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/", cluster[['queue_type']], '.h')) +# chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) chunk_queue_header_lines <- gsub('^include_extra_queue_params', @@ -566,10 +572,13 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) # Copy headers - file.copy(system.file('chunking/head.h', package = 'startR'), - ecflow_suite_dir_suite) - file.copy(system.file('chunking/tail.h', package = 'startR'), - ecflow_suite_dir_suite) + #TODO: Change to package file + file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) + file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) +# file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), +# ecflow_suite_dir_suite) +# file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), +# ecflow_suite_dir_suite) #file.copy(system.file('chunking/clean_devshm.sh', package = 'startR'), # ecflow_suite_dir_suite) } @@ -1008,3 +1017,15 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', } #TODO: check result dimensions match expected dimensions } + +ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 2, threads_compute = 1, + cluster = NULL, + ecflow_suite_dir = NULL, + ecflow_server = NULL, + silent = FALSE, debug = FALSE, + wait = TRUE) { + + stop(.Deprecated("ByChunks_ecflow")) +} + diff --git a/R/Compute.R b/R/Compute.R index c9f7426..81f83e6 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -164,15 +164,15 @@ Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', } else { # ecFlow or run locally - res <- ByChunks(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - ecflow_suite_dir = ecflow_suite_dir, - ecflow_server = ecflow_server, - silent = silent, debug = debug, wait = wait) + res <- ByChunks_ecflow(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) } # TODO: carry out remaining steps locally, using multiApply diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_AS.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R similarity index 100% rename from inst/chunking/Autosubmit/load_process_save_chunk_AS.R rename to inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh index 99347ea..d0c16d5 100644 --- a/inst/chunking/Autosubmit/startR_autosubmit.sh +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -14,12 +14,12 @@ chunk_args= include_init_commands include_module_load -#Should move to the path that has load_process_save_chunk_AS.R +#Should move to the path that has load_process_save_chunk_autosubmit.R cd ${proj_dir} #cd /esarchive/autosubmit/%EXPID%/proj/STARTR_CHUNKING_${task_path} #Q: Which path under /autosubmit/ can save random things? -#e.g., Rscript load_process_save_chunk.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 -Rscript load_process_save_chunk_AS.R --args ${task_path} ${chunk_args[@]} +#e.g., Rscript load_process_save_chunk_autosubmit.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 +Rscript load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} diff --git a/inst/chunking/Chunk.ecf b/inst/chunking/ecFlow/Chunk.ecf similarity index 82% rename from inst/chunking/Chunk.ecf rename to inst/chunking/ecFlow/Chunk.ecf index 60bd051..5a265fb 100644 --- a/inst/chunking/Chunk.ecf +++ b/inst/chunking/ecFlow/Chunk.ecf @@ -12,7 +12,7 @@ set -vx cd %REMOTE_ECF_HOME% task_path=%REMOTE_ECF_HOME%/%ECF_NAME% -Rscript load_process_save_chunk.R --args $task_path insert_indices +Rscript load_process_save_chunk_ecflow.R --args $task_path insert_indices #include_transfer_back_and_rm #clean temporal folder diff --git a/inst/chunking/clean_devshm.sh b/inst/chunking/ecFlow/clean_devshm.sh similarity index 100% rename from inst/chunking/clean_devshm.sh rename to inst/chunking/ecFlow/clean_devshm.sh diff --git a/inst/chunking/head.h b/inst/chunking/ecFlow/head.h similarity index 100% rename from inst/chunking/head.h rename to inst/chunking/ecFlow/head.h diff --git a/inst/chunking/load_process_save_chunk.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R similarity index 100% rename from inst/chunking/load_process_save_chunk.R rename to inst/chunking/ecFlow/load_process_save_chunk_ecflow.R diff --git a/inst/chunking/lsf.h b/inst/chunking/ecFlow/lsf.h similarity index 100% rename from inst/chunking/lsf.h rename to inst/chunking/ecFlow/lsf.h diff --git a/inst/chunking/pbs.h b/inst/chunking/ecFlow/pbs.h similarity index 100% rename from inst/chunking/pbs.h rename to inst/chunking/ecFlow/pbs.h diff --git a/inst/chunking/slurm.h b/inst/chunking/ecFlow/slurm.h similarity index 100% rename from inst/chunking/slurm.h rename to inst/chunking/ecFlow/slurm.h diff --git a/inst/chunking/tail.h b/inst/chunking/ecFlow/tail.h similarity index 100% rename from inst/chunking/tail.h rename to inst/chunking/ecFlow/tail.h -- GitLab From aed5a4f51ec9a466dfe3e037d0b093576968c352 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 28 Jul 2023 16:44:45 +0200 Subject: [PATCH 05/32] minor bugfix --- R/Collect.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Collect.R b/R/Collect.R index 73e06b9..70b52db 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -394,8 +394,8 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { } else if (!wait) { stop("Computation in progress...") } else { - Sys.sleep(cluster[['polling_period']]) - message("Computation in progress, ", sum_received_chunks, " of ", num_outputs, " chunks are done...\n", + Sys.sleep(startr_exec$cluster[['polling_period']]) + message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n", "Check status on Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id) # Sys.sleep(min(sqrt(attempt), 5)) } -- GitLab From 1bc09e11789895c2e2d8d669696f993e965a7e1c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 31 Jul 2023 13:28:17 +0200 Subject: [PATCH 06/32] Add run_dir --- R/ByChunks_autosubmit.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 1d0a84b..f276284 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -192,7 +192,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', polling_period = 10, special_setup = 'none', expid = NULL, - hpc_user = NULL) + hpc_user = NULL, + run_dir = NULL) if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -206,7 +207,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'ecflow_module', 'node_memory', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user')))) { + 'polling_period', 'special_setup', 'expid', 'hpc_user', + 'run_dir')))) { stop("Found invalid component names in parameter 'cluster'.") } # Remove ecFlow components @@ -697,6 +699,9 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', sys_commands <- paste0("module load ", as_module, "; ", "autosubmit create ", suite_id, " -np; ", "autosubmit refresh ", suite_id, "; ") + if (!is.null(cluster$run_dir)) { + sys_commands <- paste0("cd ", cluster$run_dir, "; ", sys_commands) + } if (wait) { sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) } else { -- GitLab From 9786b0c45d461d2fe4160aeaaf9834a57c0f94fb Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 31 Jul 2023 14:33:02 +0200 Subject: [PATCH 07/32] Cannot have run_dir; autosubmit run under proj/ (PROJECT_PATH) automatically --- R/ByChunks_autosubmit.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index f276284..e0417e9 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -192,8 +192,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', polling_period = 10, special_setup = 'none', expid = NULL, - hpc_user = NULL, - run_dir = NULL) + hpc_user = NULL) if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -207,8 +206,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'ecflow_module', 'node_memory', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user', - 'run_dir')))) { + 'polling_period', 'special_setup', 'expid', 'hpc_user' + )))) { stop("Found invalid component names in parameter 'cluster'.") } # Remove ecFlow components @@ -699,9 +698,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', sys_commands <- paste0("module load ", as_module, "; ", "autosubmit create ", suite_id, " -np; ", "autosubmit refresh ", suite_id, "; ") - if (!is.null(cluster$run_dir)) { - sys_commands <- paste0("cd ", cluster$run_dir, "; ", sys_commands) - } if (wait) { sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) } else { -- GitLab From 6fe80d79012c58d977f9a2da0a4524bbc8ee72f0 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 3 Aug 2023 16:09:28 +0200 Subject: [PATCH 08/32] Use yaml instead of configr --- R/Utils.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/Utils.R b/R/Utils.R index 31f4ed7..69a8e03 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -929,11 +929,11 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') template_dir <- "/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/" - config_files <- list.files(template_dir, pattern = "*\\.yml") + config_files <- list.files(template_dir, pattern = "*\\.yml$") for (i_file in config_files) { - conf <- configr::read.config(file.path(template_dir, i_file)) + conf <- yaml::read_yaml(file.path(template_dir, i_file)) conf_type <- strsplit(i_file, split = "[.]")[[1]][1] ############################################################ @@ -1005,7 +1005,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { dest_file <- paste0(conf_type, "_", cluster$expid, ".yml") # Write config file inside autosubmit dir - write.config(conf, paste0(dest_dir, dest_file), write.type = "yaml") + yaml::write_yaml(conf, paste0(dest_dir, dest_file)) Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) } # for loop each file -- GitLab From 8e5b55bee2487a2d335c2530d7753229ff3b68a5 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 3 Aug 2023 16:10:26 +0200 Subject: [PATCH 09/32] Add yaml to Suggest --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2d569fe..173baca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,8 @@ Imports: Suggests: stats, utils, - testthat + testthat, + yaml License: GPL-3 URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues -- GitLab From 2e9c6d70e9049676c7b946b61cbd08d9c3227923 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 3 Aug 2023 19:37:08 +0200 Subject: [PATCH 10/32] Add check to see if jobs failed. If yes, stop the function --- R/ByChunks_autosubmit.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index e0417e9..5d0bfc4 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -718,6 +718,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") } + # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. + failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) + if (substr(failed_file_size, 1, 1) != 0) stop("Some Autosubmit jobs failed. Check GUI and logs.") + timings[['total']] <- t_begin_total startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', suite_id = suite_id, chunks = chunks, -- GitLab From 25eeb54de91abd089d026969660fc105a226403a Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 4 Aug 2023 10:00:59 +0200 Subject: [PATCH 11/32] Change threads_load default to 1 (be consistent with documentation) --- R/ByChunks_autosubmit.R | 2 +- R/ByChunks_ecflow.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 5d0bfc4..094d77b 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -83,7 +83,7 @@ #'@importFrom methods is #'@noRd ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', - threads_load = 2, threads_compute = 1, + threads_load = 1, threads_compute = 1, cluster = NULL, autosubmit_suite_dir = NULL, autosubmit_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE) { diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index 5dc306d..74f0932 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -83,7 +83,7 @@ #'@importFrom methods is #'@noRd ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', - threads_load = 2, threads_compute = 1, + threads_load = 1, threads_compute = 1, cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, -- GitLab From 64f9d736c8bfed30e573ca9e3df59dd7e27f3e2f Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 13:13:31 +0200 Subject: [PATCH 12/32] Remove bigmemory files after failure; remove folder after job finished; add 'run_dir' --- R/ByChunks_autosubmit.R | 33 +++++++++++++++---- R/Collect.R | 30 +++++++++++++---- R/Utils.R | 13 +++++++- inst/chunking/Autosubmit/startR_autosubmit.sh | 10 +++--- 4 files changed, 67 insertions(+), 19 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 094d77b..70e750e 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -154,12 +154,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (!is.character(autosubmit_suite_dir)) { stop("Parameter 'autosubmit_suite_dir' must be a character string.") } -#----------NEW----------- autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') if (!dir.exists(autosubmit_suite_dir_suite)) { dir.create(autosubmit_suite_dir_suite, recursive = TRUE) } -#--------NEW_END---------- if (!dir.exists(autosubmit_suite_dir_suite)) { stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") } @@ -172,7 +170,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } } -#----------NEW----------- # Check cluster default_cluster <- list(queue_host = NULL, # queue_type = 'slurm', @@ -192,7 +189,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', polling_period = 10, special_setup = 'none', expid = NULL, - hpc_user = NULL) + hpc_user = NULL, +#------------NEW----------- + run_dir = NULL) +#---------NEW_END-------------- if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -206,7 +206,10 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'ecflow_module', 'node_memory', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user' + 'polling_period', 'special_setup', 'expid', 'hpc_user', +#------------NEW----------- + 'run_dir' +#---------NEW_END-------------- )))) { stop("Found invalid component names in parameter 'cluster'.") } @@ -720,7 +723,25 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) - if (substr(failed_file_size, 1, 1) != 0) stop("Some Autosubmit jobs failed. Check GUI and logs.") +#------------NEW---------------- + if (substr(failed_file_size, 1, 1) != 0) { + # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(cluster[['run_dir']])) { + file.remove( + file.path(run_dir, + list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } + + stop("Some Autosubmit jobs failed. Check GUI and logs.") + } +#-----------NEW_END---------- timings[['total']] <- t_begin_total startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', diff --git a/R/Collect.R b/R/Collect.R index 70b52db..73eaa07 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -19,8 +19,10 @@ #' merged array. The default value is TRUE. #'@param remove A logical value deciding whether to remove of all data results #' received from the HPC (and stored under 'ecflow_suite_dir', the parameter in -#' Compute()) after being collected. To preserve the data and Collect() it as -#' many times as desired, set remove to FALSE. The default value is TRUE. +#' Compute()) after being collected, as well as the local job folder under +#' 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and +#' Collect() it as many times as desired, set remove to FALSE. The default +#' value is TRUE. #'@return A list of merged data array. #' #'@examples @@ -382,7 +384,7 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - + run_dir <- startr_exec[['run_dir']] done <- FALSE sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) @@ -406,13 +408,27 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { if (remove) { .warning("ATTENTION: The source chunks will be removed from the ", "system. Store the result after Collect() ends if needed.") +#--------NEW---------- + unlink(paste0(autosubmit_suite_dir_suite), + recursive = TRUE) +#---------NEW_END-------------- } # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) - file.remove( - file.path(remote_autosubmit_suite_dir_suite, - list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) - ) +#-----------NEW------------ + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(run_dir)) { + file.remove( + file.path(run_dir, + list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } +#---------NEW_END---------- return(result) } diff --git a/R/Utils.R b/R/Utils.R index 69a8e03..37fb6f2 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -904,7 +904,18 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { bash_script_lines <- gsub('^include_module_load', paste0('module load ', cluster[['r_module']]), bash_script_lines) - +#----------NEW--------- + # Rewrite cd run_dir + # If run_dir is not specified, the script will run under ${proj_dir} + if (!is.null(cluster[['run_dir']])) { + bash_script_lines <- gsub('^cd_run_dir', + paste0('cd ', cluster[['run_dir']]), + bash_script_lines) + } else { + bash_script_lines <- gsub('^cd_run_dir', '#cd_run_dir', + bash_script_lines) + } +#---------NEW_END---------- # Save modified .sh file under local$PROJECT_PATH in expdef.yml #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit() #NOTE: the file will be copied to proj/ by "autosubmit create" diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh index d0c16d5..63d9e61 100644 --- a/inst/chunking/Autosubmit/startR_autosubmit.sh +++ b/inst/chunking/Autosubmit/startR_autosubmit.sh @@ -14,12 +14,12 @@ chunk_args= include_init_commands include_module_load -#Should move to the path that has load_process_save_chunk_autosubmit.R -cd ${proj_dir} -#cd /esarchive/autosubmit/%EXPID%/proj/STARTR_CHUNKING_${task_path} +##Should move to the path that has load_process_save_chunk_autosubmit.R +#cd ${proj_dir} -#Q: Which path under /autosubmit/ can save random things? +# move to run_dir +cd_run_dir #e.g., Rscript load_process_save_chunk_autosubmit.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2 -Rscript load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} +Rscript ${proj_dir}/load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]} -- GitLab From 15c67ede2b64d50283ca4c6afc856480bae2f722 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 16:02:30 +0200 Subject: [PATCH 13/32] Correct run_dir --- R/Collect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Collect.R b/R/Collect.R index 73eaa07..fc89ed5 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -384,7 +384,7 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - run_dir <- startr_exec[['run_dir']] + run_dir <- startr_exec$cluster[['run_dir']] done <- FALSE sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite))) -- GitLab From 75cb16ecf8dd3f1c2ac79dd94a2be4e7cf182dee Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 18:08:31 +0200 Subject: [PATCH 14/32] Add names to data inputs so Apply won't return warnings later --- .../Autosubmit/load_process_save_chunk_autosubmit.R | 6 ++++++ inst/chunking/ecFlow/load_process_save_chunk_ecflow.R | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R index f7211d5..1f8b6f9 100644 --- a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R +++ b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R @@ -30,6 +30,12 @@ names(chunks) <- param_dimnames t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) +#---------NEW-------------- +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} +#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) diff --git a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R index b7b73a9..ee3aa04 100644 --- a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R +++ b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R @@ -37,6 +37,12 @@ t_job_setup <- as.numeric(difftime(t_end_job_setup, t_begin_job_setup, units = ' t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) +#---------NEW-------------- +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} +#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) -- GitLab From ee215fbb1a9878c8defdcb9dea5a2e9e75e6648d Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 18:15:52 +0200 Subject: [PATCH 15/32] Change temp dir --- R/ByChunks_autosubmit.R | 2 +- R/ByChunks_ecflow.R | 10 +++++----- R/Utils.R | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 70e750e..0c6be79 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -454,7 +454,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (on_cluster) { # Copy load_process_save_chunk_autosubmit.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") + chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") # chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index 74f0932..d251015 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -438,7 +438,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy load_process_save_chunk_ecflow.R into shared folder #TODO: Change to package file - chunk_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R") + chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/load_process_save_chunk_ecflow.R") # chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', # package = 'startR')) chunk_script_lines <- readLines(chunk_script) @@ -494,7 +494,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy Chunk.ecf into shared folder #TODO: Change to package file - chunk_ecf_script <- file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/Chunk.ecf") + chunk_ecf_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/Chunk.ecf") # chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', # package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) @@ -562,7 +562,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) #TODO: Change to package file - chunk_queue_header <- file(paste0("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/", cluster[['queue_type']], '.h')) + chunk_queue_header <- file(paste0("/esarchive/scratch/aho/tmp/chunking/ecFlow/", cluster[['queue_type']], '.h')) # chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) @@ -573,8 +573,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy headers #TODO: Change to package file - file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) - file.copy(file("/esarchive/scratch/aho/git/startR/inst/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) + file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) + file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) # file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), # ecflow_suite_dir_suite) # file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), diff --git a/R/Utils.R b/R/Utils.R index 37fb6f2..2d9cc5e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -888,7 +888,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Change to the following line getting .sh template from package # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', # package = 'startR')) - bash_script_template <- file("/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/startR_autosubmit.sh") bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -939,7 +939,7 @@ write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { # Get config template files from package #TODO: Change to package path # template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/esarchive/scratch/aho/git/startR/inst/chunking/Autosubmit/" + template_dir <- "/esarchive/scratch/aho/tmp/chunking/Autosubmit/" config_files <- list.files(template_dir, pattern = "*\\.yml$") for (i_file in config_files) { -- GitLab From 748bf10a1ad837ea81dff50a63a9eaa266b9e877 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 18:18:39 +0200 Subject: [PATCH 16/32] rename write_autosubmit_conf to avoid conflict with SUNSET --- R/ByChunks_autosubmit.R | 2 +- R/Utils.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 0c6be79..4850f82 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -513,7 +513,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_conf(chunks, cluster, autosubmit_suite_dir) + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) } # if on_cluster diff --git a/R/Utils.R b/R/Utils.R index 2d9cc5e..87f36eb 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -929,7 +929,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { } # This function generates the .yml files under autosubmit conf/ -write_autosubmit_conf <- function(chunks, cluster, autosubmit_suite_dir) { +write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Remove this library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) -- GitLab From 01899116775967babdd5d8f2dffc547829b86f7c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 9 Aug 2023 09:42:41 +0200 Subject: [PATCH 17/32] Correct run_dir for file cleaning --- R/ByChunks_autosubmit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 4850f82..fb3e2b7 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -729,8 +729,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ if (!is.null(cluster[['run_dir']])) { file.remove( - file.path(run_dir, - list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + file.path(cluster[['run_dir']], + list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) ) } else { file.remove( -- GitLab From ca103df1de1a64d52324bdf50417e001bcb0cb2c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 9 Aug 2023 13:21:02 +0200 Subject: [PATCH 18/32] Create temp dir if autosubmit_suite_dir is not provided. Move to proj_dir if run_dir is not specified. --- R/ByChunks_autosubmit.R | 19 +++++++++++-------- R/ByChunks_ecflow.R | 6 +++--- R/Collect.R | 4 ---- R/Utils.R | 5 ++--- .../load_process_save_chunk_autosubmit.R | 2 -- .../ecFlow/load_process_save_chunk_ecflow.R | 2 -- 6 files changed, 16 insertions(+), 22 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index fb3e2b7..4b45198 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -137,7 +137,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', on_cluster <- !is.null(cluster) - # Check autosubmit_suite_dir suite_id <- cluster[['expid']] #NOTE: @@ -148,8 +147,17 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', autosubmit_suite_dir_suite <- '' if (on_cluster) { + # autosubmit_suite_dir if (is.null(autosubmit_suite_dir)) { - stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") +# stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") + # Create a tmp folder as autosubmit_suite_dir + autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") + if (!dir.exists(autosubmit_suite_dir)) { + dir.create("startR_autosubmit_temp", recursive = FALSE) + } + .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", + "folder under current directory: ", autosubmit_suite_dir, "/. Make sure ", + "that Autosubmit machine can find this path.")) } if (!is.character(autosubmit_suite_dir)) { stop("Parameter 'autosubmit_suite_dir' must be a character string.") @@ -161,6 +169,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', if (!dir.exists(autosubmit_suite_dir_suite)) { stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") } + # autosubmit_server if (!is.null(autosubmit_server)) { if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") @@ -190,9 +199,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', special_setup = 'none', expid = NULL, hpc_user = NULL, -#------------NEW----------- run_dir = NULL) -#---------NEW_END-------------- if (on_cluster) { if (!is.list(cluster)) { stop("Parameter 'cluster' must be a named list.") @@ -207,9 +214,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', 'cores_per_job', 'job_wallclock', 'max_jobs', 'extra_queue_params', 'bidirectional', 'polling_period', 'special_setup', 'expid', 'hpc_user', -#------------NEW----------- 'run_dir' -#---------NEW_END-------------- )))) { stop("Found invalid component names in parameter 'cluster'.") } @@ -723,7 +728,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) -#------------NEW---------------- if (substr(failed_file_size, 1, 1) != 0) { # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ @@ -741,7 +745,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("Some Autosubmit jobs failed. Check GUI and logs.") } -#-----------NEW_END---------- timings[['total']] <- t_begin_total startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index d251015..793052c 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -195,7 +195,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', redundant_components <- c('autosubmit_module', 'expid', 'hpc_user') if (any(redundant_components %in% names(cluster))) { tmp <- redundant_components[which(redundant_components %in% names(cluster))] - warning("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager.") + .warning(paste0("Cluster component ", paste(tmp, collapse = ','), " not used when ecFlow is the workflow manager.")) cluster[[tmp]] <- NULL } default_cluster[names(cluster)] <- cluster @@ -266,8 +266,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") } if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { - warning("The component 'CDO_module' of parameter 'cluster' must have ", - " than 1 and only the first element will be used.") + .warning(paste0("The component 'CDO_module' of parameter 'cluster' must have ", + " than 1 and only the first element will be used.")) } cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) } diff --git a/R/Collect.R b/R/Collect.R index fc89ed5..b00ae6c 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -408,14 +408,11 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { if (remove) { .warning("ATTENTION: The source chunks will be removed from the ", "system. Store the result after Collect() ends if needed.") -#--------NEW---------- unlink(paste0(autosubmit_suite_dir_suite), recursive = TRUE) -#---------NEW_END-------------- } # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) -#-----------NEW------------ # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ if (!is.null(run_dir)) { file.remove( @@ -428,7 +425,6 @@ Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) { list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) ) } -#---------NEW_END---------- return(result) } diff --git a/R/Utils.R b/R/Utils.R index 87f36eb..b0478cf 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -904,7 +904,6 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { bash_script_lines <- gsub('^include_module_load', paste0('module load ', cluster[['r_module']]), bash_script_lines) -#----------NEW--------- # Rewrite cd run_dir # If run_dir is not specified, the script will run under ${proj_dir} if (!is.null(cluster[['run_dir']])) { @@ -912,10 +911,10 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { paste0('cd ', cluster[['run_dir']]), bash_script_lines) } else { - bash_script_lines <- gsub('^cd_run_dir', '#cd_run_dir', + bash_script_lines <- gsub('^cd_run_dir', 'cd ${proj_dir}', bash_script_lines) } -#---------NEW_END---------- + # Save modified .sh file under local$PROJECT_PATH in expdef.yml #NOTE: dest_dir is ecflow_suite_dir_suite in ByChunks_autosubmit() #NOTE: the file will be copied to proj/ by "autosubmit create" diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R index 1f8b6f9..8762eeb 100644 --- a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R +++ b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R @@ -30,12 +30,10 @@ names(chunks) <- param_dimnames t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) -#---------NEW-------------- # Add data names if data input has names if (!is.null(names(start_calls_attrs))) { names(data) <- names(start_calls_attrs) } -#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) diff --git a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R index ee3aa04..1bc5d6d 100644 --- a/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R +++ b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R @@ -37,12 +37,10 @@ t_job_setup <- as.numeric(difftime(t_end_job_setup, t_begin_job_setup, units = ' t_begin_load <- Sys.time() data <- vector('list', length(start_calls)) -#---------NEW-------------- # Add data names if data input has names if (!is.null(names(start_calls_attrs))) { names(data) <- names(start_calls_attrs) } -#--------NEW_END----------- for (input in 1:length(data)) { start_call <- start_calls[[input]] call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) -- GitLab From d079d2172aef4cd5970fdbbfa717b59d4a28d178 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 10 Aug 2023 17:43:18 +0200 Subject: [PATCH 19/32] Create exp if expid is not provided. Clean the code. --- R/ByChunks_autosubmit.R | 831 ++++++++++++++++++---------------------- R/Compute.R | 34 +- R/Utils.R | 2 +- 3 files changed, 384 insertions(+), 483 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 4b45198..8789963 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -88,6 +88,12 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', autosubmit_suite_dir = NULL, autosubmit_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE) { + #NOTE: + #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ + #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ + #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ + #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + # Build object to store profiling timings t_begin_total <- Sys.time() t_begin_bychunks_setup <- t_begin_total @@ -108,7 +114,14 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', MergeArrays <- .MergeArrays - # Check input headers + # Sanity checks + ## step_fun + if (!is(step_fun, 'startR_step_fun')) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + ## cube_headers if (is(cube_headers, 'startR_cube')) { cube_headers <- list(cube_headers) } @@ -117,13 +130,12 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", "as returned by Start().") } - - # Check step_fun - if (!is.function(step_fun)) { - stop("Parameter 'step_fun' must be a function.") + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs expected by the function 'step_fun'.") } - # Check cores + ## threads_load and threads_compute if (!is.numeric(threads_load)) { stop("Parameter 'threads_load' must be a numeric value.") } @@ -134,52 +146,50 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', threads_compute <- round(threads_compute) timings[['threads_load']] <- threads_load timings[['threads_compute']] <- threads_compute - - on_cluster <- !is.null(cluster) - - suite_id <- cluster[['expid']] - - #NOTE: - #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ - #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ - #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ - #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + + ## autosubmit_suite_dir + if (is.null(autosubmit_suite_dir)) { + # Create a tmp folder as autosubmit_suite_dir + autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") + if (!dir.exists(autosubmit_suite_dir)) { + dir.create("startR_autosubmit_temp", recursive = FALSE) + } + .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", + "folder under current directory: ", autosubmit_suite_dir, "/. Make sure ", + "that Autosubmit machine can find this path.")) + } + if (!is.character(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be a character string.") + } - autosubmit_suite_dir_suite <- '' - if (on_cluster) { - # autosubmit_suite_dir - if (is.null(autosubmit_suite_dir)) { -# stop("Parameter 'autosubmit_suite_dir' must be specified when dispatching on a cluster.") - # Create a tmp folder as autosubmit_suite_dir - autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") - if (!dir.exists(autosubmit_suite_dir)) { - dir.create("startR_autosubmit_temp", recursive = FALSE) - } - .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", - "folder under current directory: ", autosubmit_suite_dir, "/. Make sure ", - "that Autosubmit machine can find this path.")) - } - if (!is.character(autosubmit_suite_dir)) { - stop("Parameter 'autosubmit_suite_dir' must be a character string.") - } - autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') - if (!dir.exists(autosubmit_suite_dir_suite)) { - dir.create(autosubmit_suite_dir_suite, recursive = TRUE) - } - if (!dir.exists(autosubmit_suite_dir_suite)) { - stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") - } - # autosubmit_server - if (!is.null(autosubmit_server)) { - if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { - stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") - } - } else { - autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) + ## autosubmit_server + if (!is.null(autosubmit_server)) { + if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { + stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") } + } else { + autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) } - # Check cluster + ## silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + ## debug + if (!is.logical(debug)) { + stop("Parameter 'debug' must be logical.") + } + if (silent) { + debug <- FALSE + } + + ## wait + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + + ## cluster default_cluster <- list(queue_host = NULL, # queue_type = 'slurm', data_dir = NULL, @@ -200,158 +210,167 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', expid = NULL, hpc_user = NULL, run_dir = NULL) - if (on_cluster) { - if (!is.list(cluster)) { - stop("Parameter 'cluster' must be a named list.") - } - if (is.null(names(cluster))) { - stop("Parameter 'cluster' must be a named list.") - } - if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', - 'temp_dir', 'lib_dir', 'init_commands', - 'r_module', 'CDO_module', 'autosubmit_module', - 'ecflow_module', 'node_memory', - 'cores_per_job', 'job_wallclock', 'max_jobs', - 'extra_queue_params', 'bidirectional', - 'polling_period', 'special_setup', 'expid', 'hpc_user', - 'run_dir' - )))) { - stop("Found invalid component names in parameter 'cluster'.") - } - # Remove ecFlow components - redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') - if (any(redundant_components %in% names(cluster))) { - tmp <- redundant_components[which(redundant_components %in% names(cluster))] - warning("Cluster component ", paste(tmp, collapse = ','), " not used when Autosubmit is the workflow manager.") - cluster[[tmp]] <- NULL - } - default_cluster[names(cluster)] <- cluster + if (!is.list(cluster) || is.null(names(cluster))) { + stop("Parameter 'cluster' must be a named list.") + } + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', + 'extra_queue_params', 'bidirectional', + 'polling_period', 'special_setup', 'expid', 'hpc_user', + 'run_dir' + )))) { + stop("Found invalid component names in parameter 'cluster'.") } + # Remove ecFlow components + redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + .warning(paste0("Cluster component ", paste(tmp, collapse = ','), + " not used when Autosubmit is the workflow manager.")) + cluster[[tmp]] <- NULL + } + default_cluster[names(cluster)] <- cluster cluster <- default_cluster - is_data_dir_shared <- FALSE - # Cluster compoment check - if (on_cluster) { - # queue_host - support_hpcs <- c('local', 'nord3') # names in platforms.yml - if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) { - stop("Component 'queue_host' in parameter 'cluster' must be one of the follows: ", paste(support_hpcs, collapse = ','), '.') - } - # data_dir - if (is.null(cluster[['data_dir']])) { - is_data_dir_shared <- TRUE - } else { - if (!is.character(cluster[['data_dir']])) { - stop("The component 'data_dir' of the parameter 'cluster' must be a character string.") - } - remote_data_dir <- cluster[['data_dir']] - } - # lib_dir - if (!is.null(cluster[['lib_dir']])) { - if (!is.character(cluster[['lib_dir']])) { - stop("The component 'lib_dir', of the parameter 'cluster' must be NULL or ", - "a character string.") - } - } - # init_commands - if (!is.list(cluster[['init_commands']]) || - !all(sapply(cluster[['init_commands']], is.character))) { - stop("The component 'init_commands' of the parameter 'cluster' must be a list of ", - "character strings.") - } - # r_module - if (!is.character(cluster[['r_module']])) { - stop("The component 'r_module' of the parameter 'cluster' must be a character string.") - } - if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) { - stop("The component 'r_module' of the parameter 'cluster' must have at least one character ", - "and contain no blank spaces.") - } - # CDO_module - if (!is.null(cluster[['CDO_module']])) { - if (!is.character(cluster[['CDO_module']])) { - stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") - } - if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { - warning("The component 'CDO_module' of parameter 'cluster' must have ", - " than 1 and only the first element will be used.") - } - cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) - } - # autosubmit_module - if (!is.character(cluster[['autosubmit_module']])) { - stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") - } - # cores_per_job - if (is.null(cluster[['cores_per_job']])) { - cluster[['cores_per_job']] <- threads_compute - } - if (!is.numeric(cluster[['cores_per_job']])) { - stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") - } - cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) - if (cluster[['cores_per_job']] > threads_compute) { - .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") - } - # job_wallclock - tmp <- strsplit( '01:00:00', ':')[[1]] - if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) { - stop("The compoment 'job_wallclock' should be the format of HH:MM or HH:MM:SS.") - } - # max_jobs - if (!is.numeric(cluster[['max_jobs']])) { - stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.") + ### queue_host + support_hpcs <- c('local', 'nord3') # names in platforms.yml + if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) { + stop("Cluster component 'queue_host' must be one of the follows: ", + paste(support_hpcs, collapse = ','), '.') + } + + ### data_dir + is_data_dir_shared <- FALSE + if (is.null(cluster[['data_dir']])) { + is_data_dir_shared <- TRUE + } else { + if (!is.character(cluster[['data_dir']])) { + stop("The component 'data_dir' of the parameter 'cluster' must be a character string.") } - cluster[['max_jobs']] <- round(cluster[['max_jobs']]) - # extra_queue_params - if (!is.list(cluster[['extra_queue_params']]) || - !all(sapply(cluster[['extra_queue_params']], is.character))) { - stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ", - "character strings.") + remote_data_dir <- cluster[['data_dir']] + } + ### lib_dir + if (!is.null(cluster[['lib_dir']])) { + if (!is.character(cluster[['lib_dir']])) { + stop("The component 'lib_dir', of the parameter 'cluster' must be NULL or ", + "a character string.") } - # polling_period - if (!is.numeric(cluster[['polling_period']])) { - stop("The component 'polling_period' of the parameter 'cluster' must be numeric.") + } + ### init_commands + if (!is.list(cluster[['init_commands']]) || + !all(sapply(cluster[['init_commands']], is.character))) { + stop("The component 'init_commands' of the parameter 'cluster' must be a list of ", + "character strings.") + } + ### r_module + if (!is.character(cluster[['r_module']])) { + stop("The component 'r_module' of the parameter 'cluster' must be a character string.") + } + if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) { + stop("The component 'r_module' of the parameter 'cluster' must have at least one character ", + "and contain no blank spaces.") + } + ### CDO_module + if (!is.null(cluster[['CDO_module']])) { + if (!is.character(cluster[['CDO_module']])) { + stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.") } - cluster[['polling_period']] <- round(cluster[['polling_period']]) - # special_setup - if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) { - stop("The value provided for the component 'special_setup' of the parameter ", - "'cluster' is not recognized.") + if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) { + warning("The component 'CDO_module' of parameter 'cluster' must have ", + " than 1 and only the first element will be used.") } - # expid + cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']]) + } + ### autosubmit_module + if (!is.character(cluster[['autosubmit_module']])) { + stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") + } + ### cores_per_job + if (is.null(cluster[['cores_per_job']])) { + cluster[['cores_per_job']] <- threads_compute + } + if (!is.numeric(cluster[['cores_per_job']])) { + stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") + } + cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) + if (cluster[['cores_per_job']] > threads_compute) { + .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") + } + ### job_wallclock + tmp <- strsplit( '01:00:00', ':')[[1]] + if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) { + stop("The compoment 'job_wallclock' should be the format of HH:MM or HH:MM:SS.") + } + ### max_jobs + if (!is.numeric(cluster[['max_jobs']])) { + stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.") + } + cluster[['max_jobs']] <- round(cluster[['max_jobs']]) + ### extra_queue_params + if (!is.list(cluster[['extra_queue_params']]) || + !all(sapply(cluster[['extra_queue_params']], is.character))) { + stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ", + "character strings.") + } + ### polling_period + if (!is.numeric(cluster[['polling_period']])) { + stop("The component 'polling_period' of the parameter 'cluster' must be numeric.") + } + cluster[['polling_period']] <- round(cluster[['polling_period']]) + ### special_setup + if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) { + stop("The value provided for the component 'special_setup' of the parameter ", + "'cluster' is not recognized.") + } + ### expid + as_module <- cluster[['autosubmit_module']] + if (is.null(cluster[['expid']])) { + text <- system( + paste0("module load ", as_module, "; ", + "autosubmit expid -H local -d 'startR computation'"), + intern = T) + cluster[['expid']] <- strsplit( + text[grep("The new experiment", text)], + "\"")[[1]][2] + message(paste0("ATTENTION: The new experiment '", cluster[['expid']], + "' is created. Please note it down.")) + } else { if (!is.character(cluster[['expid']]) | length(cluster[['expid']]) != 1) { stop("The component 'expid' of the parameter 'cluster' must be a character string.") } - # hpc_user - if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) { - stop("The component 'hpc_user' of the parameter 'cluster' must be a character string.") + if (!dir.exists(file.path("/esarchive/autosubmit", cluster[['expid']]))) { + stop("Cluster component 'expid' is not found under /esarchive/autosubmit/.") } - } + suite_id <- cluster[['expid']] - if (on_cluster) { - remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", cluster[['expid']], 'proj') - remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', cluster[['expid']], '/') + ### hpc_user + if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) { + stop("Cluster component 'hpc_user' must be a character string.") } - - # Check silent - if (!is.logical(silent)) { - stop("Parameter 'silent' must be logical.") + ### run_dir + if (!is.null(cluster$run_dir)) { + if (!dir.exists(cluster$run_dir)) { + stop("Cluster component 'run_dir' ", cluster$run_dir," is not found.") + } } - - # Check debug - if (!is.logical(debug)) { - stop("Parameter 'debug' must be logical.") + +#============================================== + + autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + if (!dir.exists(autosubmit_suite_dir_suite)) { + dir.create(autosubmit_suite_dir_suite, recursive = TRUE) } - if (silent) { - debug <- FALSE - } - - # Check wait - if (!is.logical(wait)) { - stop("Parameter 'wait' must be logical.") + if (!dir.exists(autosubmit_suite_dir_suite)) { + stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.") } + + remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj') + remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/') + # Work out chunked dimensions and target dimensions all_dims <- lapply(cube_headers, attr, 'Dimensions') @@ -380,10 +399,6 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', "target dimensions.") } - if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { - stop("Number of inputs in parameter 'cube_headers' must be equal to the ", - "number of inputs expected by the function 'step_fun'.") - } # Check all input headers have matching dimensions cube_index <- 1 for (cube_header in cube_headers) { @@ -444,84 +459,78 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', chunks <- default_chunks timings[['nchunks']] <- prod(unlist(chunks)) - # Check step_fun - if (!is(step_fun, 'startR_step_fun')) { - stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", - "by the function Step.") - } - # Replace 'all's chunks_all <- which(unlist(chunks) == 'all') if (length(chunks_all) > 0) { chunks[chunks_all] <- all_dims[names(chunks)[chunks_all]] } - if (on_cluster) { - # Copy load_process_save_chunk_autosubmit.R into local folder + # Copy load_process_save_chunk_autosubmit.R into local folder #TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") -# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', -# package = 'startR')) - chunk_script_lines <- readLines(chunk_script) - close(chunk_script) - chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', - paste(deparse(cluster[['lib_dir']]), collapse = '\n')), - chunk_script_lines) - #TODO: Change out_dir to somewhere else like expid/outputs/ - chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', - paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) - chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), - chunk_script_lines) - deparsed_calls <- paste0('start_calls <- list(') - extra_path <- '' - if (cluster[['special_setup']] == 'marenostrum4') { - extra_path <- '/gpfs/archive/bsc32/' - } - for (cube_header in 1:length(cube_headers)) { - pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') - bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] - bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') - if (!is_data_dir_shared) { - cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', - extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) - for (file_n in 1:length(bk_expected_files)) { - attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', - extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) - } - } - deparsed_calls <- paste0(deparsed_calls, '\nquote(', - paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), - ')') - cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim - attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files - if (cube_header < length(cube_headers)) { - deparsed_calls <- paste0(deparsed_calls, ', ') + chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") +# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', +# package = 'startR')) + chunk_script_lines <- readLines(chunk_script) + close(chunk_script) + chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', + paste(deparse(cluster[['lib_dir']]), collapse = '\n')), + chunk_script_lines) + #TODO: Change out_dir to somewhere else like expid/outputs/ + chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ', + paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines) + chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')), + chunk_script_lines) + deparsed_calls <- paste0('start_calls <- list(') + extra_path <- '' + if (cluster[['special_setup']] == 'marenostrum4') { + extra_path <- '/gpfs/archive/bsc32/' + } + for (cube_header in 1:length(cube_headers)) { + pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim') + bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]] + bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + if (!is_data_dir_shared) { + cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/', + extra_path, '/', cube_headers[[cube_header]][[pattern_dim]]) + for (file_n in 1:length(bk_expected_files)) { + attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/', + extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n]) } } - deparsed_calls <- paste0(deparsed_calls, '\n)') - chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) - chunk_script_lines <- gsub('^start_calls_attrs <- *', paste0('start_calls_attrs <- ', paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), - chunk_script_lines) - chunk_script_lines <- gsub('^param_dimnames <- *', paste0('param_dimnames <- ', paste(deparse(chunked_dims), collapse = '\n')), - chunk_script_lines) - chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), - chunk_script_lines) - chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), - chunk_script_lines) - chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), - chunk_script_lines) - chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), - chunk_script_lines) - writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) + deparsed_calls <- paste0(deparsed_calls, '\nquote(', + paste(deparse(cube_headers[[cube_header]]), collapse = '\n'), + ')') + cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim + attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files + if (cube_header < length(cube_headers)) { + deparsed_calls <- paste0(deparsed_calls, ', ') + } + } + deparsed_calls <- paste0(deparsed_calls, '\n)') + chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines) + chunk_script_lines <- gsub('^start_calls_attrs <- *', + paste0('start_calls_attrs <- ', + paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^param_dimnames <- *', + paste0('param_dimnames <- ', + paste(deparse(chunked_dims), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load), + chunk_script_lines) + chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute), + chunk_script_lines) + chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')), + chunk_script_lines) + chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), + chunk_script_lines) + writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R')) - # Write and copy startR_autosubmit.sh into local folder - write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) - - # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) + # Write and copy startR_autosubmit.sh into local folder + write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) - } # if on_cluster - + # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) # Iterate through chunks chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) @@ -531,238 +540,124 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) dim(arrays_of_results[[component]]) <- (unlist(chunks)) } - if (!on_cluster) { - t_end_bychunks_setup <- Sys.time() - timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, - t_begin_bychunks_setup, units = 'secs')) - timings[['transfer']] <- 0 - timings[['queue']] <- 0 - timings[['job_setup']] <- 0 - timings[['transfer_back']] <- 0 - if (!silent) { - .message(paste0("Processing chunks... ", - "remaining time estimate soon...")) - } - time_before_first_chunk <- Sys.time() - time_after_first_chunk <- NULL - } found_first_result <- FALSE for (i in 1:length(chunk_array)) { chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] names(chunk_indices) <- names(dim(chunk_array)) - - if (!on_cluster) { - if (!silent) { - .message(paste("Loading chunk", i, - "out of", length(chunk_array), "...")) - } - data <- vector('list', length(cube_headers)) - t_begin_load <- Sys.time() - for (input in 1:length(data)) { - start_call <- cube_headers[[input]] - dims_to_alter <- which(names(attr(start_call, 'Dimensions')) %in% names(chunks)) - names_dims_to_alter <- names(attr(start_call, 'Dimensions'))[dims_to_alter] - # If any dimension comes from split dimensions - split_dims <- attr(start_call, 'SplitDims') - - if (length(split_dims) != 0){ - - for (k in 1:length(split_dims)) { - if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { - chunks_split_dims <- rep(1, length(split_dims[[k]])) - names(chunks_split_dims) <- names(split_dims[[k]]) - chunks_indices_split_dims <- chunks_split_dims - split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) - chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) - chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]] - start_call[[names(split_dims)[k]]] <- .chunk(chunks_indices_split_dims, chunks_split_dims, - eval(start_call[[names(split_dims)[k]]])) - dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]])) - if (length(dims_to_alter_to_remove) > 0) { - dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove] - names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove] - } - } - } - } - - if (length(dims_to_alter) > 0) { - for (call_dim in names(attr(start_call, 'Dimensions'))[dims_to_alter]) { - start_call[[call_dim]] <- .chunk(chunk_indices[call_dim], chunks[[call_dim]], - eval(start_call[[call_dim]])) - } - } - start_call[['silent']] <- !debug - if (!('num_procs' %in% names(start_call))) { - start_call[['num_procs']] <- threads_load - } - data[[input]] <- eval(start_call) - } - t_end_load <- Sys.time() - timings[['load']] <- c(timings[['load']], - as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))) - if (!silent) { - .message(paste("Processing...")) - } - #TODO: Find a better way to assign the names of data. When multiple steps for Compute is available, this way may fail. - names(data) <- names(cube_headers) - t_begin_compute <- Sys.time() - result <- multiApply::Apply(data, - target_dims = attr(step_fun, 'TargetDims'), - fun = step_fun, ..., - output_dims = attr(step_fun, 'OutputDims'), - use_attributes = attr(step_fun, 'UseAttributes'), - ncores = threads_compute) - if (!found_first_result) { - names(arrays_of_results) <- names(result) - found_first_result <- TRUE - } - for (component in 1:length(result)) { - arrays_of_results[[component]][[i]] <- result[[component]] - } - rm(data) - gc() - t_end_compute <- Sys.time() - timings[['compute']] <- c(timings[['compute']], - as.numeric(difftime(t_end_compute, - t_begin_compute, units = 'secs'))) - } - - # Time estimate - if (!on_cluster) { - if (is.null(time_after_first_chunk)) { - time_after_first_chunk <- Sys.time() - if (!silent) { - estimate <- (time_after_first_chunk - - time_before_first_chunk) * - (length(chunk_array) - 1) - units(estimate) <- 'mins' - .message( - paste0("Remaining time estimate (at ", format(time_after_first_chunk), ") ", - "(neglecting merge time): ", format(estimate)) - ) - } - } - } } - if (on_cluster) { - timings[['cores_per_job']] <- cluster[['cores_per_job']] - timings[['concurrent_chunks']] <- cluster[['max_jobs']] + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- cluster[['max_jobs']] - t_end_bychunks_setup <- Sys.time() - timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, - t_begin_bychunks_setup, units = 'secs')) - if (!is_data_dir_shared) { + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { #NOTE: Not consider this part yet - t_begin_transfer <- Sys.time() - .message("Sending involved files to the cluster file system...") - files_to_send <- NULL - #files_to_check <- NULL - for (cube_header in 1:length(cube_headers)) { - expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') - #files_to_check <- c(files_to_check, expected_files) - #if (cluster[['special_setup']] == 'marenostrum4') { - # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) - #} - files_to_send <- c(files_to_send, expected_files) - } - #which_files_exist <- sapply(files_to_check, file.exists) - which_files_exist <- sapply(files_to_send, file.exists) - files_to_send <- files_to_send[which_files_exist] - if (cluster[['special_setup']] == 'marenostrum4') { - file_spec <- paste(paste0("/gpfs/archive/bsc32/", - files_to_send), collapse = ' ') - system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, - ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, - ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', - " ; sleep 1 ; ", - "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", - "do sleep 2 ; done", - " ; sleep 1 ; ", - 'while [[ ! $(grep "total size is" dtrsync_*.out | ', - "wc -l) -ge 1 ]] ; ", - "do sleep 5 ; done", "'")) - } else { - file_spec <- paste(files_to_send, collapse = ' :') - system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', - remote_data_dir, '"')) - system(paste0("rsync -Rrav '", file_spec, "' '", - cluster[['queue_host']], ":", remote_data_dir, "/'")) - } - .message("Files sent successfully.") - t_end_transfer <- Sys.time() - timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) - } else { - timings[['transfer']] <- 0 - } - if (!silent) { - .message(paste0("Processing chunks... ")) - } - time_begin_first_chunk <- Sys.time() - - as_module <- cluster[['autosubmit_module']] - sys_commands <- paste0("module load ", as_module, "; ", - "autosubmit create ", suite_id, " -np; ", - "autosubmit refresh ", suite_id, "; ") - if (wait) { - sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + t_begin_transfer <- Sys.time() + .message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! $(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) } else { - sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? - } + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + .message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + .message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit create ", suite_id, " -np; ", + "autosubmit refresh ", suite_id, "; ") + if (wait) { + sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + } else { + sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? + } + if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { + #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here + system(sys_commands) - if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { - #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here - system(sys_commands) + } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + # ssh from WS to AS VM to run exp + as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') + sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') + system(sys_commands) - } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { - # ssh from WS to AS VM to run exp - as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') - sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') - system(sys_commands) + } else { + stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + } + # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. + failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) + if (substr(failed_file_size, 1, 1) != 0) { + # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(cluster[['run_dir']])) { + file.remove( + file.path(cluster[['run_dir']], + list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) + ) } else { - stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) } - # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. - failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) - if (substr(failed_file_size, 1, 1) != 0) { - # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist - # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ - if (!is.null(cluster[['run_dir']])) { - file.remove( - file.path(cluster[['run_dir']], - list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) - ) - } else { - file.remove( - file.path(remote_autosubmit_suite_dir_suite, - list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) - ) - } + stop("Some Autosubmit jobs failed. Check GUI and logs.") + } - stop("Some Autosubmit jobs failed. Check GUI and logs.") - } + timings[['total']] <- t_begin_total + startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', + suite_id = suite_id, chunks = chunks, + num_outputs = length(arrays_of_results), + autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, + timings = timings) + class(startr_exec) <- 'startR_exec' - timings[['total']] <- t_begin_total - startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', - suite_id = suite_id, chunks = chunks, - num_outputs = length(arrays_of_results), - autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, - timings = timings) - class(startr_exec) <- 'startR_exec' + if (wait) { + result <- Collect(startr_exec, wait = TRUE, remove = T) + .message("Computation ended successfully.") + return(result) - if (wait) { - result <- Collect(startr_exec, wait = TRUE, remove = T) - .message("Computation ended successfully.") - return(result) + } else { + # if wait = F, return startr_exec and merge chunks in Collect(). + return(startr_exec) + } - } else { - # if wait = F, return startr_exec and merge chunks in Collect(). - return(startr_exec) - } - } } - diff --git a/R/Compute.R b/R/Compute.R index 81f83e6..981f28b 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -28,6 +28,7 @@ #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. +#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'. #'@param ecflow_suite_dir A character string indicating the path to a folder in #' the local workstation where to store temporary files generated for the #' automatic management of the workflow. Only needed when the execution is run @@ -145,24 +146,18 @@ Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', stop("Workflows with only one step supported by now.") } - # Run ByChunks with the combined operation + # Run ByChunks with the chosen operation if (!is.null(cluster)) { - if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) { + if (is.null(workflow_manager)) { + stop("Specify parameter 'workflow_manager' as 'ecFlow' or 'Autosubmit'.") + } else if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) { stop("Parameter 'workflow_manager' can only be 'ecFlow' or 'Autosubmit'.") } + } else { # run locally + workflow_manager <- 'ecflow' } - if (tolower(workflow_manager) == 'autosubmit') { - res <- ByChunks_autosubmit(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - autosubmit_suite_dir = autosubmit_suite_dir, - autosubmit_server = autosubmit_server, - silent = silent, debug = debug, wait = wait) - } else { + if (tolower(workflow_manager) == 'ecflow') { # ecFlow or run locally res <- ByChunks_ecflow(step_fun = operation, cube_headers = workflow$inputs, @@ -173,7 +168,18 @@ Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', ecflow_suite_dir = ecflow_suite_dir, ecflow_server = ecflow_server, silent = silent, debug = debug, wait = wait) - } + } else { + res <- ByChunks_autosubmit(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = autosubmit_server, + silent = silent, debug = debug, wait = wait) + + } # TODO: carry out remaining steps locally, using multiApply # Return results diff --git a/R/Utils.R b/R/Utils.R index b0478cf..c1d738f 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -921,7 +921,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { dest_dir <- file.path(autosubmit_suite_dir, paste0("/STARTR_CHUNKING_", cluster$expid)) if (!file.exists(dest_dir)) { - dir.create(savefile_path, recursive = TRUE) + dir.create(dest_fir, recursive = TRUE) } writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh')) } -- GitLab From 44cd30232d28d7232bd54f23b5048ba8e72c3f1a Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Aug 2023 15:27:47 +0200 Subject: [PATCH 20/32] Update document --- R/ByChunks_autosubmit.R | 34 +++++++++++++++++----------------- R/ByChunks_ecflow.R | 13 +++++++------ R/Collect.R | 29 ++++++++++++++--------------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 8789963..af411d9 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -2,7 +2,8 @@ #' #'This is an internal function used in Compute(), executing the operation by #'the chunks specified in Compute(). It also returns the configuration details -#'and profiling information. +#'and profiling information. It is used when the workflow manager is +#'Autosubmit. #' #'@param step_fun A function with the class 'startR_step_fun' containing the #' details of operation. @@ -19,19 +20,17 @@ #'@param threads_compute An integer indicating the number of execution threads #' to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the -#' machine to be run on. The comoponents vary from the different machines. -#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more -#' details and examples. -#' Only needed when the computation is not run locally. The default value is -#' NULL. -#'@param ecflow_suite_dir A character string indicating the path to a folder in -#' the local workstation where to store temporary files generated for the -#' automatic management of the workflow. Only needed when the execution is run -#' remotely. The default value is NULL. -#'@param ecflow_server A named vector indicating the host and port of the -#' EC-Flow server. The vector form should be -#' \code{c(host = 'hostname', port = port_number)}. Only needed when the -#' execution is run remotely. The default value is NULL. +#' machine to be run on. The comoponents vary from different machines. Check +#' \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide} +#' for more details and examples. +#'@param autosubmit_suite_dir A character string indicating the path to a folder +#' where to store temporary files generated for the automatic management of the +#' workflow manager. This path should be available in local workstation as well +#' as autosubmit machine. The default value is NULL, and a temporary folder +#' will be created. +#'@param autosubmit_server A character vector indicating the login node of the +#' autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02". +#' The default value is NULL, and the node will be randomly chosen. #'@param silent A logical value deciding whether to print the computation #' progress (FALSE) on the R session or not (TRUE). It only works when the #' execution runs locally or the parameter 'wait' is TRUE. The default value @@ -51,8 +50,9 @@ #' attached as attributes to the returned list of arrays. #' #'@examples -#' # ByChunks() is internally used in Compute(), not intended to be used by -#' # users. The example just illustrates the inputs of ByChunks(). +#' # ByChunks_autosubmit() is internally used in Compute(), not intended to be +#' # used by users. The example just illustrates the inputs of +#' # ByChunks_autosubmit(). #' # data_path <- system.file('extdata', package = 'startR') #' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') #' # sdates <- c('200011', '200012') @@ -77,7 +77,7 @@ #' # output_dims = 'latitude', #' # use_libraries = c('multiApply'), #' # use_attributes = list(data = "Variables")) -#' #ByChunks(step, data) +#' #ByChunks_autosubmit(step, data) #' #'@import multiApply #'@importFrom methods is diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index 793052c..cb5c95b 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -19,9 +19,10 @@ #'@param threads_compute An integer indicating the number of execution threads #' to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the -#' machine to be run on. The comoponents vary from the different machines. -#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more -#' details and examples. +#' machine to be run on. The comoponents vary from the different machines. +#' Check +#' \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide} +#' for more examples. #' Only needed when the computation is not run locally. The default value is #' NULL. #'@param ecflow_suite_dir A character string indicating the path to a folder in @@ -51,8 +52,8 @@ #' attached as attributes to the returned list of arrays. #' #'@examples -#' # ByChunks() is internally used in Compute(), not intended to be used by -#' # users. The example just illustrates the inputs of ByChunks(). +#' # ByChunks_ecflow() is internally used in Compute(), not intended to be used +#' # by users. The example just illustrates the inputs of ByChunks_ecflow(). #' # data_path <- system.file('extdata', package = 'startR') #' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') #' # sdates <- c('200011', '200012') @@ -77,7 +78,7 @@ #' # output_dims = 'latitude', #' # use_libraries = c('multiApply'), #' # use_attributes = list(data = "Variables")) -#' #ByChunks(step, data) +#' #ByChunks_ecflow(step, data) #' #'@import multiApply #'@importFrom methods is diff --git a/R/Collect.R b/R/Collect.R index b00ae6c..6d752f5 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -1,11 +1,11 @@ #'Collect and merge the computation results #' #'The final step of the startR workflow after the data operation. It is used when -#'the parameter 'wait' of Compute() is FALSE, and the functionality includes -#'updating the job status shown on the EC-Flow GUI and collecting all the chunks -#'of results as one data array when the execution is done. See more details on -#'\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}. -#' +#'the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the +#'results as one data array when the execution is done. See more details on +#'\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}. +#'Collect() calls Collect_ecflow() or Collect_autosubmit() according to the +#'chosen workflow manager. #'@param startr_exec An R object returned by Compute() when the parameter 'wait' #' of Compute() is FALSE. It can be directly from a Compute() call or read from #' the RDS file. @@ -13,16 +13,15 @@ #' Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a #' blocking call, in which Collect() will retrieve information from the HPC, #' including signals and outputs, each polling_period seconds. The the status -#' can be monitored on the EC-Flow GUI. Collect() will not return until the -#' results of all chunks have been received. If FALSE, Collect() will crash with -#' an error if the execution has not finished yet, otherwise it will return the -#' merged array. The default value is TRUE. -#'@param remove A logical value deciding whether to remove of all data results -#' received from the HPC (and stored under 'ecflow_suite_dir', the parameter in -#' Compute()) after being collected, as well as the local job folder under -#' 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the data and -#' Collect() it as many times as desired, set remove to FALSE. The default -#' value is TRUE. +#' can be monitored on the workflow manager GUI. Collect() will not return +#' until the results of all the chunks have been received. If FALSE, Collect() +#' return an error if the execution has not finished, otherwise it will return +#' the merged array. The default value is TRUE. +#'@param remove A logical value deciding whether to remove of all chunk results +#' received from the HPC after data being collected, as well as the local job +#' folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the +#' data and Collect() them as many times as desired, set remove to FALSE. The +#' default value is TRUE. #'@return A list of merged data array. #' #'@examples -- GitLab From 8da2a993678b54e1649c6dc043533599ed153282 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Aug 2023 16:26:09 +0200 Subject: [PATCH 21/32] Update documentation for Autosubmit --- inst/doc/practical_guide.md | 67 +++++++++++++++++++++++++++++++++++-- man/CDORemapper.Rd | 2 +- man/Collect.Rd | 26 +++++++------- man/Compute.Rd | 5 +++ 4 files changed, 84 insertions(+), 16 deletions(-) diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 70b29a6..2ad00b5 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -14,7 +14,8 @@ If you would like to start using startR rightaway on the BSC infrastructure, you 2. [**Step() and AddStep()**](#4-2-step-and-addstep) 3. [**Compute()**](#4-3-compute) 1. [**Compute() locally**](#4-3-1-compute-locally) - 2. [**Compute() on HPCs**](#4-3-2-compute-on-hpcs) + 2. [**Compute() on HPCs with ecFlow**](#4-3-2-compute-on-hpcs-with-ecflow) + 3. [**Compute() on HPCs with Autosubmit**](#4-3-2-compute-on-hpcs-with-autosubmit) 4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui) 5. [**Additional information**](#5-additional-information) 1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores) @@ -566,9 +567,12 @@ res <- Compute(wf, * max: 8.03660178184509 ``` -#### 4-3-2. Compute() on HPCs +#### 4-3-2. Compute() on HPCs with ecFlow -In order to run the computation on a HPC, you will need to make sure the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported). +We can use workflow manager (ecFlow or Autosubmit) to dispatch computation jobs on a HPC. +To use Autosubmit, check the next session. +You will need to make sure that the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. +If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported). You will need to add two parameters to your `Compute()` call: `cluster` and `ecflow_suite_dir`. @@ -689,6 +693,63 @@ As mentioned above in the definition of the `cluster` parameters, it is strongly You can find the `cluster` configuration for other HPCs at the end of this guide [Compute() cluster templates](#compute-cluster-templates) +#### 4-3-3. Compute() on HPCs with Autosubmit + +To use Autosubmit as workflow manager, add the following parameters to your `Compute()` call: + `cluster`, `autosubmit_suite_dir`, and `autosubmit_server`. + +`autosubmit_suite_dir`is the path where to store temporary files generated for +Autosubmit to establish the workflow. It should be found in both workstation and the Autosubmit machine. + +`autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01'or 'bscesautosubmit02'. + +The parameter `cluster` expects a list of components that provide the configuration of Autosubmit machine. For now, the supported platforms are 'local' (run on Autosubmit machine) and 'nord3' (Autosubmit submits jobs to Nord3). +You can see one example of cluster configuration below. + +```r + res <- Compute(wf, chunks = list(sdate = 2), + threads_compute = 4, threads_load = 2, + cluster = list( + queue_host = 'nord3', + expid = , + hpc_user = "bsc32xxx", + r_module = "R/4.1.2-foss-2019b", + CDO_module = "CDO/1.9.8-foss-2019b", + autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3', + cores_per_job = 4, + job_wallclock = '01:00:00', + max_jobs = 4 + ), + workflow_manager = 'autosubmit', + autosubmit_suite_dir = "/home/Earth//startR_local_autosubmit/", + autosubmit_server = 'bscesautosubmit01', + wait = TRUE + ) +``` + +The cluster components and options are explained next: + +- `queue_host`: Must match the platform name in Autosubmit configuration file `platforms.yml`, or 'local'. The provided platforms are: 'nord3'. +- `expid`: The Autosubmit experiment to run the computation. You can create the experiment beforehand or let startR create one for you by not specifying this componenet. +To have the good practice, note down the expid if it is automatically created by startR and re-use/delete it afterwards. + - `hpc_user`: Your user ID on the HPC (i.e., "bsc32xxx"). It is required if "queue_host" is not 'local'. +- `data_dir`: The path to the data repository if the data is not shared. +- `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. +- `init_commands`: The initial commands in bash script before R script runs. For example, the modules required by computation can be loaded here. +- `r_module`: name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. +- `CDO_module`: name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). +- `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used. +- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. +- `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. +- `extra_queue_params`: list of character strings for additional queue headers for the jobs +to be submitted to the HPC. For example, to constraint using medmem node ('#SBATCH --constraint=medmem'); to request an exclusive mode ('#SBATCH --exclusive'). +- `polling_period`: when the connection is unidirectional, the workstation will ask the HPC login node for results each `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning. +- `special_setup`: name of the machine if the computation requires an special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4'). + +After the `Compute()` call is executed, you can monitor the status on [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/). + + ### 4-4. Collect() and the EC-Flow GUI Usually, in use cases where large data inputs are involved, it is convenient to add the parameter `wait = FALSE` to your `Compute()` call. With this parameter, `Compute()` will immediately return an object with information about your startR execution. You will be able to store this object onto disk. After doing that, you will not need to worry in case your workstation turns off in the middle of the computation. You will be able to close your R session, and collect the results later on with the `Collect()` function. diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd index 024ce32..5ced7cd 100644 --- a/man/CDORemapper.Rd +++ b/man/CDORemapper.Rd @@ -51,7 +51,7 @@ perform the interpolation, hence CDO is required to be installed. data_path <- system.file('extdata', package = 'startR') path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') sdates <- c('200011') - \donttest{ + \dontrun{ data <- Start(dat = list(list(path = path_obs)), var = 'tos', sdate = sdates, diff --git a/man/Collect.Rd b/man/Collect.Rd index 97b529b..d90caca 100644 --- a/man/Collect.Rd +++ b/man/Collect.Rd @@ -15,25 +15,27 @@ the RDS file.} Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a blocking call, in which Collect() will retrieve information from the HPC, including signals and outputs, each polling_period seconds. The the status -can be monitored on the EC-Flow GUI. Collect() will not return until the -results of all chunks have been received. If FALSE, Collect() will crash with -an error if the execution has not finished yet, otherwise it will return the -merged array. The default value is TRUE.} +can be monitored on the workflow manager GUI. Collect() will not return +until the results of all the chunks have been received. If FALSE, Collect() +return an error if the execution has not finished, otherwise it will return +the merged array. The default value is TRUE.} -\item{remove}{A logical value deciding whether to remove of all data results -received from the HPC (and stored under 'ecflow_suite_dir', the parameter in -Compute()) after being collected. To preserve the data and Collect() it as -many times as desired, set remove to FALSE. The default value is TRUE.} +\item{remove}{A logical value deciding whether to remove of all chunk results +received from the HPC after data being collected, as well as the local job +folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the +data and Collect() them as many times as desired, set remove to FALSE. The +default value is TRUE.} } \value{ A list of merged data array. } \description{ The final step of the startR workflow after the data operation. It is used when -the parameter 'wait' of Compute() is FALSE, and the functionality includes -updating the job status shown on the EC-Flow GUI and collecting all the chunks -of results as one data array when the execution is done. See more details on -\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}. +the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the +results as one data array when the execution is done. See more details on +\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}. +Collect() calls Collect_ecflow() or Collect_autosubmit() according to the +chosen workflow manager. } \examples{ data_path <- system.file('extdata', package = 'startR') diff --git a/man/Compute.Rd b/man/Compute.Rd index 5b03abd..f6ad867 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -7,11 +7,14 @@ Compute( workflow, chunks = "auto", + workflow_manager = "ecFlow", threads_load = 1, threads_compute = 1, cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + autosubmit_suite_dir = NULL, + autosubmit_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE @@ -28,6 +31,8 @@ those not required as the target dimension in function Step(). The default value is 'auto', which lists all the non-target dimensions and each one has one chunk.} +\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'.} + \item{threads_load}{An integer indicating the number of execution threads to use for the data retrieval stage. The default value is 1.} -- GitLab From 76cd9e1c8c3f9d38d0cb31c7f0b67e1b732074cc Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 11 Aug 2023 16:40:04 +0200 Subject: [PATCH 22/32] fix link --- inst/doc/practical_guide.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 2ad00b5..dead996 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -15,7 +15,7 @@ If you would like to start using startR rightaway on the BSC infrastructure, you 3. [**Compute()**](#4-3-compute) 1. [**Compute() locally**](#4-3-1-compute-locally) 2. [**Compute() on HPCs with ecFlow**](#4-3-2-compute-on-hpcs-with-ecflow) - 3. [**Compute() on HPCs with Autosubmit**](#4-3-2-compute-on-hpcs-with-autosubmit) + 3. [**Compute() on HPCs with Autosubmit**](#4-3-3-compute-on-hpcs-with-autosubmit) 4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui) 5. [**Additional information**](#5-additional-information) 1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores) @@ -695,10 +695,10 @@ You can find the `cluster` configuration for other HPCs at the end of this guide #### 4-3-3. Compute() on HPCs with Autosubmit -To use Autosubmit as workflow manager, add the following parameters to your `Compute()` call: +To use Autosubmit as workflow manager, add the following parameters to your Compute() call: `cluster`, `autosubmit_suite_dir`, and `autosubmit_server`. -`autosubmit_suite_dir`is the path where to store temporary files generated for +`autosubmit_suite_dir` is the path where to store temporary files generated for Autosubmit to establish the workflow. It should be found in both workstation and the Autosubmit machine. `autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01'or 'bscesautosubmit02'. @@ -747,7 +747,7 @@ to be submitted to the HPC. For example, to constraint using medmem node ('#SBAT - `polling_period`: when the connection is unidirectional, the workstation will ask the HPC login node for results each `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning. - `special_setup`: name of the machine if the computation requires an special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4'). -After the `Compute()` call is executed, you can monitor the status on [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/). +After the Compute() call is executed, you can monitor the status on [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/). ### 4-4. Collect() and the EC-Flow GUI -- GitLab From 6b4dbeef1c8b48ffb220d8452628acfa60f649a3 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 17 Aug 2023 11:41:44 +0200 Subject: [PATCH 23/32] Contraint max waiting job to 366 --- R/Utils.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/Utils.R b/R/Utils.R index c1d738f..c693db4 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -951,6 +951,8 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { #Q: Should it be the total amount of chunk? conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks))) # total amount of chunk + #NOTE: Nord3 max. amount of queued jobs is 366 + if (conf$config$MAXWAITINGJOBS > 366) conf$config$MAXWAITINGJOBS <- 366 conf$config$TOTALJOBS <- as.integer(cluster$max_jobs) ############################################################ -- GitLab From ef114bc0c5da472a58d8160e066571af55a408d9 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 17 Aug 2023 18:02:46 +0200 Subject: [PATCH 24/32] Use threads_compute instead of cores_per_job --- R/ByChunks_autosubmit.R | 25 ++++++++++++++----------- R/Utils.R | 7 +++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index af411d9..133f467 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -289,15 +289,18 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") } ### cores_per_job - if (is.null(cluster[['cores_per_job']])) { - cluster[['cores_per_job']] <- threads_compute - } - if (!is.numeric(cluster[['cores_per_job']])) { - stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") - } - cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) - if (cluster[['cores_per_job']] > threads_compute) { - .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") +# if (is.null(cluster[['cores_per_job']])) { +# cluster[['cores_per_job']] <- threads_compute +# } +# if (!is.numeric(cluster[['cores_per_job']])) { +# stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") +# } +# cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) +# if (cluster[['cores_per_job']] > threads_compute) { +# .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") +# } + if (!is.null(cluster[['cores_per_job']])) { + .warning("The component 'cores_per_job' in cluster list is not used. Please specify the cores by parameter 'threads_compute'.") } ### job_wallclock tmp <- strsplit( '01:00:00', ':')[[1]] @@ -530,7 +533,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) + write_autosubmit_confs(chunks, threads_compute, cluster, autosubmit_suite_dir) # Iterate through chunks chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) @@ -547,7 +550,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } - timings[['cores_per_job']] <- cluster[['cores_per_job']] +# timings[['cores_per_job']] <- cluster[['cores_per_job']] timings[['concurrent_chunks']] <- cluster[['max_jobs']] t_end_bychunks_setup <- Sys.time() diff --git a/R/Utils.R b/R/Utils.R index c693db4..f496333 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -928,7 +928,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { } # This function generates the .yml files under autosubmit conf/ -write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { +write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_suite_dir) { #TODO: Remove this library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) @@ -975,8 +975,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { # wallclock from '01:00:00' to '01:00' jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) jobs[[1]]$PLATFORM <- cluster$queue_host - #Q: Is it cores_per_job? - jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) + jobs[[1]]$THREADS <- as.integer(threads_compute) jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) jobs[[1]][names(chunks)] <- "" @@ -996,7 +995,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { } else if (conf_type == "platforms") { if (tolower(cluster$queue_host) != "local") { conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user - conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(threads_compute) if (!is.null(cluster$extra_queue_params)) { tmp <- unlist(cluster$extra_queue_params) for (ii in 1:length(tmp)) { -- GitLab From 2d08116a6295a5bcbd51e97d8dc9c7e0aace3320 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 21 Aug 2023 19:10:19 +0200 Subject: [PATCH 25/32] Fix unit tests for deleted files --- tests/testthat/test-Compute-chunk_split_dim.R | 10 +- ...st-Start-implicit_dependency_by_selector.R | 136 +++++++++--------- tests/testthat/test-Start-multiple-sdates.R | 4 +- tests/testthat/test-Start-split-merge.R | 6 +- 4 files changed, 78 insertions(+), 78 deletions(-) diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 09da160..0e82126 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -116,13 +116,13 @@ c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2) test_that("2. The files are repeated", { -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') sdates.seq <- c("20161222","20161229", "20170105","20170112") suppressWarnings( hcst <- Start(dat = ecmwf_path_hc, - var = 'sfcWind', + var = 'tas', sdate = sdates.seq, syear = indices(1:2), #'all', time = 'all', @@ -207,15 +207,15 @@ aperm(res3, c(7, 2, 3, 4, 1, 5, 6)) ) expect_equal( dim(res1), -c(time = 47, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) +c(time = 46, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) ) expect_equal( dim(res2), -c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 47) +c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 46) ) expect_equal( dim(res3), -c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 47) +c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46) ) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 10e5545..fd82aa7 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -8,74 +8,74 @@ #--------------------------------------------------- context("Start() implicit dependency by selector dimension") - -test_that("1. region with different index between files", { - -path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', - 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', - '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') - -# two sdates have different index for Nino3. -region <- array('Nino3', dim = c(sdate = 2, region = 1)) - -suppressWarnings( -data <- Start(dat = path, - var = 'tosmean', - sdate = c('1993', '2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = region, - time = 'all', - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = 'sdate'), - retrieve = T) -) -suppressWarnings( -data1 <- Start(dat = path, - var = 'tosmean', - sdate = c('1993'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) -suppressWarnings( -data2 <- Start(dat = path, - var = 'tosmean', - sdate = c('2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) - -expect_equal( -dim(data), -c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) -) -expect_equal( -data[1, 1, 1, 1, ], -data1[1, 1, 1, 1, ] -) -expect_equal( -data[1, 1, 2, 1, ], -data2[1, 1, 1, 1, ] -) - - -}) +#NOTE: The files don't exist anymore. +#test_that("1. region with different index between files", { +# +#path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', +# 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', +# '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') +# +## two sdates have different index for Nino3. +#region <- array('Nino3', dim = c(sdate = 2, region = 1)) +# +#suppressWarnings( +#data <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993', '2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = region, +# time = 'all', +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = 'sdate'), +# retrieve = T) +#) +#suppressWarnings( +#data1 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +#suppressWarnings( +#data2 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +# +#expect_equal( +#dim(data), +#c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) +#) +#expect_equal( +#data[1, 1, 1, 1, ], +#data1[1, 1, 1, 1, ] +#) +#expect_equal( +#data[1, 1, 2, 1, ], +#data2[1, 1, 1, 1, ] +#) +# +# +#}) test_that("2. time depends on sdate", { diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index d0c4bd3..d4ab2b2 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -7,10 +7,10 @@ context("Start() multiple sdate with split + merge dim") # It might happen when reading experimental data with many start dates, # and the corresponding observations are required to have the same data structure. -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') -var_name <- 'sfcWind' +var_name <- 'tas' var100_name <- 'windagl100' sdates.seq <- c("20161222","20161229","20170105","20170112") diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 8793296..7133adc 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,7 +1,7 @@ context("Start() split + merge dim and value check") -var_name <- 'sfcWind' -path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc' +var_name <- 'tas' +path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' suppressWarnings( hcst <- Start(dat = path.exp, @@ -24,7 +24,7 @@ file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7)))) path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' - +var_name <- "sfcWind" test_that("1. split + merge + narm", { suppressWarnings( -- GitLab From d9e74f738e47df6cb1111764c64b0e279ac6444e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 23 Aug 2023 18:54:21 +0200 Subject: [PATCH 26/32] Change unit test files to my scratch: /esarchive/scratch/aho/startR_unittest_files --- tests/testthat/test-AddStep-DimNames.R | 2 +- tests/testthat/test-Compute-CDORemap.R | 1 + tests/testthat/test-Compute-NumChunks.R | 5 +- .../testthat/test-Compute-chunk_depend_dim.R | 10 ++- tests/testthat/test-Compute-chunk_split_dim.R | 8 +- tests/testthat/test-Compute-extra_params.R | 2 + .../test-Compute-inconsistent_target_dim.R | 2 + .../testthat/test-Compute-irregular_regrid.R | 2 + tests/testthat/test-Compute-timedim.R | 1 + tests/testthat/test-Compute-transform_all.R | 7 +- .../testthat/test-Compute-transform_indices.R | 3 + .../testthat/test-Compute-transform_values.R | 38 +++++++-- tests/testthat/test-Compute-two_data.R | 4 + tests/testthat/test-Compute-use_attribute.R | 1 + .../testthat/test-Start-DCPP-across-depends.R | 14 +++- tests/testthat/test-Start-calendar.R | 11 +++ tests/testthat/test-Start-depends_values.R | 1 + .../testthat/test-Start-first_file_missing.R | 5 +- .../test-Start-global-lon-across_meridian.R | 1 + ...st-Start-implicit_dependency_by_selector.R | 2 + .../testthat/test-Start-implicit_inner_dim.R | 1 + .../testthat/test-Start-indices_list_vector.R | 15 ++-- .../testthat/test-Start-largest_dims_length.R | 6 +- .../test-Start-line_order-consistency.R | 1 + tests/testthat/test-Start-metadata_dims.R | 23 ++++-- .../test-Start-metadata_filedim_dependency.R | 1 + .../testthat/test-Start-metadata_reshaping.R | 81 +++++++++++-------- tests/testthat/test-Start-multiple-sdates.R | 6 +- .../test-Start-path_glob_permissive.R | 23 +++--- tests/testthat/test-Start-reorder-lat.R | 15 ++-- tests/testthat/test-Start-reorder-latCoarse.R | 1 + .../test-Start-reorder-lon-180to180.R | 1 + ...st-Start-reorder-lon-transform_-180to180.R | 1 + .../test-Start-reorder-lon-transform_0to360.R | 2 + tests/testthat/test-Start-reorder-lon0to360.R | 1 + .../test-Start-reorder-lon0to360Coarse.R | 1 + tests/testthat/test-Start-reorder-retrieve.R | 2 + tests/testthat/test-Start-reorder_all.R | 2 + tests/testthat/test-Start-reorder_indices.R | 2 + tests/testthat/test-Start-reshape.R | 8 +- tests/testthat/test-Start-return_vars_name.R | 1 + tests/testthat/test-Start-split-merge.R | 3 + tests/testthat/test-Start-time_unit.R | 6 +- tests/testthat/test-Start-transform-all.R | 8 +- tests/testthat/test-Start-transform-border.R | 23 +++--- .../test-Start-transform-lat-Sort-all.R | 3 +- .../test-Start-transform-lat-Sort-indices.R | 2 + .../test-Start-transform-lat-Sort-values.R | 2 + ...test-Start-transform-lon-across_meridian.R | 2 + .../testthat/test-Start-transform-metadata.R | 2 + .../test-Start-transform-three-selectors.R | 2 + tests/testthat/test-Start-two_dats.R | 2 + .../testthat/test-Start-values_list_vector.R | 20 +++-- 53 files changed, 280 insertions(+), 109 deletions(-) diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R index 2fe6b39..647ca2f 100644 --- a/tests/testthat/test-AddStep-DimNames.R +++ b/tests/testthat/test-AddStep-DimNames.R @@ -3,7 +3,7 @@ context("Error with bad dimensions tests.") test_that("Single File - Local execution", { suppressWarnings( - data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', + data <- Start(dataset = '/esarchive/scratch/aho/startR_unittest_files/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', var = 'tas', sdate = '2000', month = indices(1), diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R index 28df234..b1479e2 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -3,6 +3,7 @@ context("Compute use CDORemap") test_that("ex2_3", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R index 9e626e4..5d9a775 100644 --- a/tests/testthat/test-Compute-NumChunks.R +++ b/tests/testthat/test-Compute-NumChunks.R @@ -3,8 +3,11 @@ context("Number of chunks tests.") test_that("Single File - Local execution", { +path <- '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc', +data <- Start(dataset = path, var = 'tas', sdate = '2000', month = indices(1), diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index ce92b94..9c78764 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -12,6 +12,8 @@ path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/', 'HadGEM3-GC31-MM/dcppA-hindcast/', 'r1i1p1f2/Omon/tos/gn/v20200417/', '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + sdates <- c('2016', '2017', '2018') # retrieve = T for verification @@ -149,7 +151,9 @@ test_that("2.a. depending dim is indices(); depended dim is indices()", { suppressWarnings( data <- Start(dat = path, var = 'tos', - sdate = indices(57:59), # 2016, 2017, 2018 +#NOTE: sdate was indices(57:59) if path is /esarchive/. Now the path is under my scratch, +# the first sdate is 2016. + sdate = indices(1:3), # 2016, 2017, 2018 chunk = indices(2:4), chunk_depends = 'sdate', time = 'all', @@ -204,7 +208,9 @@ expect_error( suppressWarnings( Start(dat = path, var = 'tos', - sdate = indices(57:59), # 2016, 2017, 2018 +#NOTE: sdate was indices(57:59) if path is /esarchive/. Now the path is under my scratch, +# the first sdate is 2016. + sdate = indices(1:3), # 2016, 2017, 2018 chunk = chunks, chunk_depends = 'sdate', time = 'all', diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R index 0e82126..a40f745 100644 --- a/tests/testthat/test-Compute-chunk_split_dim.R +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -8,6 +8,7 @@ test_that("1. The files are not repeated", { repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') +repos_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_exp) suppressWarnings( exp <- Start(dat = repos_exp, @@ -32,6 +33,8 @@ dates <- attr(exp, 'Variables')$common$time # 4 3 repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' +repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + suppressWarnings( obs <- Start(dat = repos_obs, var = 'tas', @@ -117,7 +120,10 @@ c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2) test_that("2. The files are repeated", { ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') -obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) +obs_path <- paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) + sdates.seq <- c("20161222","20161229", "20170105","20170112") suppressWarnings( diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index 9b42e43..02eab30 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -7,6 +7,8 @@ test_that("ex2_6", { # Prepare sdates and paths #========================= dataset <- "/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc" + dataset <- paste0('/esarchive/scratch/aho/startR_unittest_files/', dataset) + sdates <- paste0(1981:1982, rep(10:12, 2)) #=================== # Get daily winds diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 13d2a44..7ebc6f5 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -6,7 +6,9 @@ context("Compute()/ByChunks(): dimension consistence check") test_that("ex2_11", { path.exp <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc' +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) var <- 'tos' y1 <- 1981 diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 00a5c1d..c76793d 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -7,6 +7,8 @@ test_that("1. ex2_13", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/', '$var$_*_s$sdate$-$member$_gn_$aux$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dataset = path, var = 'tos', diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index 80d96ff..d63ae6c 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -3,6 +3,7 @@ context("Compute on time dimension") test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index e6363f4..a7a67dd 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -4,6 +4,8 @@ test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() path <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dat = path, var = 'tos', @@ -54,8 +56,11 @@ test_that("2. chunk along lon", { #NOTE: the results are not identical when exp has extra cells = 2 +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index 34ddf48..37decfc 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -25,6 +25,7 @@ test_that("1. global", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) #----------------------------------- # crop = region @@ -149,6 +150,7 @@ test_that("2. regional, no border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( @@ -248,6 +250,7 @@ test_that("3. regional, at lon border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index e6b6c26..191d651 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -17,8 +17,11 @@ lats.max <- 90 # crop = region #NOTE: res1 and res3 differ if extra_cells = 2. But if retrieve = T, extra_cells = 2 or 8 is equal. +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -95,6 +98,8 @@ tolerance = 0.001 # crop = region, selector is values(c()) library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) + file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -106,8 +111,11 @@ lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -152,8 +160,12 @@ lons.min <- -180 lons.max <- 179.9 lats.min <- -90 lats.max <- 90 + +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -242,8 +254,11 @@ lats.min <- 20 lats.max <- 40 # crop = region +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', #paste0(2000:2001, '0101'), ensemble = indices(1), #'all', @@ -326,8 +341,11 @@ lats.min <- 21 lats.max <- 41 # crop = region +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', #paste0(2000:2001, '0101'), ensemble = indices(1), #'all', @@ -427,8 +445,11 @@ lats.max <- 40 #NOTE: transform_extra_cells = 8 the results are not equal # crop = region +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -520,8 +541,11 @@ tolerance = 0.001 # crop = region, CircularSort(-180, 180) #NOTE: transform_extra_cells = 8 the results are not equal +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R index 9cb7145..735735f 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -5,6 +5,8 @@ test_that("ex2_7", { # exp data repos <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/', '$var$_f6h/$var$_$sdate$.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + sdates <- sapply(2013:2014, function(x) paste0(x, sprintf('%02d', 1:12), '01')) suppressWarnings( @@ -24,6 +26,8 @@ suppressWarnings( # obs data repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/monthly_mean/', '$var$_f6h/$var$_$sdate$.nc') + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + sdates_obs <- (sapply(2012:2013, function(x) paste0(x, sprintf('%02d', 1:12)))) suppressWarnings( obs <- Start(dat = repos_obs, diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 2ca73a7..7ec3dc2 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -3,6 +3,7 @@ context("Compute use attributes") test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index a3d9586..c561abd 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,6 +1,7 @@ context("DCPP successfull retrieved for depends and across parameters.") test_that("Chunks of DCPP files- Local execution", { path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' + path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) sdates <- c('2017', '2018') suppressWarnings( @@ -19,24 +20,31 @@ suppressWarnings( ) # [sdate = 2, chunk = 3] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -dat_2018_chunk3 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc', +dat_2018_chunk3 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) expect_equal(dat[1,1,2,25:36,,], dat_2018_chunk3[1,1,,,]) # [sdate = 1, chunk = 2] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -dat_2017_chunk2 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc', +dat_2017_chunk2 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) expect_equal(dat[1,1,1,13:24,,], dat_2017_chunk2[1,1,,,]) # [sdate = 2, chunk = 1] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -dat_2018_chunk1 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc', +dat_2018_chunk1 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index da63e53..0ee4c5e 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -5,6 +5,7 @@ test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc') +path_hadgem3 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_hadgem3) sdate <- c('2000', '2001') fyear_hadgem3 <- indices(1) @@ -48,6 +49,7 @@ expect_equal( test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_bcc_csm2) suppressWarnings( data <- Start(dat = path_bcc_csm2, @@ -82,6 +84,7 @@ test_that("3. standard, daily, unit = 'days since 1850-1-1 00:00:00'", { 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc') + path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm) var <- 'tasmax' sdate <- '2000' @@ -122,6 +125,8 @@ test_that("4. standard, monthly, unit = 'days since 1850-1-1 00:00:00'", { 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/Amon/$var$/gn/v20200320/', '$var$_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_200011-201012.nc') + path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm) + sdate <- '2000' fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231') @@ -154,6 +159,7 @@ expect_equal( test_that("5. proleptic_gregorian, 6hrly, unit = 'hours since 2000-11-01 00:00:00'", { repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', '$var$/$var$_199405.nc') + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) date <- paste0('1994-05-', sprintf('%02d', 1:31), ' 00:00:00') date <- as.POSIXct(date, tz = 'UTC') # attr(date, 'tzone') <- 'UTC' @@ -189,6 +195,8 @@ expect_equal( test_that("6. standard, monthly, unit = 'months since 1850-01-01 00:00:00'", { repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + suppressWarnings( obs <- Start(dat = repos_obs, var = 'tos', @@ -246,6 +254,8 @@ c("1960-11-16 00:00:00", "1960-12-16 12:00:00", "1961-01-16 12:00:00", "1961-02- test_that("7. proleptic_gregorian, monthly, unit = 'days since 1850-1-1 00:00:00'", { repos <- '/esarchive/exp/mpi-esm-lr/cmip5-historical_i0p1/monthly_mean/$var$/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( data <- Start(dat = repos, var = 'tas', @@ -278,6 +288,7 @@ suppressWarnings( test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", { repos <- '/esarchive/exp/CMIP5/historical/ecearth/cmip5-historical_i0p1/$var$_3hr_EC-EARTH_historical_r6i1p1_$period$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'vas', diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 18d1b9f..e4e4adc 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -6,6 +6,7 @@ context("Start() using values() to define dependency relations") path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) sdates <- c('2016', '2017', '2018') chunks <- array(dim = c(chunk = 3, sdate = 3)) diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index 392841a..9c232e6 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -7,6 +7,7 @@ context("Start() retrieves files that the first file is missing") # the data. The parameter 'metadata_dims' can also be used in this case. file <- "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/$var$_f24h/$var$_$file_date$.nc" +file <- paste0('/esarchive/scratch/aho/startR_unittest_files/', file) var <- 'tas' sdates1 <- c('20130611') #exists sdates2 <- c('20130618') #does not exist @@ -119,7 +120,7 @@ data <- Start(dat = file, ) expect_equal( as.vector(attr(data, 'NotFoundFiles')), - c("/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA) + c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA) ) }) @@ -149,7 +150,7 @@ data <- Start(dat = file, ) expect_equal( as.vector(attr(data, 'NotFoundFiles')), - c(NA, "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc") + c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc") ) }) diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 34c861f..0360629 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -5,6 +5,7 @@ context("Start() across_meridia global lon length check") test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- 'tas' lon.min <- 0 lon.max <- 359.723 #360 diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index fd82aa7..4e89190 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -80,6 +80,7 @@ context("Start() implicit dependency by selector dimension") test_that("2. time depends on sdate", { repos <- '/esarchive/exp/ecmwf/system4_m1/daily_mean/$var$_f24h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) sdates <- paste0(2001:2003, '0501') tmp <- as.POSIXct(sapply(2001:2003, function(x) paste0(x, '-05-', sprintf('%02d', 1:31))), tz = 'UTC') tmp <- array(tmp, dim = c(time = 31, sdate = 3)) @@ -122,6 +123,7 @@ test_that("3. region depends on member and sdate", { reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +path_SR <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_SR) suppressWarnings( data <- Start(dat = path_SR, var = 'tosmean', diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 3788af0..fcae53e 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -8,6 +8,7 @@ context("Start() implicit inner dimension") test_that("1. time = 1", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312") diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 82e5cb1..b225a0a 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -6,12 +6,15 @@ context("List of indices and vector of indices") +repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + test_that("1. transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -36,7 +39,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -74,7 +77,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -93,7 +96,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -194,7 +197,7 @@ test_that("4. no transform, indices reverse", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -215,7 +218,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index b448f89..6a796a2 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -7,9 +7,10 @@ test_that("1. inconsistent member length", { # system3: 40 members. repos <- list(list(name = 'system5c3s', - path = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), list(name = 'system3_m1-c3s', - path = "/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + # largest_dims_length = FALSE suppressWarnings( @@ -140,6 +141,7 @@ test_that("2. inconsistent time length, merge_across_dims = T", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/EC-Earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/', '$member$/Amon/$var$/gr/v20210309/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dataset = path, diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 6b797a8..8bf4564 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -4,6 +4,7 @@ context("Start() line order consistency check") variable <- "tas" obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_$file_date$.nc" + obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) dates_file <- "201702" lats.min <- -90 diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 4251c71..569a28e 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -2,6 +2,7 @@ context("Start() metadata_dims check") test_that("1. One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = list(list(name = 'system5_m1', path = repos)), var = 'tas', @@ -81,7 +82,9 @@ suppressWarnings( test_that("2. Two data sets, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -186,6 +189,7 @@ test_that("3. One data set, two vars", { repos <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- c('tas', 'clt') suppressWarnings( data <- Start(dat = repos, @@ -271,7 +275,9 @@ suppressWarnings( test_that("4. Two data sets, two vars", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -527,7 +533,9 @@ suppressWarnings( test_that("5. Specify metadata_dims with another file dimension", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -733,6 +741,7 @@ suppressWarnings( test_that("6. One data set, two vars from one file", { mask_path <- '/esarchive/autosubmit/con_files/mask.regions.Ec3.0_O1L46.nc' +mask_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', mask_path) suppressWarnings( data <- Start(repos = mask_path, var = c('nav_lon', 'nav_lat'), @@ -781,8 +790,10 @@ data <- Start(repos = mask_path, test_that("7. Two data sets, while one is missing", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) # incorrect path. Therefore repos2 doesn't have any valid files repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f2h/$var$_$sdate$.nc" # correct one is _f6h + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) var <- 'tas' suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), @@ -824,7 +835,7 @@ suppressWarnings( ) expect_equal( attr(data, 'Files'), - array(c(NA, "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) @@ -865,7 +876,7 @@ suppressWarnings( ) expect_equal( attr(dataF, 'ExpectedFiles'), - array(c("/esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) @@ -875,12 +886,12 @@ suppressWarnings( test_that("8. Two data sets, both have files but the first file is missing", { path_list <- list( MPI = list(name = 'MPI_ESM', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc')), Had = list(name = 'HadGEM3', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc'))) @@ -927,9 +938,9 @@ data <- Start(dataset = path_list, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", + array(c("/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", NA, NA, NA, NA, NA, NA, - "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), + "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), dim = c(dataset = 2, var = 1, member = 2, sdate = 1, chunk = 2)) ) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index cfd7dfb..13cac47 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -6,6 +6,7 @@ context("Start() metadata filedim dependency") # Preparation: Get the time values repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 7e9c280..92e831b 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -3,8 +3,11 @@ context("Start() metadata reshaping") test_that("1. time across fyear, fyear depends on sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -54,7 +57,7 @@ as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12 # retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -82,8 +85,11 @@ dates test_that("2. time across fyear, only one sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -118,7 +124,7 @@ as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12 #retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -145,8 +151,11 @@ dates test_that("3. time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -193,7 +202,7 @@ as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03 suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -228,9 +237,10 @@ datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), substr, 1, 7)))) +repos <- "/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( - data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + data <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -277,8 +287,7 @@ as.vector(datess) ) suppressWarnings( - dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + dataF <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -310,8 +319,12 @@ dates test_that("5. test 1 but merge_across_dims_narm = F", { + +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -361,7 +374,7 @@ c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962- ) suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -392,9 +405,11 @@ test_that("6. split time dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_199407.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +data <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -439,8 +454,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +dataF <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -470,9 +484,10 @@ datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -518,8 +533,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -549,9 +563,10 @@ test_that("8. split sdate dim", { file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), dim = c(syear = 3, smonth = 2)) +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -596,8 +611,7 @@ seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00: ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -617,8 +631,7 @@ dates # no return_vars suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -636,8 +649,7 @@ names(attributes(data)$Variables$common), ) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -660,8 +672,11 @@ test_that("9. split file dim that contains 'time', and 'time' inner dim is impli dates_arr <- array(c(paste0(1961, '0', 1:5), paste0(1962, '0', 1:5)), dim = c(time = 5, syear = 2)) +repos <- "/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +data <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -704,7 +719,7 @@ as.vector(dates_arr) suppressWarnings( -dataF <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +dataF <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -735,8 +750,10 @@ y2 <- seq(a, b, by = 'days') y2 <- y2[-3] # remove 28 Feb time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +repos <- "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +data <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] @@ -772,7 +789,7 @@ as.vector(time_array) ) suppressWarnings( -dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +dataF <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index d4ab2b2..6467a84 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -8,7 +8,9 @@ context("Start() multiple sdate with split + merge dim") # and the corresponding observations are required to have the same data structure. ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) var_name <- 'tas' var100_name <- 'windagl100' @@ -55,7 +57,7 @@ obs <- Start(dat = obs_path, ) expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], @@ -131,7 +133,7 @@ obs <- Start(dat = obs_path, expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index ddd69be..e32d0b3 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -8,6 +8,7 @@ years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/*/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -30,10 +31,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. @@ -48,6 +49,7 @@ data <- Start(dat = repos, repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/$member$/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -72,10 +74,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2, member = 1)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. @@ -138,6 +140,7 @@ test_that("2. tag at the end", { path <- "/esarchive/exp/ecmwf/system4_m1/6hourly/$var$/$var$_$year$0*.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dat = path, var = "tas", @@ -152,8 +155,8 @@ data <- Start(dat = path, expect_equal( as.list(attr(data, 'ExpectedFiles')), -list("/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", - "/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") +list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") ) }) diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index 2fe5de9..0ac7701 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -13,6 +13,7 @@ context("Start() lat Reorder test") ############################################## path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:360] @@ -884,8 +885,10 @@ test_that("4-x-2-12-123-2-1-x", { # 1-1. no Sort(), NULL ## lat should be descending +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -904,7 +907,7 @@ exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 1-2. Sort(), NULL ## lat should be ascending suppressWarnings( -exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -923,7 +926,7 @@ exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 1-3. Sort(drcreasing = T), NULL ## lat should be descending suppressWarnings( -exp1_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_3 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -971,7 +974,7 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude) # 2-1. no Sort(), 'dat' ## lat should be descending suppressWarnings( -exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -990,7 +993,7 @@ exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 2-2. Sort(), 'dat' ## lat should be ascending suppressWarnings( -exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -1009,7 +1012,7 @@ exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 2-3. Sort(drcreasing = T), NULL ## lat should be descending suppressWarnings( -exp2_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_3 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 6ca7b15..af9c2db 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -16,6 +16,7 @@ context("Start() lat Reorder test") ############################################## #path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values ############################################## diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index aa209b8..e0a066c 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -15,6 +15,7 @@ context("Start() lon Reorder non-transform -180to180 test") ## Origin longitude in file: [-179.71875:180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 4351aa4..46da00e 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -14,6 +14,7 @@ context("Start() lon Reorder transform -180to180 test") # 3-2 ## Origin longitude in file: [-179.71875:180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 3d2047e..e05c731 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -14,6 +14,8 @@ context("Start() lon Reorder transform 0to360 test") # 3-2 ## Origin longitude in file: [0:360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) + variable <- 'psl' sdate <- '19821201' diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 340860a..84b0527 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -13,6 +13,7 @@ context("Start() lon Reorder non-transform 0to360 test") ############################################## # 3-2 path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:359.722222222222] diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index e093a88..16ad2e0 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -13,6 +13,7 @@ context("Start() lon Reorder non-transform 0to360 test") ############################################## # 3-2 path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 42a79ce..28d8c79 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -7,6 +7,7 @@ context("Start() lon Reorder non-transform retrieve test") test_that("original range 0to360", { ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -2 lons.max <- 2 @@ -86,6 +87,7 @@ res2 <- Start(dat = path_exp, test_that("original range -180to180", { ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index b8279de..fce2dc4 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -7,6 +7,7 @@ context("No transform, reorder test: 'all'") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -23,6 +24,7 @@ NcClose(file) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. lat", { # lon_reorder = CircularSort(0, 360) diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index b2ca0ac..4027b78 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -6,6 +6,7 @@ context("No transform, reorder test: indices()") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -22,6 +23,7 @@ NcClose(file) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. lat", { # lon_reorder = CircularSort(0, 360) diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index 3d576d8..fc7acb6 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -2,7 +2,9 @@ context("Start() reshape parameters check") # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) path_obs <- '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h-r360x181/$var$_$date$.nc' +path_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_obs) var <- 'tas' sdate <- paste0(1993:1995, '1201') @@ -31,7 +33,7 @@ easy_sdate <- c('199312', paste0(rep(1994:1995, each = 3), c('01', '02', '12')), easy_array <- c() for (i in 1:length(easy_sdate)) { - easy_file <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', + easy_file <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', easy_sdate[i], '.nc')) if (substr(easy_sdate[i], 5, 6) == '02') { sub_time <- 1:28 @@ -400,7 +402,7 @@ exp1 <- Start(dat = path_exp, ) # easyNCDF easy_sdate_exp <- '19931201' -easy_file_exp <- NcOpen(paste0('/esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_', +easy_file_exp <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_', easy_sdate_exp, '.nc')) easy_exp <- NcToArray(easy_file_exp, vars_to_read = 'tas', dim_indices = list(longitude = c(1), latitude = c(1), ensemble = c(1), @@ -490,7 +492,7 @@ obs2 <- Start(dat = path_obs, retrieve = TRUE) ) # easyNCDF -easy_file_199311 <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', +easy_file_199311 <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', '199311', '.nc')) easy_obs_199311 <- NcToArray(easy_file_199311, vars_to_read = 'tas', dim_indices = list(lon = c(1), lat = c(1), time = 1:30)) diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index 4bf83c6..e97023d 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -3,6 +3,7 @@ context("Start() return_vars name") # be used but will be changed back to the inner dim names. repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' +repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) #--------------------------------------------------------------- diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 7133adc..d95fa62 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -2,6 +2,7 @@ context("Start() split + merge dim and value check") var_name <- 'tas' path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) suppressWarnings( hcst <- Start(dat = path.exp, @@ -24,6 +25,7 @@ file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7)))) path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) var_name <- "sfcWind" test_that("1. split + merge + narm", { @@ -148,6 +150,7 @@ obs <- Start(dat = path.obs, test_that("4. split only", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312","201411","201412") dim(dates_file) <- c(smonth = 2, syear = 2) diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index 3aa1930..a05a42a 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -4,7 +4,7 @@ test_that("1. The data has units like time", { suppressWarnings( -FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', +FD <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', var = 'FD', # units: days time = indices(1), longitude = indices(1), @@ -14,7 +14,7 @@ FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var retrieve = TRUE) ) suppressWarnings( -FD2 <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', +FD2 <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', var = 'FD', # units: days time = indices(1), longitude = indices(1), @@ -39,7 +39,7 @@ test_that("2. The metadata variable name is not time", { # VITIGEOOS vari <- "rsds" -anlgs <- paste0("/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", +anlgs <- paste0("/esarchive/scratch/aho/startR_unittest_files/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", "$var$/$var$-vitigeoss-cat","_1999-2018_", "$file_date$.nc") file_date_array <- array(dim = c(sweek = 2, sday = 3)) diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index 8a9ca65..7fbac55 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -9,8 +9,8 @@ context("Transform test target grid: lon and lat = 'all'") # cdo is used to verify the data values # Test 1: original grid 'r360x180' library(easyNCDF) -grid1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64' -path <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180' +grid1 <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64' +path <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180' file <- NcOpen(path) arr <- NcToArray(file, @@ -36,6 +36,8 @@ suppressWarnings( #--------------------------------------------------------------- # Test 2: Original grid 'r432x324' path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s2009-r1i1p1f2_gn_201501-201512.nc' # 'r432x324' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(lat = 1:324, lon = 1:432, time = 1:2), @@ -53,6 +55,7 @@ suppressWarnings( #--------------------------------------------------------------- path1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/$var$/gr/v20191016/$var$_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s$sdate$-r1i1p1f1_gr_$sdate$11-202512.nc' # 'r360x180' +path1 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path1) test_that("1. 'all'", { suppressWarnings( @@ -108,6 +111,7 @@ test_that("1. 'all'", { #--------------------------------------------------------------- path2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/$var$/gn/v20200417/$var$_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$sdate$11-201512.nc' # 'r432x324' +path2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path2) test_that("2. test path 2", { suppressWarnings( diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index dee8b4e..90b48b6 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -26,6 +26,9 @@ context("Transform: check with cdo") # The result of cdo is from CDO version 1.9.8. +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + ############################################## test_that("1. normal regional situation", { @@ -35,7 +38,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -99,7 +102,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -173,7 +176,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -251,7 +254,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -308,7 +311,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -386,7 +389,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -456,7 +459,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -517,7 +520,7 @@ lons.min <- 0 lons.max <- 359 suppressWarnings( - exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -617,7 +620,7 @@ lons.min <- 0.5 lons.max <- 359.9 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -709,4 +712,4 @@ expect_equal( # [8,] 299.2109 300.3170 300.1524 299.6214 298.8563 # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644 -############################################## \ No newline at end of file +############################################## diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index b41ec0a..3852da9 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -9,7 +9,7 @@ context("Transform and lat_reorder test: 'all'") #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) -path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -27,6 +27,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. 'all'", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 6c3a797..f729545 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -15,6 +15,7 @@ context("Transform and lat_reorder test: indices") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -32,6 +33,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices(1:640)", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index 92490ae..0333101 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -13,6 +13,7 @@ context("Transform and lat_reorder test: values") # cdo is used to verify the data values library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -30,6 +31,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. values(list(-90, 90))", { diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index f164046..d3c3dfa 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -5,6 +5,8 @@ context("Start() transform across_meridian lon order check") test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + var <- 'tas' lon.min <- 170 lon.max <- 185 #359.723 #360 diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index ede3c95..62d31da 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -5,6 +5,7 @@ test_that("1. Sort() and CircularSort(0, 360)", { # Original lon is [-180, 180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -140,6 +141,7 @@ test_that("2. Sort(decreasing = TRUE) and CircularSort(-180, 180)", { # Original lon is [0, 360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- 190 lons.max <- 200 lats.min <- 10 diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 657cca3..500168e 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -14,6 +14,7 @@ context("Transform: three selector forms") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -31,6 +32,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices", { diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index ff83441..e2fef3b 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -6,9 +6,11 @@ test_that("1. ex1_8, case 1", { path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tas <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tas) path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tos) suppressWarnings( data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), var = c('tas', 'tos'), diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 76c4f91..a84530f 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -10,6 +10,7 @@ context("List of values and vector of values") # To get lat and lon vectors library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) lats <- NcToArray(file, dim_indices = list(latitude = 1:35), vars_to_read = 'latitude') @@ -18,11 +19,14 @@ lons <- NcToArray(file, NcClose(file) #------------------------------------------------------------------ +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + test_that("1. transform", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -47,7 +51,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values. This one is a weird usage though... suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -85,7 +89,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -104,7 +108,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -137,7 +141,7 @@ test_that("3. transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -163,7 +167,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var #WRONG!!!!!!!!!! # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -201,7 +205,7 @@ test_that("4. no transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -220,7 +224,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), -- GitLab From c896a633ce980ca53b33b262ae25906b60dc51a9 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 24 Aug 2023 11:53:03 +0200 Subject: [PATCH 27/32] Add note saying the data has been deleted. --- inst/doc/usecase/ex1_13_implicit_dependency.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R index 6740a21..8f60413 100644 --- a/inst/doc/usecase/ex1_13_implicit_dependency.R +++ b/inst/doc/usecase/ex1_13_implicit_dependency.R @@ -55,6 +55,7 @@ summary(exp) #============================================================================= # Case 2: 'region' depends on 'sdate' +#NOTE: Exp "a35b" has been deleted. This example cannot be run now. path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') -- GitLab From 5b81e3738a3e603e26cb09a44bd422051671124e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 30 Aug 2023 16:45:44 +0200 Subject: [PATCH 28/32] Change autosubmit 'THREADS' back to 'cores_per_job'; Modify documentation about ncores and num_procs --- R/ByChunks_autosubmit.R | 22 ++++++++++------------ R/Compute.R | 4 ++-- R/Start.R | 4 ++-- R/Utils.R | 6 +++--- inst/doc/practical_guide.md | 23 +++++++++++++++++------ man/Compute.Rd | 4 ++-- man/Start.Rd | 4 ++-- 7 files changed, 38 insertions(+), 29 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 133f467..5a43146 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -289,19 +289,17 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.") } ### cores_per_job -# if (is.null(cluster[['cores_per_job']])) { -# cluster[['cores_per_job']] <- threads_compute -# } -# if (!is.numeric(cluster[['cores_per_job']])) { -# stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") -# } -# cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) + if (is.null(cluster[['cores_per_job']])) { + cluster[['cores_per_job']] <- threads_compute + } + if (!is.numeric(cluster[['cores_per_job']])) { + stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.") + } + cluster[['cores_per_job']] <- round(cluster[['cores_per_job']]) +# NOTE: Why do we have this condition? # if (cluster[['cores_per_job']] > threads_compute) { # .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].") # } - if (!is.null(cluster[['cores_per_job']])) { - .warning("The component 'cores_per_job' in cluster list is not used. Please specify the cores by parameter 'threads_compute'.") - } ### job_wallclock tmp <- strsplit( '01:00:00', ':')[[1]] if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) { @@ -533,7 +531,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', write_autosubmit_bash(chunks, cluster, autosubmit_suite_dir = autosubmit_suite_dir) # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ - write_autosubmit_confs(chunks, threads_compute, cluster, autosubmit_suite_dir) + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) # Iterate through chunks chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) @@ -550,7 +548,7 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } -# timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['cores_per_job']] <- cluster[['cores_per_job']] timings[['concurrent_chunks']] <- cluster[['max_jobs']] t_end_bychunks_setup <- Sys.time() diff --git a/R/Compute.R b/R/Compute.R index 981f28b..7ab6549 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -19,9 +19,9 @@ #' those not required as the target dimension in function Step(). The default #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. -#'@param threads_load An integer indicating the number of execution threads to +#'@param threads_load An integer indicating the number of execution processes to #' use for the data retrieval stage. The default value is 1. -#'@param threads_compute An integer indicating the number of execution threads +#'@param threads_compute An integer indicating the number of execution processes #' to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The comoponents vary from the different machines. diff --git a/R/Start.R b/R/Start.R index 92eb16d..b72a02b 100644 --- a/R/Start.R +++ b/R/Start.R @@ -704,8 +704,8 @@ #'@param num_procs An integer of number of processes to be created for the #' parallel execution of the retrieval/transformation/arrangement of the #' multiple involved files in a call to Start(). If set to NULL, -#' takes the number of available cores (as detected by detectCores() in -#' the package 'future'). The default value is 1 (no parallel execution). +#' takes the number of available cores (as detected by future::detectCores). +#' The default value is 1 (no parallel execution). #'@param ObjectBigmemory a character string to be included as part of the #' bigmemory object name. This parameter is thought to be used internally by the #' chunking capabilities of startR. diff --git a/R/Utils.R b/R/Utils.R index f496333..a20653e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -928,7 +928,7 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { } # This function generates the .yml files under autosubmit conf/ -write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_suite_dir) { +write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { #TODO: Remove this library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) @@ -975,7 +975,7 @@ write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_ # wallclock from '01:00:00' to '01:00' jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) jobs[[1]]$PLATFORM <- cluster$queue_host - jobs[[1]]$THREADS <- as.integer(threads_compute) + jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) jobs[[1]][names(chunks)] <- "" @@ -995,7 +995,7 @@ write_autosubmit_confs <- function(chunks, threads_compute, cluster, autosubmit_ } else if (conf_type == "platforms") { if (tolower(cluster$queue_host) != "local") { conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user - conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(threads_compute) + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cores_per_job) if (!is.null(cluster$extra_queue_params)) { tmp <- unlist(cluster$extra_queue_params) for (ii in 1:length(tmp)) { diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index dead996..4f79ae0 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -371,8 +371,18 @@ It is not possible for now to define workflows with more than one step, but this ### 4-3. Compute() Once the data sources are declared and the workflow is defined, you can proceed to specify the execution parameters (including which platform to run on) and trigger the execution with the `Compute()` function. +The execution can run locally (only on the machine where the R session is running) or on different HPCs (Nord3-v2, CTE-Power9 and other HPCs). -Next, a few examples are shown with `Compute()` calls to trigger the processing of a dataset locally (only on the machine where the R session is running) and different HPCs (Nord3-v2, CTE-Power9 and other HPCs). However, let's first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. +The common Compute() parameters of local and remote execution are: +- `wf`: The workflow defined by the previous steps. +- `chunks`: The dimensions to be chunked and how many chunks you want for each dimension. +startR will automatically chunk the data for you. See more details in session [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores). +- `threads_load`: The number of processes to be created for data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. +- `threads_compute`: The number of processes to be created for data computing. It is used as multiApply::Apply parameter "ncores". + +Using more than 2 threads for the retrieval will usually be perjudicial, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. + +In the following example, we first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. ```r library(startR) @@ -526,7 +536,7 @@ dim(res$output1) 2 1 1 1 640 1296 ``` -In addition to performing the computation in chunks, you can adjust the number of execution threads to use for the data retrieval stage (with `threads_load`) and for the computation (with `threads_compute`). Using more than 2 threads for the retrieval will usually be perjudicial, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. +Run Compute() with desired chunks and resource setting. ```r res <- Compute(wf, @@ -729,17 +739,18 @@ You can see one example of cluster configuration below. The cluster components and options are explained next: -- `queue_host`: Must match the platform name in Autosubmit configuration file `platforms.yml`, or 'local'. The provided platforms are: 'nord3'. +- `queue_host`: Must match the platform name in Autosubmit configuration file _platforms.yml_, or 'local'. The current provided platforms are: 'nord3'. - `expid`: The Autosubmit experiment to run the computation. You can create the experiment beforehand or let startR create one for you by not specifying this componenet. To have the good practice, note down the expid if it is automatically created by startR and re-use/delete it afterwards. - `hpc_user`: Your user ID on the HPC (i.e., "bsc32xxx"). It is required if "queue_host" is not 'local'. - `data_dir`: The path to the data repository if the data is not shared. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. - `init_commands`: The initial commands in bash script before R script runs. For example, the modules required by computation can be loaded here. -- `r_module`: name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. -- `CDO_module`: name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). +- `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. +- `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). - `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used. -- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +It is corresponded to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. - `extra_queue_params`: list of character strings for additional queue headers for the jobs diff --git a/man/Compute.Rd b/man/Compute.Rd index f6ad867..f475860 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -33,10 +33,10 @@ one chunk.} \item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'.} -\item{threads_load}{An integer indicating the number of execution threads to +\item{threads_load}{An integer indicating the number of execution processes to use for the data retrieval stage. The default value is 1.} -\item{threads_compute}{An integer indicating the number of execution threads +\item{threads_compute}{An integer indicating the number of execution processes to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the diff --git a/man/Start.Rd b/man/Start.Rd index 3bdae42..7cdc9f8 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -684,8 +684,8 @@ value is FALSE.} \item{num_procs}{An integer of number of processes to be created for the parallel execution of the retrieval/transformation/arrangement of the multiple involved files in a call to Start(). If set to NULL, -takes the number of available cores (as detected by detectCores() in -the package 'future'). The default value is 1 (no parallel execution).} +takes the number of available cores (as detected by future::detectCores). +The default value is 1 (no parallel execution).} \item{ObjectBigmemory}{a character string to be included as part of the bigmemory object name. This parameter is thought to be used internally by the -- GitLab From 5bae396cbe4e045d15fe86a04af5b23f1de7317c Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 30 Aug 2023 17:10:06 +0200 Subject: [PATCH 29/32] Correct cores_per_job to cluster --- R/Utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Utils.R b/R/Utils.R index a20653e..3fce2da 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -995,7 +995,7 @@ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { } else if (conf_type == "platforms") { if (tolower(cluster$queue_host) != "local") { conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user - conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cores_per_job) + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) if (!is.null(cluster$extra_queue_params)) { tmp <- unlist(cluster$extra_queue_params) for (ii in 1:length(tmp)) { -- GitLab From 263cddd3ea278849ae966881fd9593c76557b131 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 11:26:21 +0200 Subject: [PATCH 30/32] Refine document about threads and cores_per_job --- R/Compute.R | 11 ++++++----- inst/doc/practical_guide.md | 8 ++++---- man/Compute.Rd | 11 ++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/R/Compute.R b/R/Compute.R index 7ab6549..321a0a1 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -19,16 +19,17 @@ #' those not required as the target dimension in function Step(). The default #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. -#'@param threads_load An integer indicating the number of execution processes to -#' use for the data retrieval stage. The default value is 1. -#'@param threads_compute An integer indicating the number of execution processes -#' to use for the computation. The default value is 1. +#'@param threads_load An integer indicating the number of parallel execution +#' processes to use for the data retrieval stage. The default value is 1. +#'@param threads_compute An integer indicating the number of parallel execution +#' processes to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The comoponents vary from the different machines. #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. -#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'. +#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +#' 'ecFlow'. #'@param ecflow_suite_dir A character string indicating the path to a folder in #' the local workstation where to store temporary files generated for the #' automatic management of the workflow. Only needed when the execution is run diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 4f79ae0..6bc4bef 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -377,8 +377,8 @@ The common Compute() parameters of local and remote execution are: - `wf`: The workflow defined by the previous steps. - `chunks`: The dimensions to be chunked and how many chunks you want for each dimension. startR will automatically chunk the data for you. See more details in session [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores). -- `threads_load`: The number of processes to be created for data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. -- `threads_compute`: The number of processes to be created for data computing. It is used as multiApply::Apply parameter "ncores". +- `threads_load`: The number of parallel execution processes to be created for data retrieval stage. It is used as Start() parameter "num_procs" if it is not specified when Start() is defined. +- `threads_compute`: The number of parallel execution processes to be created for data computing. It is used as multiApply::Apply parameter "ncores". Using more than 2 threads for the retrieval will usually be perjudicial, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them. @@ -621,7 +621,7 @@ The cluster components and options are explained next: - `temp_dir`: directory on the HPC where to store temporary files. Must be accessible from the HPC login node and all HPC nodes. - `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module. - `r_module`: name of the UNIX environment module to be used for R. If not specified, 'module load R' will be used. -- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node (should be core?) may be capable of supporting more than one computing thread. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. - `extra_queue_params`: list of character strings with additional queue headers for the jobs to be submitted to the HPC. Mainly used to specify the amount of memory to book for each job (e.g. '#SBATCH --mem-per-cpu=30000'; __NOTE: this line does not work on Nord3v2__), to request special queuing (e.g. '#SBATCH --qos=bsc_es'), or to request use of specific software (e.g. '#SBATCH --reservation=test-rhel-7.5'). @@ -749,7 +749,7 @@ To have the good practice, note down the expid if it is automatically created by - `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used. - `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `tranform` is required in Start(). - `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used. -- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. +- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue. It is corresponded to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. diff --git a/man/Compute.Rd b/man/Compute.Rd index f475860..270846a 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -31,13 +31,14 @@ those not required as the target dimension in function Step(). The default value is 'auto', which lists all the non-target dimensions and each one has one chunk.} -\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is 'ecFlow'.} +\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +'ecFlow'.} -\item{threads_load}{An integer indicating the number of execution processes to -use for the data retrieval stage. The default value is 1.} +\item{threads_load}{An integer indicating the number of parallel execution +processes to use for the data retrieval stage. The default value is 1.} -\item{threads_compute}{An integer indicating the number of execution processes -to use for the computation. The default value is 1.} +\item{threads_compute}{An integer indicating the number of parallel execution +processes to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the machine to be run on. The comoponents vary from the different machines. -- GitLab From a30eb2e10e2b964936917834d3fc409cda3c33c3 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 13:30:04 +0200 Subject: [PATCH 31/32] Remove TODOs --- R/ByChunks_autosubmit.R | 6 ++---- R/ByChunks_ecflow.R | 32 +++++++++++--------------------- R/Utils.R | 12 +++--------- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R index 5a43146..08414f9 100644 --- a/R/ByChunks_autosubmit.R +++ b/R/ByChunks_autosubmit.R @@ -467,10 +467,8 @@ ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', } # Copy load_process_save_chunk_autosubmit.R into local folder -#TODO: Change the following line to read from package - chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/load_process_save_chunk_autosubmit.R") -# chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', -# package = 'startR')) + chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R', + package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', diff --git a/R/ByChunks_ecflow.R b/R/ByChunks_ecflow.R index cb5c95b..6292448 100644 --- a/R/ByChunks_ecflow.R +++ b/R/ByChunks_ecflow.R @@ -438,10 +438,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', "access is properly set in both directions.")) # Copy load_process_save_chunk_ecflow.R into shared folder - #TODO: Change to package file - chunk_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/load_process_save_chunk_ecflow.R") -# chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', -# package = 'startR')) + chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', + package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ', @@ -494,10 +492,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk_ecflow.R')) # Copy Chunk.ecf into shared folder - #TODO: Change to package file - chunk_ecf_script <- file("/esarchive/scratch/aho/tmp/chunking/ecFlow/Chunk.ecf") -# chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', -# package = 'startR')) + chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', + package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) close(chunk_ecf_script) if (cluster[['queue_type']] == 'host') { @@ -562,9 +558,7 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy queue header into shared folder #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) - #TODO: Change to package file - chunk_queue_header <- file(paste0("/esarchive/scratch/aho/tmp/chunking/ecFlow/", cluster[['queue_type']], '.h')) -# chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) + chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) chunk_queue_header_lines <- gsub('^include_extra_queue_params', @@ -573,15 +567,10 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) # Copy headers - #TODO: Change to package file - file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/head.h"), ecflow_suite_dir_suite) - file.copy(file("/esarchive/scratch/aho/tmp/chunking/ecFlow/tail.h"), ecflow_suite_dir_suite) -# file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), -# ecflow_suite_dir_suite) -# file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), -# ecflow_suite_dir_suite) - #file.copy(system.file('chunking/clean_devshm.sh', package = 'startR'), - # ecflow_suite_dir_suite) + file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), + ecflow_suite_dir_suite) + file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), + ecflow_suite_dir_suite) } add_line <- function(suite, line, tabs) { @@ -900,7 +889,8 @@ ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto', ecflow_server[['port']])) timings[['total']] <- t_begin_total - startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + workflow_manager = 'ecFlow', suite_id = suite_id, chunks = chunks, num_outputs = length(arrays_of_results), ecflow_suite_dir = ecflow_suite_dir, diff --git a/R/Utils.R b/R/Utils.R index 3fce2da..940e2d3 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -885,10 +885,8 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { chunk_args[2, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '_N%') chunk_args <- paste0('(', paste(c(chunk_args), collapse = ' '), ')') - #TODO: Change to the following line getting .sh template from package - # bash_script <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', - # package = 'startR')) - bash_script_template <- file("/esarchive/scratch/aho/tmp/chunking/Autosubmit/startR_autosubmit.sh") + bash_script_template <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh', + package = 'startR')) bash_script_lines <- readLines(bash_script_template) close(bash_script_template) @@ -929,16 +927,12 @@ write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) { # This function generates the .yml files under autosubmit conf/ write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) { - #TODO: Remove this - library(configr) # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3) # "cluster" is the argument "cluster" in Compute(), to set machine configuration # "autosubmit_suite_dir" should be the local folder that has R script, like ecflow_suite_dir in Compute() # Get config template files from package - #TODO: Change to package path -# template_dir <- system.file('chunking/Autosubmit/', package = 'startR') - template_dir <- "/esarchive/scratch/aho/tmp/chunking/Autosubmit/" + template_dir <- system.file('chunking/Autosubmit/', package = 'startR') config_files <- list.files(template_dir, pattern = "*\\.yml$") for (i_file in config_files) { -- GitLab From b3baa4a1431580ccb99ea8a6c4fc80c96cbc2897 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 31 Aug 2023 13:30:44 +0200 Subject: [PATCH 32/32] Add explanation about ecFlow port; ignore .gitlab/ when building R package --- .Rbuildignore | 2 +- inst/doc/practical_guide.md | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 2ef8ba9..d988cd4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,7 +11,7 @@ ## unit tests should be ignored when building the package for CRAN ^tests$ ^inst/PlotProfiling\.R$ - +^.gitlab$ # Suggested by http://r-pkgs.had.co.nz/package.html ^.*\.Rproj$ # Automatically added by RStudio, ^\.Rproj\.user$ # used for temporary files. diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 6bc4bef..7038ad7 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -22,6 +22,7 @@ If you would like to start using startR rightaway on the BSC infrastructure, you 2. [**How to clean a failed execution**](#5-2-how-to-clean-a-failed-execution) 3. [**Visualizing the profiling of the execution**](#5-3-visualizing-the-profiling-of-the-execution) 4. [**Pending features**](#5-4-pending-features) + 5. [**ecFlow server and port**](#5-5-ecflow-server-and-port) 6. [**Other examples**](#6-other-examples) 7. [**Compute() cluster templates**](#7-compute-cluster-templates) @@ -794,9 +795,14 @@ module load ecFlow ecflow_ui & ``` -After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. However, if it is the first time you are using the EC-Flow GUI with startR, you will need to register the EC-Flow server that has been started automatically by `Compute()`. You can open the top menu "Manage servers" > "New server" > set host to 'localhost' > set port to '5678' > save. +After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. +However, if it is the first time you are using the EC-Flow GUI with startR, +you will need to register the EC-Flow server that has been started automatically by `Compute()`. +You can open the top menu "Manage servers" > "Add server" > Put a recognizable 'Name' for host > set 'Host' to your workstation (i.e., bscearthxxx) or 'localhost' > set 'Port' to '5678' > save. +See more information about ecFlow server in [#5-5](#5-5-ecflow-server-and-port). + +Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. _(NOTE: 'host' is not supported for now. You can use `ecflow_server = c(port = xxxx)` to change port number.)_ -Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. After registering the EC-Flow server, an expandable entry will appear, where you can see listed the jobs to be executed, one for each chunk, with their status represented by a colour. Gray means pending, blue means queuing, green means in progress, and yellow means completed. @@ -916,6 +922,15 @@ You can click on the image to expand it. - Adding feature in `Start()` to read sparse grid points. - Allow for chunking along "essential" (a.k.a. "target") dimensions. +### 5-5. ecFlow server and port + +You cannot start two ecFlow servers on the same machine with the same port number. +That is, the port number on one workstation cannot be shared. For example, if port '5678' on workstation 'bscearth123' is taken by user A, +user B cannot ssh to 'bscearth123' and use the port number '5678'. But user B can use a new port number and specify 'ecflow_server' in Compute(). +Or, if user B uses another workstation that has port number '5678' available, s/he can use it without problem. + +You can check the host-port you have in `~/.ecflowrc/servers`. To stop using a server, you can go to ecFlow UI, right click the server > halt > checkpoint > terminate. + ## 6. Other examples You can find more use cases in [usecase.md](inst/doc/usecase.md). -- GitLab