diff --git a/.Rbuildignore b/.Rbuildignore
index 2ef8ba9063900318c7c0be04cc2ac48a636842e4..d988cd48d7cf9b292349f0624d378fbefcf9e4df 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,7 +11,7 @@
 ## unit tests should be ignored when building the package for CRAN
 ^tests$
 ^inst/PlotProfiling\.R$
-
+^.gitlab$
 # Suggested by http://r-pkgs.had.co.nz/package.html
 ^.*\.Rproj$ # Automatically added by RStudio,
 ^\.Rproj\.user$ # used for temporary files.
diff --git a/DESCRIPTION b/DESCRIPTION
index 2d569fe9c1883931c7784607620b2181a04e6f5a..173bacaaa482db6982262cdb135768d145de976b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,7 +35,8 @@ Imports:
 Suggests:
   stats,
   utils,
-  testthat
+  testthat,
+  yaml
 License: GPL-3
 URL: https://earth.bsc.es/gitlab/es/startR/
 BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues
diff --git a/R/ByChunks_autosubmit.R b/R/ByChunks_autosubmit.R
new file mode 100644
index 0000000000000000000000000000000000000000..08414f9dc91bcc0583bbedbe4ccd9862534308de
--- /dev/null
+++ b/R/ByChunks_autosubmit.R
@@ -0,0 +1,662 @@
+#'Execute the operation by chunks
+#'
+#'This is an internal function used in Compute(), executing the operation by
+#'the chunks specified in Compute(). It also returns the configuration details
+#'and profiling information. It is used when the workflow manager is
+#'Autosubmit.
+#'
+#'@param step_fun A function with the class 'startR_step_fun' containing the
+#'  details of the operation.
+#'@param cube_headers A list with the class 'startR_cube' returned by Start().
+#'  It contains the details of the data to be operated on.
+#'@param \dots Additional parameters for the inputs of 'step_fun'.
+#'@param chunks A named list of the dimensions along which to split the data
+#'  and the number of chunks to make for each. The chunked dimensions can only
+#'  be those not required as target dimensions in function Step(). The default
+#'  value is 'auto', which lists all the non-target dimensions with one chunk
+#'  each.
+#'@param threads_load An integer indicating the number of execution threads to
+#'  use for the data retrieval stage. The default value is 1.
+#'@param threads_compute An integer indicating the number of execution threads
+#'  to use for the computation. The default value is 1.
+#'@param cluster A list of components that define the configuration of the
+#'  machine to be run on. The components vary for different machines. Check the
+#'  \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}
+#'  for more details and examples.
+#'@param autosubmit_suite_dir A character string indicating the path to a folder
+#'  where to store temporary files generated for the automatic management of the
+#'  workflow manager. This path should be accessible from the local workstation
+#'  as well as from the Autosubmit machine. The default value is NULL, and a
+#'  temporary folder will be created.
+#'@param autosubmit_server A character vector indicating the login node of the
+#'  Autosubmit machine. It can be "bscesautosubmit01" or "bscesautosubmit02".
+#'  The default value is NULL, and the node will be randomly chosen.
+#'@param silent A logical value deciding whether to print the computation
+#'  progress (FALSE) on the R session or not (TRUE). It only works when the
+#'  execution runs locally or the parameter 'wait' is TRUE. The default value
+#'  is FALSE.
+#'@param debug A logical value deciding whether to return detailed messages on
+#'  the progress and operations in a Compute() call (TRUE) or not (FALSE).
+#' Automatically changed to FALSE if parameter 'silent' is TRUE. The default +#' value is FALSE. +#'@param wait A logical value deciding whether the R session waits for the +#' Compute() call to finish (TRUE) or not (FALSE). If FALSE, it will return an +#' object with all the information of the startR execution that can be stored +#' in your disk. After that, the R session can be closed and the results can +#' be collected later with the Collect() function. The default value is TRUE. +#' +#'@return A list of data arrays for the output returned by the last step in the +#' specified workflow. The configuration details and profiling information are +#' attached as attributes to the returned list of arrays. +#' +#'@examples +#' # ByChunks_autosubmit() is internally used in Compute(), not intended to be +#' # used by users. The example just illustrates the inputs of +#' # ByChunks_autosubmit(). +#' # data_path <- system.file('extdata', package = 'startR') +#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc') +#' # sdates <- c('200011', '200012') +#' # data <- Start(dat = list(list(path = path_obs)), +#' # var = 'tos', +#' # sdate = sdates, +#' # time = 'all', +#' # latitude = 'all', +#' # longitude = 'all', +#' # return_vars = list(latitude = 'dat', +#' # longitude = 'dat', +#' # time = 'sdate'), +#' # retrieve = FALSE) +#' # fun <- function(x) { +#' # lat = attributes(x)$Variables$dat1$latitude +#' # weight = sqrt(cos(lat * pi / 180)) +#' # corrected = Apply(list(x), target_dims = "latitude", +#' # fun = function(x) {x * weight}) +#' # } +#' # step <- Step(fun = fun, +#' # target_dims = 'latitude', +#' # output_dims = 'latitude', +#' # use_libraries = c('multiApply'), +#' # use_attributes = list(data = "Variables")) +#' #ByChunks_autosubmit(step, data) +#' +#'@import multiApply +#'@importFrom methods is +#'@noRd +ByChunks_autosubmit <- function(step_fun, cube_headers, ..., chunks = 'auto', + threads_load = 1, threads_compute = 1, + cluster = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { + + #NOTE: + #autosubmit_suite_dir: /home/Earth/aho/startR_local_autosubmit/ + #autosubmit_suite_dir_suite: /home/Earth/aho/startR_local_autosubmit/STARTR_CHUNKING_a68h/ + #remote_autosubmit_suite_dir: /esarchive/autosubmit/a68h/proj/ + #remote_autosubmit_suite_dir_suite: /esarchive/autosubmit/a68h/proj/STARTR_CHUNKING_a68h/ + + # Build object to store profiling timings + t_begin_total <- Sys.time() + t_begin_bychunks_setup <- t_begin_total + timings <- list(nchunks = NULL, + concurrent_chunks = NULL, + cores_per_job = NULL, + threads_load = NULL, + threads_compute = NULL, + bychunks_setup = NULL, + transfer = NULL, + queue = NULL, + job_setup = NULL, + load = NULL, + compute = NULL, + transfer_back = NULL, + merge = NULL, + total = NULL) + + MergeArrays <- .MergeArrays + + # Sanity checks + ## step_fun + if (!is(step_fun, 'startR_step_fun')) { + stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", + "by the function Step.") + } + + ## cube_headers + if (is(cube_headers, 'startR_cube')) { + cube_headers <- list(cube_headers) + } + if (!all(sapply(lapply(cube_headers, class), + function(x) 'startR_cube' %in% x))) { + stop("All objects passed in 'cube_headers' must be of class 'startR_cube', ", + "as returned by Start().") + } + if (length(cube_headers) != length(attr(step_fun, 'TargetDims'))) { + stop("Number of inputs in parameter 'cube_headers' must be equal to the ", + "number of inputs 
expected by the function 'step_fun'.") + } + + ## threads_load and threads_compute + if (!is.numeric(threads_load)) { + stop("Parameter 'threads_load' must be a numeric value.") + } + threads_load <- round(threads_load) + if (!is.numeric(threads_compute)) { + stop("Parameter 'threads_compute' must be a numeric value.") + } + threads_compute <- round(threads_compute) + timings[['threads_load']] <- threads_load + timings[['threads_compute']] <- threads_compute + + ## autosubmit_suite_dir + if (is.null(autosubmit_suite_dir)) { + # Create a tmp folder as autosubmit_suite_dir + autosubmit_suite_dir <- file.path(getwd(), "startR_autosubmit_temp") + if (!dir.exists(autosubmit_suite_dir)) { + dir.create("startR_autosubmit_temp", recursive = FALSE) + } + .warning(paste0("Parameter 'autosubmit_suite_dir' is not specified. Create a temporary ", + "folder under current directory: ", autosubmit_suite_dir, "/. Make sure ", + "that Autosubmit machine can find this path.")) + } + if (!is.character(autosubmit_suite_dir)) { + stop("Parameter 'autosubmit_suite_dir' must be a character string.") + } + + ## autosubmit_server + if (!is.null(autosubmit_server)) { + if (!autosubmit_server %in% c('bscesautosubmit01', 'bscesautosubmit02')) { + stop("Parameter 'autosubmit_server' must be one existing Autosubmit machine login node, 'bscesautosubmit01' or 'bscesautosubmit02'.") + } + } else { + autosubmit_server <- paste0('bscesautosubmit0', sample(1:2, 1)) + } + + ## silent + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + + ## debug + if (!is.logical(debug)) { + stop("Parameter 'debug' must be logical.") + } + if (silent) { + debug <- FALSE + } + + ## wait + if (!is.logical(wait)) { + stop("Parameter 'wait' must be logical.") + } + + ## cluster + default_cluster <- list(queue_host = NULL, +# queue_type = 'slurm', + data_dir = NULL, +# temp_dir = NULL, + lib_dir = NULL, + init_commands = list(''), + r_module = 'R', + CDO_module = NULL, + autosubmit_module = 'autosubmit', + node_memory = NULL, # not used + cores_per_job = NULL, + job_wallclock = '01:00:00', + max_jobs = 6, + extra_queue_params = list(''), +# bidirectional = TRUE, + polling_period = 10, + special_setup = 'none', + expid = NULL, + hpc_user = NULL, + run_dir = NULL) + if (!is.list(cluster) || is.null(names(cluster))) { + stop("Parameter 'cluster' must be a named list.") + } + if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir', + 'temp_dir', 'lib_dir', 'init_commands', + 'r_module', 'CDO_module', 'autosubmit_module', + 'ecflow_module', 'node_memory', + 'cores_per_job', 'job_wallclock', 'max_jobs', + 'extra_queue_params', 'bidirectional', + 'polling_period', 'special_setup', 'expid', 'hpc_user', + 'run_dir' + )))) { + stop("Found invalid component names in parameter 'cluster'.") + } + # Remove ecFlow components + redundant_components <- c('queue_type', 'temp_dir', 'ecflow_module', 'bidirectional') + if (any(redundant_components %in% names(cluster))) { + tmp <- redundant_components[which(redundant_components %in% names(cluster))] + .warning(paste0("Cluster component ", paste(tmp, collapse = ','), + " not used when Autosubmit is the workflow manager.")) + cluster[[tmp]] <- NULL + } + default_cluster[names(cluster)] <- cluster + cluster <- default_cluster + + ### queue_host + support_hpcs <- c('local', 'nord3') # names in platforms.yml + if (is.null(cluster$queue_host) || !cluster$queue_host %in% support_hpcs) { + stop("Cluster component 'queue_host' must be one of the follows: ", + paste(support_hpcs, 
collapse = ','), '.')
+  }
+
+  ### data_dir
+  is_data_dir_shared <- FALSE
+  if (is.null(cluster[['data_dir']])) {
+    is_data_dir_shared <- TRUE
+  } else {
+    if (!is.character(cluster[['data_dir']])) {
+      stop("The component 'data_dir' of the parameter 'cluster' must be a character string.")
+    }
+    remote_data_dir <- cluster[['data_dir']]
+  }
+  ### lib_dir
+  if (!is.null(cluster[['lib_dir']])) {
+    if (!is.character(cluster[['lib_dir']])) {
+      stop("The component 'lib_dir' of the parameter 'cluster' must be NULL or ",
+           "a character string.")
+    }
+  }
+  ### init_commands
+  if (!is.list(cluster[['init_commands']]) ||
+      !all(sapply(cluster[['init_commands']], is.character))) {
+    stop("The component 'init_commands' of the parameter 'cluster' must be a list of ",
+         "character strings.")
+  }
+  ### r_module
+  if (!is.character(cluster[['r_module']])) {
+    stop("The component 'r_module' of the parameter 'cluster' must be a character string.")
+  }
+  if ((nchar(cluster[['r_module']]) < 1) || (grepl(' ', cluster[['r_module']]))) {
+    stop("The component 'r_module' of the parameter 'cluster' must have at least one character ",
+         "and contain no blank spaces.")
+  }
+  ### CDO_module
+  if (!is.null(cluster[['CDO_module']])) {
+    if (!is.character(cluster[['CDO_module']])) {
+      stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.")
+    }
+    if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) {
+      warning("The component 'CDO_module' of the parameter 'cluster' must have at least ",
+              "one character and contain no blank spaces.")
+    }
+    cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']])
+  }
+  ### autosubmit_module
+  if (!is.character(cluster[['autosubmit_module']])) {
+    stop("The component 'autosubmit_module' of the parameter 'cluster' must be a character string.")
+  }
+  ### cores_per_job
+  if (is.null(cluster[['cores_per_job']])) {
+    cluster[['cores_per_job']] <- threads_compute
+  }
+  if (!is.numeric(cluster[['cores_per_job']])) {
+    stop("The component 'cores_per_job' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['cores_per_job']] <- round(cluster[['cores_per_job']])
+# NOTE: Why do we have this condition?
+# if (cluster[['cores_per_job']] > threads_compute) {
+#   .message("WARNING: 'threads_compute' should be >= cluster[['cores_per_job']].")
+# }
+  ### job_wallclock
+  tmp <- strsplit(cluster[['job_wallclock']], ':')[[1]]
+  if (!length(tmp) %in% c(2, 3) | any(!grepl("^[0-9]+$", tmp)) | any(nchar(tmp) != 2)) {
+    stop("The component 'job_wallclock' should be in the format HH:MM or HH:MM:SS.")
+  }
+  ### max_jobs
+  if (!is.numeric(cluster[['max_jobs']])) {
+    stop("The component 'max_jobs' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['max_jobs']] <- round(cluster[['max_jobs']])
+  ### extra_queue_params
+  if (!is.list(cluster[['extra_queue_params']]) ||
+      !all(sapply(cluster[['extra_queue_params']], is.character))) {
+    stop("The component 'extra_queue_params' of the parameter 'cluster' must be a list of ",
+         "character strings.")
+  }
+  ### polling_period
+  if (!is.numeric(cluster[['polling_period']])) {
+    stop("The component 'polling_period' of the parameter 'cluster' must be numeric.")
+  }
+  cluster[['polling_period']] <- round(cluster[['polling_period']])
+  ### special_setup
+  if (!(cluster[['special_setup']] %in% c('none', 'marenostrum4'))) {
+    stop("The value provided for the component 'special_setup' of the parameter ",
+         "'cluster' is not recognized.")
+  }
+  ### expid
+  as_module <- cluster[['autosubmit_module']]
+  if (is.null(cluster[['expid']])) {
+    text <- system(
+      paste0("module load ", as_module, "; ",
+             "autosubmit expid -H local -d 'startR computation'"),
+      intern = TRUE)
+    cluster[['expid']] <- strsplit(
+      text[grep("The new experiment", text)],
+      "\"")[[1]][2]
+    message(paste0("ATTENTION: The new experiment '", cluster[['expid']],
+                   "' has been created. Please note it down."))
+  } else {
+    if (!is.character(cluster[['expid']]) | length(cluster[['expid']]) != 1) {
+      stop("The component 'expid' of the parameter 'cluster' must be a character string.")
+    }
+    if (!dir.exists(file.path("/esarchive/autosubmit", cluster[['expid']]))) {
+      stop("Cluster component 'expid' is not found under /esarchive/autosubmit/.")
+    }
+  }
+  suite_id <- cluster[['expid']]
+
+  ### hpc_user
+  if (!is.null(cluster$hpc_user) && (!is.character(cluster$hpc_user) | length(cluster$hpc_user) != 1)) {
+    stop("Cluster component 'hpc_user' must be a character string.")
+  }
+  ### run_dir
+  if (!is.null(cluster$run_dir)) {
+    if (!dir.exists(cluster$run_dir)) {
+      stop("Cluster component 'run_dir' ", cluster$run_dir, " is not found.")
+    }
+  }
+
+#==============================================
+
+  autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/')
+  if (!dir.exists(autosubmit_suite_dir_suite)) {
+    dir.create(autosubmit_suite_dir_suite, recursive = TRUE)
+  }
+  if (!dir.exists(autosubmit_suite_dir_suite)) {
+    stop("Could not find or create the directory in parameter 'autosubmit_suite_dir'.")
+  }
+
+  remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj')
+  remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/')
+
+
+  # Work out chunked dimensions and target dimensions
+  all_dims <- lapply(cube_headers, attr, 'Dimensions')
+  all_dims_merged <- NULL
+  for (i in all_dims) {
+    if (is.null(all_dims_merged)) {
+      all_dims_merged <- i
+    } else {
+      all_dims_merged <- .MergeArrayDims(all_dims_merged, i)[[3]]
+    }
+  }
+  all_dimnames <- names(all_dims_merged)
+
+  target_dims_indices <- which(all_dimnames %in% unlist(attr(step_fun, 'TargetDims')))
+  target_dims <- NULL
+  if (length(target_dims_indices) > 0) {
+    target_dims <-
all_dimnames[target_dims_indices] + } + + chunked_dims <- all_dimnames + if (length(target_dims_indices) > 0) { + chunked_dims <- chunked_dims[-target_dims_indices] + } + if (length(chunked_dims) < 1) { + stop("Not possible to process input by chunks. All input dimensions are ", + "target dimensions.") + } + + # Check all input headers have matching dimensions + cube_index <- 1 + for (cube_header in cube_headers) { + + # Check if all the margin dims are consistent among datasets + if (!all(chunked_dims %in% names(attr(cube_header, "Dimensions")))) { + trouble_dim_name <- chunked_dims[which(!chunked_dims %in% + names(attr(cube_header, "Dimensions")))] + stop(paste0("Found margin dimension, ", toString(trouble_dim_name), + ", is not in input data ", cube_index, ".")) + } + + # Only check margin dimensions (i.e., chunked_dims) + if (!all(attr(cube_header, 'Dimensions')[chunked_dims] == all_dims_merged[names(attr(cube_header, 'Dimensions'))][chunked_dims])) { + stop("All provided 'cube_headers' must have matching dimension lengths ", + "with each other.") + } + if (!all(attr(step_fun, 'TargetDims')[[cube_index]] %in% names(attr(cube_header, 'Dimensions')))) { + stop("All provided 'cube_headers' must contain at least the target dimensions ", + "expected by 'step_fun'.") + } + cube_index <- cube_index + 1 + # work out expected result dimensions + } + + # Check chunks + default_chunks <- as.list(rep(1, length(chunked_dims))) + names(default_chunks) <- chunked_dims + if (length(chunks) == 1 && chunks == 'auto') { + chunks <- default_chunks + } + if (!is.list(chunks)) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (is.null(names(chunks))) { + stop("Parameter 'chunks' must be a named list or 'auto'.") + } + if (any(!(names(chunks) %in% chunked_dims))) { + stop("All names in parameter 'chunks' must be one of the non-target dimensions ", + "present in the cubes in 'cube_headers'. The target dimensions are ", + paste(paste0("'", target_dims, "'"), collapse = ', '), ". The non-target ", + "dimensions (margins) are ", paste(paste0("'", chunked_dims, "'"), collapse = ', '), ".") + } + if (any(!(((unlist(chunks) %% 1) == 0) | (unlist(chunks) == 'all')))) { + stop("All values in parameter 'chunks' must take a numeric value or 'all'.") + } + if (any(unlist(chunks) < 1)) { + stop("All values in parameter 'chunks' must be >= 1.") + } + for (chunk_spec in 1:length(chunks)) { + if (chunks[[chunk_spec]] > all_dims_merged[names(chunks)[chunk_spec]]) { + stop("Too many chunks requested for the dimension ", names(chunks)[chunk_spec], + ". 
Maximum allowed is ", all_dims_merged[names(chunks)[chunk_spec]])
+    }
+  }
+  default_chunks[names(chunks)] <- chunks
+  #NOTE: chunks here has all the margin dims, not only the chunked ones
+  chunks <- default_chunks
+  timings[['nchunks']] <- prod(unlist(chunks))
+
+  # Replace 'all's
+  chunks_all <- which(unlist(chunks) == 'all')
+  if (length(chunks_all) > 0) {
+    chunks[chunks_all] <- all_dims_merged[names(chunks)[chunks_all]]
+  }
+
+  # Copy load_process_save_chunk_autosubmit.R into the local folder
+  chunk_script <- file(system.file('chunking/Autosubmit/load_process_save_chunk_autosubmit.R',
+                                   package = 'startR'))
+  chunk_script_lines <- readLines(chunk_script)
+  close(chunk_script)
+  chunk_script_lines <- gsub('^lib_dir <- *', paste0('lib_dir <- ',
+                             paste(deparse(cluster[['lib_dir']]), collapse = '\n')),
+                             chunk_script_lines)
+  #TODO: Change out_dir to somewhere else like expid/outputs/
+  chunk_script_lines <- gsub('^out_dir <- *', paste0('out_dir <- ',
+                             paste(deparse(remote_autosubmit_suite_dir_suite), collapse = '\n')), chunk_script_lines)
+  chunk_script_lines <- gsub('^debug <- *', paste0('debug <- ', paste(deparse(debug), collapse = '\n')),
+                             chunk_script_lines)
+  deparsed_calls <- paste0('start_calls <- list(')
+  extra_path <- ''
+  if (cluster[['special_setup']] == 'marenostrum4') {
+    extra_path <- '/gpfs/archive/bsc32/'
+  }
+  for (cube_header in 1:length(cube_headers)) {
+    pattern_dim <- attr(cube_headers[[cube_header]], 'PatternDim')
+    bk_pattern_dim <- cube_headers[[cube_header]][[pattern_dim]]
+    bk_expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles')
+    if (!is_data_dir_shared) {
+      cube_headers[[cube_header]][[pattern_dim]] <- paste0(remote_data_dir, '/',
+        extra_path, '/', cube_headers[[cube_header]][[pattern_dim]])
+      for (file_n in 1:length(bk_expected_files)) {
+        attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n] <- paste0(remote_data_dir, '/',
+          extra_path, '/', attr(cube_headers[[cube_header]], 'ExpectedFiles')[file_n])
+      }
+    }
+    deparsed_calls <- paste0(deparsed_calls, '\nquote(',
+                             paste(deparse(cube_headers[[cube_header]]), collapse = '\n'),
+                             ')')
+    cube_headers[[cube_header]][[pattern_dim]] <- bk_pattern_dim
+    attr(cube_headers[[cube_header]], 'ExpectedFiles') <- bk_expected_files
+    if (cube_header < length(cube_headers)) {
+      deparsed_calls <- paste0(deparsed_calls, ', ')
+    }
+  }
+  deparsed_calls <- paste0(deparsed_calls, '\n)')
+  chunk_script_lines <- gsub('^start_calls <- *', deparsed_calls, chunk_script_lines)
+  chunk_script_lines <- gsub('^start_calls_attrs <- *',
+                             paste0('start_calls_attrs <- ',
+                                    paste(deparse(lapply(cube_headers, attributes)), collapse = '\n')),
+                             chunk_script_lines)
+  chunk_script_lines <- gsub('^param_dimnames <- *',
+                             paste0('param_dimnames <- ',
+                                    paste(deparse(chunked_dims), collapse = '\n')),
+                             chunk_script_lines)
+  chunk_script_lines <- gsub('^threads_load <- *', paste0('threads_load <- ', threads_load),
+                             chunk_script_lines)
+  chunk_script_lines <- gsub('^threads_compute <- *', paste0('threads_compute <- ', threads_compute),
+                             chunk_script_lines)
+  chunk_script_lines <- gsub('^fun <- *', paste0('fun <- ', paste(deparse(step_fun), collapse = '\n')),
+                             chunk_script_lines)
+  chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')),
+                             chunk_script_lines)
+  writeLines(chunk_script_lines, paste0(autosubmit_suite_dir_suite, '/load_process_save_chunk_autosubmit.R'))
+
+  # Write and copy startR_autosubmit.sh into the local folder
+  write_autosubmit_bash(chunks, cluster,
autosubmit_suite_dir = autosubmit_suite_dir) + + # Modify conf files from template and rewrite to /esarchive/autosubmit/expid/conf/ + write_autosubmit_confs(chunks, cluster, autosubmit_suite_dir) + + # Iterate through chunks + chunk_array <- array(1:prod(unlist(chunks)), dim = (unlist(chunks))) + arrays_of_results <- vector('list', length(attr(step_fun, 'OutputDims'))) + names(arrays_of_results) <- names(attr(step_fun, 'OutputDims')) + for (component in 1:length(arrays_of_results)) { + arrays_of_results[[component]] <- vector('list', prod((unlist(chunks)))) + dim(arrays_of_results[[component]]) <- (unlist(chunks)) + } + found_first_result <- FALSE + for (i in 1:length(chunk_array)) { + chunk_indices <- which(chunk_array == i, arr.ind = TRUE)[1, ] + names(chunk_indices) <- names(dim(chunk_array)) + } + + + timings[['cores_per_job']] <- cluster[['cores_per_job']] + timings[['concurrent_chunks']] <- cluster[['max_jobs']] + + t_end_bychunks_setup <- Sys.time() + timings[['bychunks_setup']] <- as.numeric(difftime(t_end_bychunks_setup, + t_begin_bychunks_setup, units = 'secs')) + if (!is_data_dir_shared) { + #NOTE: Not consider this part yet + t_begin_transfer <- Sys.time() + .message("Sending involved files to the cluster file system...") + files_to_send <- NULL + #files_to_check <- NULL + for (cube_header in 1:length(cube_headers)) { + expected_files <- attr(cube_headers[[cube_header]], 'ExpectedFiles') + #files_to_check <- c(files_to_check, expected_files) + #if (cluster[['special_setup']] == 'marenostrum4') { + # expected_files <- paste0('/gpfs/archive/bsc32/', expected_files) + #} + files_to_send <- c(files_to_send, expected_files) + } + #which_files_exist <- sapply(files_to_check, file.exists) + which_files_exist <- sapply(files_to_send, file.exists) + files_to_send <- files_to_send[which_files_exist] + if (cluster[['special_setup']] == 'marenostrum4') { + file_spec <- paste(paste0("/gpfs/archive/bsc32/", + files_to_send), collapse = ' ') + system(paste0("ssh ", cluster[['queue_host']], " 'mkdir -p ", remote_data_dir, + ' ; module load transfer ; cd ', remote_autosubmit_suite_dir_suite, + ' ; dtrsync -Rrav ', '\'', file_spec, '\' "', remote_data_dir, '/"', + " ; sleep 1 ; ", + "while [[ ! $(ls dtrsync_*.out 2>/dev/null | wc -l) -ge 1 ]] ; ", + "do sleep 2 ; done", + " ; sleep 1 ; ", + 'while [[ ! $(grep "total size is" dtrsync_*.out | ', + "wc -l) -ge 1 ]] ; ", + "do sleep 5 ; done", "'")) + } else { + file_spec <- paste(files_to_send, collapse = ' :') + system(paste0("ssh ", cluster[['queue_host']], ' "mkdir -p ', + remote_data_dir, '"')) + system(paste0("rsync -Rrav '", file_spec, "' '", + cluster[['queue_host']], ":", remote_data_dir, "/'")) + } + .message("Files sent successfully.") + t_end_transfer <- Sys.time() + timings[['transfer']] <- as.numeric(difftime(t_end_transfer, t_begin_transfer, units = 'secs')) + } else { + timings[['transfer']] <- 0 + } + if (!silent) { + .message(paste0("Processing chunks... ")) + } + time_begin_first_chunk <- Sys.time() + sys_commands <- paste0("module load ", as_module, "; ", + "autosubmit create ", suite_id, " -np; ", + "autosubmit refresh ", suite_id, "; ") + if (wait) { + sys_commands <- paste0(sys_commands, "autosubmit run ", suite_id) + } else { + sys_commands <- paste0(sys_commands, "nohup autosubmit run ", suite_id, " >/dev/null 2>&1 &") # disown? 
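+    # With wait = FALSE, Autosubmit is launched in the background and detached
+    # from this R session, so the session can be closed and the results
+    # gathered later with Collect() on the returned 'startR_exec' object.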
+ } + if (gsub('[[:digit:]]', "", Sys.getenv('HOSTNAME')) == 'bscesautosubmit') { + #NOTE: If we ssh to AS VM and run everything there, we don't need to ssh here + system(sys_commands) + + } else if (gsub("[[:digit:]]", "", Sys.getenv("HOSTNAME")) == "bscearth") { + # ssh from WS to AS VM to run exp + as_login <- paste0(Sys.getenv("USER"), '@', autosubmit_server, '.bsc.es') + sys_commands <- paste0('ssh ', as_login, ' "', sys_commands, '"') #'; exit"') + system(sys_commands) + + } else { + stop("Cannot identify host", Sys.getenv("HOSTNAME"), ". Where to run AS exp?") + } + + # Check the size of tmp/ASLOGS/jobs_failed_status.log. If it is not 0, the jobs failed. + failed_file_size <- system(paste0("du /esarchive/autosubmit/", suite_id, "/tmp/ASLOGS/jobs_failed_status.log"), intern = T) + if (substr(failed_file_size, 1, 1) != 0) { + # Remove bigmemory objects (e.g., a68h_1_1 and a68h_1_1.desc) if they exist + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(cluster[['run_dir']])) { + file.remove( + file.path(cluster[['run_dir']], + list.files(cluster[['run_dir']])[grepl(paste0("^", suite_id, "_.*"), list.files(cluster[['run_dir']]))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } + + stop("Some Autosubmit jobs failed. Check GUI and logs.") + } + + timings[['total']] <- t_begin_total + startr_exec <- list(cluster = cluster, workflow_manager = 'autosubmit', + suite_id = suite_id, chunks = chunks, + num_outputs = length(arrays_of_results), + autosubmit_suite_dir = autosubmit_suite_dir, #ecflow_server = ecflow_server, + timings = timings) + class(startr_exec) <- 'startR_exec' + + if (wait) { + result <- Collect(startr_exec, wait = TRUE, remove = T) + .message("Computation ended successfully.") + return(result) + + } else { + # if wait = F, return startr_exec and merge chunks in Collect(). + return(startr_exec) + } + +} diff --git a/R/ByChunks.R b/R/ByChunks_ecflow.R similarity index 93% rename from R/ByChunks.R rename to R/ByChunks_ecflow.R index 37a554c1e44d8fda4bc60a6a2a0732c54e4009b3..6292448cea2503641ec999e39ffbad2c5fb31653 100644 --- a/R/ByChunks.R +++ b/R/ByChunks_ecflow.R @@ -19,9 +19,10 @@ #'@param threads_compute An integer indicating the number of execution threads #' to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the -#' machine to be run on. The comoponents vary from the different machines. -#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more -#' details and examples. +#' machine to be run on. The comoponents vary from the different machines. +#' Check +#' \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide} +#' for more examples. #' Only needed when the computation is not run locally. The default value is #' NULL. #'@param ecflow_suite_dir A character string indicating the path to a folder in @@ -51,8 +52,8 @@ #' attached as attributes to the returned list of arrays. #' #'@examples -#' # ByChunks() is internally used in Compute(), not intended to be used by -#' # users. The example just illustrates the inputs of ByChunks(). +#' # ByChunks_ecflow() is internally used in Compute(), not intended to be used +#' # by users. The example just illustrates the inputs of ByChunks_ecflow(). 
#' # data_path <- system.file('extdata', package = 'startR')
#' # path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
#' # sdates <- c('200011', '200012')
@@ -77,18 +78,18 @@
 #' # output_dims = 'latitude',
 #' # use_libraries = c('multiApply'),
 #' # use_attributes = list(data = "Variables"))
-#' #ByChunks(step, data)
+#' #ByChunks_ecflow(step, data)
 #'
 #'@import multiApply
 #'@importFrom methods is
 #'@noRd
-ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
-                     threads_load = 2, threads_compute = 1,
-                     cluster = NULL,
-                     ecflow_suite_dir = NULL,
-                     ecflow_server = NULL,
-                     silent = FALSE, debug = FALSE,
-                     wait = TRUE) {
+ByChunks_ecflow <- function(step_fun, cube_headers, ..., chunks = 'auto',
+                            threads_load = 1, threads_compute = 1,
+                            cluster = NULL,
+                            ecflow_suite_dir = NULL,
+                            ecflow_server = NULL,
+                            silent = FALSE, debug = FALSE,
+                            wait = TRUE) {
   # Build object to store profiling timings
   t_begin_total <- Sys.time()
   t_begin_bychunks_setup <- t_begin_total
@@ -181,15 +182,23 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
   if (is.null(names(cluster))) {
     stop("Parameter 'cluster' must be a named list.")
   }
-  if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir',
-                                  'temp_dir', 'lib_dir', 'init_commands',
-                                  'r_module', 'CDO_module',
-                                  'ecflow_module', 'node_memory',
-                                  'cores_per_job', 'job_wallclock', 'max_jobs',
+  if (any(!(names(cluster) %in% c('queue_host', 'queue_type', 'data_dir',
+                                  'temp_dir', 'lib_dir', 'init_commands',
+                                  'r_module', 'CDO_module', 'autosubmit_module',
+                                  'ecflow_module', 'node_memory',
+                                  'cores_per_job', 'job_wallclock', 'max_jobs',
                                   'extra_queue_params', 'bidirectional',
-                                  'polling_period', 'special_setup')))) {
+                                  'polling_period', 'special_setup', 'expid', 'hpc_user')))) {
+    stop("Found invalid component names in parameter 'cluster'.")
   }
+  # Remove Autosubmit components
+  redundant_components <- c('autosubmit_module', 'expid', 'hpc_user')
+  if (any(redundant_components %in% names(cluster))) {
+    tmp <- redundant_components[which(redundant_components %in% names(cluster))]
+    .warning(paste0("Cluster component(s) ", paste(tmp, collapse = ', '), " not used when ecFlow is the workflow manager."))
+    cluster[tmp] <- NULL
+  }
   default_cluster[names(cluster)] <- cluster
 }
 localhost_name <- NULL
@@ -258,8 +267,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
     stop("The component 'CDO_module' of the parameter 'cluster' must be a character string.")
   }
   if (nchar(cluster[['CDO_module']]) < 1 || grepl(' ', cluster[['CDO_module']])) {
-    warning("The component 'CDO_module' of parameter 'cluster' must have ",
-            " than 1 and only the first element will be used.")
+    .warning(paste0("The component 'CDO_module' of the parameter 'cluster' must ",
+                    "have at least one character and contain no blank spaces."))
   }
   cluster[['r_module']] <- paste(cluster[['r_module']], cluster[['CDO_module']])
 }
@@ -428,8 +437,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
                    ".
Make sure passwordless ", "access is properly set in both directions.")) - # Copy load_process_save_chunk.R into shared folder - chunk_script <- file(system.file('chunking/load_process_save_chunk.R', + # Copy load_process_save_chunk_ecflow.R into shared folder + chunk_script <- file(system.file('chunking/ecFlow/load_process_save_chunk_ecflow.R', package = 'startR')) chunk_script_lines <- readLines(chunk_script) close(chunk_script) @@ -480,10 +489,10 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', chunk_script_lines) chunk_script_lines <- gsub('^params <- *', paste0('params <- ', paste(deparse(list(...)), collapse = '\n')), chunk_script_lines) - writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk.R')) + writeLines(chunk_script_lines, paste0(ecflow_suite_dir_suite, '/load_process_save_chunk_ecflow.R')) # Copy Chunk.ecf into shared folder - chunk_ecf_script <- file(system.file('chunking/Chunk.ecf', + chunk_ecf_script <- file(system.file('chunking/ecFlow/Chunk.ecf', package = 'startR')) chunk_ecf_script_lines <- readLines(chunk_ecf_script) close(chunk_ecf_script) @@ -522,8 +531,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # } else { # transfer_back_line <- '' # } - chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk.R --args \\$task_path insert_indices', - paste0('Rscript load_process_save_chunk.R --args $task_path ', paste(ecf_vars, collapse = ' ')), + chunk_ecf_script_lines <- gsub('^Rscript load_process_save_chunk_ecflow.R --args \\$task_path insert_indices', + paste0('Rscript load_process_save_chunk_ecflow.R --args $task_path ', paste(ecf_vars, collapse = ' ')), chunk_ecf_script_lines) #chunk_ecf_script_lines <- gsub('^include_transfer_back_and_rm', transfer_back_line, chunk_ecf_script_lines) writeLines(chunk_ecf_script_lines, paste0(ecflow_suite_dir_suite, '/Chunk.ecf')) @@ -549,7 +558,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', # Copy queue header into shared folder #file.copy(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR'), # ecflow_suite_dir_suite) - chunk_queue_header <- file(system.file(paste0('chunking/', cluster[['queue_type']], '.h'), package = 'startR')) + chunk_queue_header <- file(system.file(paste0('chunking/ecFlow/', cluster[['queue_type']], '.h'), package = 'startR')) chunk_queue_header_lines <- readLines(chunk_queue_header) close(chunk_queue_header) chunk_queue_header_lines <- gsub('^include_extra_queue_params', @@ -558,12 +567,10 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', writeLines(chunk_queue_header_lines, paste0(ecflow_suite_dir_suite, '/', cluster[['queue_type']], '.h')) # Copy headers - file.copy(system.file('chunking/head.h', package = 'startR'), + file.copy(system.file('chunking/ecFlow/head.h', package = 'startR'), ecflow_suite_dir_suite) - file.copy(system.file('chunking/tail.h', package = 'startR'), + file.copy(system.file('chunking/ecFlow/tail.h', package = 'startR'), ecflow_suite_dir_suite) - #file.copy(system.file('chunking/clean_devshm.sh', package = 'startR'), - # ecflow_suite_dir_suite) } add_line <- function(suite, line, tabs) { @@ -882,7 +889,8 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', ecflow_server[['port']])) timings[['total']] <- t_begin_total - startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + startr_exec <- list(cluster = cluster, ecflow_server = ecflow_server, + workflow_manager = 'ecFlow', suite_id = suite_id, 
                      chunks = chunks,
                      num_outputs = length(arrays_of_results),
                      ecflow_suite_dir = ecflow_suite_dir,
@@ -1000,3 +1008,15 @@
   }
   #TODO: check result dimensions match expected dimensions
 }
+
+ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto',
+                     threads_load = 2, threads_compute = 1,
+                     cluster = NULL,
+                     ecflow_suite_dir = NULL,
+                     ecflow_server = NULL,
+                     silent = FALSE, debug = FALSE,
+                     wait = TRUE) {
+
+  .Defunct("ByChunks_ecflow")
+}
+
diff --git a/R/Collect.R b/R/Collect.R
index 4c80b037d4b91101fc0e90fc6c5c3b1f09b5d624..6d752f5fc907d6f6fbe84d3aa015ccbf8f4afc07 100644
--- a/R/Collect.R
+++ b/R/Collect.R
@@ -1,11 +1,11 @@
 #'Collect and merge the computation results
 #'
 #'The final step of the startR workflow after the data operation. It is used when
-#'the parameter 'wait' of Compute() is FALSE, and the functionality includes
-#'updating the job status shown on the EC-Flow GUI and collecting all the chunks
-#'of results as one data array when the execution is done. See more details on
-#'\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}.
-#'
+#'the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the
+#'results into one data array when the execution is done. See more details in
+#'the \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}.
+#'Collect() calls Collect_ecflow() or Collect_autosubmit() according to the
+#'chosen workflow manager.
 #'@param startr_exec An R object returned by Compute() when the parameter 'wait'
 #'  of Compute() is FALSE. It can be directly from a Compute() call or read from
 #'  the RDS file.
@@ -13,14 +13,15 @@
 #'  Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a
 #'  blocking call, in which Collect() will retrieve information from the HPC,
 #'  including signals and outputs, each polling_period seconds. The the status
-#'  can be monitored on the EC-Flow GUI. Collect() will not return until the
-#'  results of all chunks have been received. If FALSE, Collect() will crash with
-#'  an error if the execution has not finished yet, otherwise it will return the
-#'  merged array. The default value is TRUE.
-#'@param remove A logical value deciding whether to remove of all data results
-#'  received from the HPC (and stored under 'ecflow_suite_dir', the parameter in
-#'  Compute()) after being collected. To preserve the data and Collect() it as
-#'  many times as desired, set remove to FALSE. The default value is TRUE.
+#'  can be monitored on the workflow manager GUI. Collect() will not return
+#'  until the results of all the chunks have been received. If FALSE, Collect()
+#'  throws an error if the execution has not finished, otherwise it will return
+#'  the merged array. The default value is TRUE.
+#'@param remove A logical value deciding whether to remove all the chunk results
+#'  received from the HPC after the data have been collected, as well as the
+#'  local job folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To
+#'  preserve the data and Collect() them as many times as desired, set remove
+#'  to FALSE. The default value is TRUE.
 #'@return A list of merged data array.
 #'
 #'@examples
@@ -72,10 +73,34 @@
 #'
 #'@export
 Collect <- function(startr_exec, wait = TRUE, remove = TRUE) {
+
+  # Parameter checks
   if (!is(startr_exec, 'startR_exec')) {
     stop("Parameter 'startr_exec' must be an object of the class ",
-         "'startR_exec', as returned by Collect(..., wait = FALSE).")
+         "'startR_exec', as returned by Compute(..., wait = FALSE).")
+  }
+  if (!tolower(startr_exec$workflow_manager) %in% c('ecflow', 'autosubmit')) {
+    stop("Cannot identify the workflow manager. Check the value of 'startr_exec$workflow_manager', which should be 'ecFlow' or 'Autosubmit'.")
+  }
+  if (!is.logical(wait)) {
+    stop("Parameter 'wait' must be logical.")
+  }
+  if (!is.logical(remove)) {
+    stop("Parameter 'remove' must be logical.")
+  }
+
+  if (tolower(startr_exec$workflow_manager) == 'ecflow') {
+    res <- Collect_ecflow(startr_exec, wait = wait, remove = remove)
+  } else if (tolower(startr_exec$workflow_manager) == 'autosubmit') {
+    res <- Collect_autosubmit(startr_exec, wait = wait, remove = remove)
   }
+
+  return(res)
+}
+
+
+Collect_ecflow <- function(startr_exec, wait = TRUE, remove = TRUE) {
+
   if (Sys.which('ecflow_client') == '') {
     stop("ecFlow must be installed in order to collect results from a ",
          "Compute() execution.")
@@ -346,3 +371,59 @@ Collect <- function(startr_exec, wait = TRUE, remove = TRUE) {
   attr(result, 'startR_compute_profiling') <- timings
   result
 }
+
+
+
+Collect_autosubmit <- function(startr_exec, wait = TRUE, remove = TRUE) {
+
+  suite_id <- startr_exec[['suite_id']]
+  chunks <- startr_exec[['chunks']]
+  num_outputs <- startr_exec[['num_outputs']]
+  autosubmit_suite_dir <- startr_exec[['autosubmit_suite_dir']]
+  autosubmit_suite_dir_suite <- paste0(autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/')
+  remote_autosubmit_suite_dir <- file.path("/esarchive/autosubmit/", suite_id, 'proj')
+  remote_autosubmit_suite_dir_suite <- paste0(remote_autosubmit_suite_dir, '/STARTR_CHUNKING_', suite_id, '/')
+  run_dir <- startr_exec$cluster[['run_dir']]
+
+  done <- FALSE
+
+  while (!done) { # If wait, try until it is done
+    # Recount the received chunk files on every iteration; otherwise the loop
+    # would never see newly arrived chunks and could never finish.
+    sum_received_chunks <- sum(grepl('.*\\.Rds$', list.files(remote_autosubmit_suite_dir_suite)))
+    if (sum_received_chunks / num_outputs == prod(unlist(chunks))) {
+      done <- TRUE
+
+    } else if (!wait) {
+      stop("Computation in progress...")
+    } else {
+      Sys.sleep(startr_exec$cluster[['polling_period']])
+      message("Computation in progress, ", sum_received_chunks, " of ", prod(unlist(chunks)), " chunks are done...\n",
+              "Check status on the Autosubmit GUI: https://earth.bsc.es/autosubmitapp/experiment/", suite_id)
+#      Sys.sleep(min(sqrt(attempt), 5))
+    }
+
+  } # while !done
+
+  result <- .MergeChunks(remote_autosubmit_suite_dir, suite_id, remove = remove)
+  if (remove) {
+    .warning("ATTENTION: The source chunks will be removed from the ",
+             "system.
Store the result after Collect() ends if needed.") + unlink(paste0(autosubmit_suite_dir_suite), + recursive = TRUE) + } + + # Remove bigmemory objects (e.g., a68h_1_1_1_1_1 and a68h_1_1_1_1_1.desc) + # If run_dir is specified, the files are under run_dir; if not, files are under proj/STARTR_CHUNKING_xxxx/ + if (!is.null(run_dir)) { + file.remove( + file.path(run_dir, + list.files(run_dir)[grepl(paste0("^", suite_id, "_.*"), list.files(run_dir))]) + ) + } else { + file.remove( + file.path(remote_autosubmit_suite_dir_suite, + list.files(remote_autosubmit_suite_dir_suite)[grepl(paste0("^", suite_id, "_.*"), list.files(remote_autosubmit_suite_dir_suite))]) + ) + } + + return(result) +} diff --git a/R/Compute.R b/R/Compute.R index 1450b0157b4d5480a8c077c09742f7b02e0e4f12..321a0a192a647fc7e13f00d5ec4724a61bd70e7e 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -19,15 +19,17 @@ #' those not required as the target dimension in function Step(). The default #' value is 'auto', which lists all the non-target dimensions and each one has #' one chunk. -#'@param threads_load An integer indicating the number of execution threads to -#' use for the data retrieval stage. The default value is 1. -#'@param threads_compute An integer indicating the number of execution threads -#' to use for the computation. The default value is 1. +#'@param threads_load An integer indicating the number of parallel execution +#' processes to use for the data retrieval stage. The default value is 1. +#'@param threads_compute An integer indicating the number of parallel execution +#' processes to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The comoponents vary from the different machines. #' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. +#'@param workflow_manager Can be NULL, 'ecFlow' or 'Autosubmit'. The default is +#' 'ecFlow'. #'@param ecflow_suite_dir A character string indicating the path to a folder in #' the local workstation where to store temporary files generated for the #' automatic management of the workflow. 
Only needed when the execution is run @@ -84,11 +86,11 @@ #' #'@importFrom methods is #'@export -Compute <- function(workflow, chunks = 'auto', - threads_load = 1, threads_compute = 1, - cluster = NULL, ecflow_suite_dir = NULL, - ecflow_server = NULL, silent = FALSE, debug = FALSE, - wait = TRUE) { +Compute <- function(workflow, chunks = 'auto', workflow_manager = 'ecFlow', + threads_load = 1, threads_compute = 1, + cluster = NULL, ecflow_suite_dir = NULL, ecflow_server = NULL, + autosubmit_suite_dir = NULL, autosubmit_server = NULL, + silent = FALSE, debug = FALSE, wait = TRUE) { # Check workflow if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) { stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", @@ -144,16 +146,42 @@ Compute <- function(workflow, chunks = 'auto', if (!all(sapply(workflow$inputs, class) == 'startR_cube')) { stop("Workflows with only one step supported by now.") } - # Run ByChunks with the combined operation - res <- ByChunks(step_fun = operation, - cube_headers = workflow$inputs, - chunks = chunks, - threads_load = threads_load, - threads_compute = threads_compute, - cluster = cluster, - ecflow_suite_dir = ecflow_suite_dir, - ecflow_server = ecflow_server, - silent = silent, debug = debug, wait = wait) + + # Run ByChunks with the chosen operation + if (!is.null(cluster)) { + if (is.null(workflow_manager)) { + stop("Specify parameter 'workflow_manager' as 'ecFlow' or 'Autosubmit'.") + } else if (!tolower(workflow_manager) %in% c('ecflow', 'autosubmit')) { + stop("Parameter 'workflow_manager' can only be 'ecFlow' or 'Autosubmit'.") + } + } else { # run locally + workflow_manager <- 'ecflow' + } + + if (tolower(workflow_manager) == 'ecflow') { + # ecFlow or run locally + res <- ByChunks_ecflow(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + ecflow_suite_dir = ecflow_suite_dir, + ecflow_server = ecflow_server, + silent = silent, debug = debug, wait = wait) + } else { + res <- ByChunks_autosubmit(step_fun = operation, + cube_headers = workflow$inputs, + chunks = chunks, + threads_load = threads_load, + threads_compute = threads_compute, + cluster = cluster, + autosubmit_suite_dir = autosubmit_suite_dir, + autosubmit_server = autosubmit_server, + silent = silent, debug = debug, wait = wait) + + } + # TODO: carry out remaining steps locally, using multiApply # Return results res diff --git a/R/Start.R b/R/Start.R index 92eb16dc107628e8e4d04e666c74878f9c373d54..b72a02b4fec0f7b8b35eeece2ba9f1bc388ff223 100644 --- a/R/Start.R +++ b/R/Start.R @@ -704,8 +704,8 @@ #'@param num_procs An integer of number of processes to be created for the #' parallel execution of the retrieval/transformation/arrangement of the #' multiple involved files in a call to Start(). If set to NULL, -#' takes the number of available cores (as detected by detectCores() in -#' the package 'future'). The default value is 1 (no parallel execution). +#' takes the number of available cores (as detected by future::detectCores). +#' The default value is 1 (no parallel execution). #'@param ObjectBigmemory a character string to be included as part of the #' bigmemory object name. This parameter is thought to be used internally by the #' chunking capabilities of startR. 
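To make the dispatch above concrete, the following is a minimal sketch of how a user is expected to drive the new Autosubmit path through Compute() and Collect(). The workflow object 'wf' (built with Start(), Step() and AddStep()), the nord3 cluster values, the module names, the user id and the paths are all illustrative assumptions, not prescriptions; see the practical guide for real configurations.

  library(startR)
  # 'wf' is assumed to be a startR workflow built with Start() + Step() + AddStep()
  res <- Compute(wf,
                 chunks = list(latitude = 2, longitude = 2),
                 workflow_manager = 'Autosubmit',
                 cluster = list(queue_host = 'nord3',          # must match a platform in platforms.yml
                                r_module = 'R/4.1.2',          # illustrative module name
                                autosubmit_module = 'autosubmit/4.0.0b0',
                                cores_per_job = 2,
                                job_wallclock = '01:00',
                                max_jobs = 4,
                                polling_period = 10,
                                expid = NULL,                  # NULL asks Autosubmit for a new experiment id
                                hpc_user = 'bsc32xxx'),        # illustrative HPC user
                 autosubmit_suite_dir = '/home/Earth/user/startR_local_autosubmit/',
                 wait = FALSE)
  saveRDS(res, file = 'startr_exec.Rds')   # the R session can be closed here
  result <- Collect(readRDS('startr_exec.Rds'), wait = TRUE)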
diff --git a/R/Utils.R b/R/Utils.R
index 3d4d864a31c93f80529c2730dc3436e0149a3251..940e2d3a92c3373922203d41515a11944117f9f8 100644
--- a/R/Utils.R
+++ b/R/Utils.R
@@ -869,3 +869,149 @@
   val <- withCallingHandlers(expr, warning = wHandler)
   list(value = val, warnings = myWarnings)
 }
+
+# This function writes startR_autosubmit.sh to the local startR_autosubmit folder, under expID/
+write_autosubmit_bash <- function(chunks, cluster, autosubmit_suite_dir) {
+  # "chunks" should be the argument "chunks" in Compute() plus the redundant margin dims,
+  # e.g., list(dat = 1, var = 1, sdate = 1, time = 1, lat = 2, lon = 3)
+
+  # Loop through the chunks to create a load script for each one
+  for (n_chunk in 0:(prod(unlist(chunks)) - 1)) {
+
+    # Create chunk args
+    chunk_names <- names(chunks)
+    chunk_args <- matrix(NA, 2, length(chunks))
+    chunk_args[1, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '%')
+    chunk_args[2, ] <- paste0('%JOBS.CHUNK_', n_chunk, '.', chunk_names, '_N%')
+    chunk_args <- paste0('(', paste(c(chunk_args), collapse = ' '), ')')
+
+    bash_script_template <- file(system.file('chunking/Autosubmit/startR_autosubmit.sh',
+                                             package = 'startR'))
+    bash_script_lines <- readLines(bash_script_template)
+    close(bash_script_template)
+
+    # Rewrite chunk_args=
+    bash_script_lines <- gsub('^chunk_args=*', paste0('chunk_args=', chunk_args),
+                              bash_script_lines)
+    # Include init commands
+    bash_script_lines <- gsub('^include_init_commands',
+                              paste0(paste0(cluster[['init_commands']], collapse = '\n'), '\n'),
+                              bash_script_lines)
+    # Rewrite include_module_load
+    bash_script_lines <- gsub('^include_module_load',
+                              paste0('module load ', cluster[['r_module']]),
+                              bash_script_lines)
+    # Rewrite cd run_dir
+    # If run_dir is not specified, the script will run under ${proj_dir}
+    if (!is.null(cluster[['run_dir']])) {
+      bash_script_lines <- gsub('^cd_run_dir',
+                                paste0('cd ', cluster[['run_dir']]),
+                                bash_script_lines)
+    } else {
+      bash_script_lines <- gsub('^cd_run_dir', 'cd ${proj_dir}',
+                                bash_script_lines)
+    }
+
+    # Save the modified .sh file under local$PROJECT_PATH in expdef.yml
+    #NOTE: dest_dir is autosubmit_suite_dir_suite in ByChunks_autosubmit()
+    #NOTE: the file will be copied to proj/ by "autosubmit create"
+    dest_dir <- file.path(autosubmit_suite_dir, paste0("STARTR_CHUNKING_", cluster$expid))
+
+    if (!dir.exists(dest_dir)) {
+      dir.create(dest_dir, recursive = TRUE)
+    }
+    writeLines(bash_script_lines, paste0(dest_dir, '/startR_autosubmit_', n_chunk, '.sh'))
+  }
+}
+
+# This function generates the .yml files under the autosubmit conf/ folder
+write_autosubmit_confs <- function(chunks, cluster, autosubmit_suite_dir) {
+  # "chunks" is from Compute() input, e.g., chunks <- list(lat = 2, lon = 3)
+  # "cluster" is the argument "cluster" in Compute(), to set the machine configuration
+  # "autosubmit_suite_dir" should be the local folder that has the R script, like ecflow_suite_dir in Compute()
+
+  # Get the config template files from the package
+  template_dir <- system.file('chunking/Autosubmit/', package = 'startR')
+  config_files <- list.files(template_dir, pattern = "\\.yml$")
+
+  for (i_file in config_files) {
+
+    conf <- yaml::read_yaml(file.path(template_dir, i_file))
+    conf_type <- strsplit(i_file, split = "[.]")[[1]][1]
+
+############################################################
+    if (conf_type == "autosubmit") {
+
+      #Q: Should it be the total number of chunks?
+      conf$config$MAXWAITINGJOBS <- as.integer(prod(unlist(chunks))) # total number of chunks
+      #NOTE: Nord3 max.
amount of queued jobs is 366 + if (conf$config$MAXWAITINGJOBS > 366) conf$config$MAXWAITINGJOBS <- 366 + conf$config$TOTALJOBS <- as.integer(cluster$max_jobs) + +############################################################ + } else if (conf_type == "expdef") { + conf$default$EXPID <- cluster$expid + conf$default$HPCARCH <- cluster$queue_host + # PROJECT_PATH should be where submit.sh and load....R stored --> local startR_autosubmit folder, under expID/ + conf$local$PROJECT_PATH <- file.path(autosubmit_suite_dir, paste0("STARTR_CHUNKING_", cluster$expid)) + +############################################################ + } else if (conf_type == "jobs") { + + chunks_vec <- lapply(lapply(chunks, seq, 1), rev) # list(lat = 1:2, lon = 1:3) + chunk_df <- expand.grid(chunks_vec) + nchunks <- nrow(chunk_df) + chunk_name <- paste0("CHUNK_", 0:(nchunks - 1)) + + # Fill in common configurations + jobs <- conf$JOBS + # wallclock from '01:00:00' to '01:00' + jobs[[1]]$WALLCLOCK <- substr(cluster$job_wallclock, 1, 5) + jobs[[1]]$PLATFORM <- cluster$queue_host + jobs[[1]]$THREADS <- as.integer(cluster$cores_per_job) + jobs[[1]][paste0(names(chunks), "_N")] <- as.integer(unlist(chunks)) + jobs[[1]][names(chunks)] <- "" + + # Create chunks and fill in info for each chunk + if (nchunks > 1) { + jobs <- c(jobs, rep(jobs, nchunks - 1)) + names(jobs) <- chunk_name + } + for (i_chunk in 1:nchunks) { + jobs[[i_chunk]][names(chunks)] <- chunk_df[i_chunk, ] + jobs[[i_chunk]]$FILE <- paste0('startR_autosubmit_', i_chunk - 1, '.sh') + } + + conf$JOBS <- jobs + +############################################################ + } else if (conf_type == "platforms") { + if (tolower(cluster$queue_host) != "local") { + conf$Platforms[[cluster$queue_host]]$USER <- cluster$hpc_user + conf$Platforms[[cluster$queue_host]]$PROCESSORS_PER_NODE <- as.integer(cluster$cores_per_job) + if (!is.null(cluster$extra_queue_params)) { + tmp <- unlist(cluster$extra_queue_params) + for (ii in 1:length(tmp)) { + tmp[ii] <- paste0('\"', tmp[ii], '\"') + } + conf$Platforms[[cluster$queue_host]]$CUSTOM_DIRECTIVES <- paste0('[ ', paste(tmp, collapse = ','), ' ]') + } + } + +############################################################ + } else { + stop("File ", i_file, " is not considered in this function.") + } + +############################################################ + # Output directory + dest_dir <- paste0("/esarchive/autosubmit/", cluster$expid, "/conf/") + dest_file <- paste0(conf_type, "_", cluster$expid, ".yml") + + # Write config file inside autosubmit dir + yaml::write_yaml(conf, paste0(dest_dir, dest_file)) + Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) + + } # for loop each file +} diff --git a/inst/chunking/Autosubmit/autosubmit.yml b/inst/chunking/Autosubmit/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..8b129a0dd71a2b6795f5548d7ba5937ce33b7970 --- /dev/null +++ b/inst/chunking/Autosubmit/autosubmit.yml @@ -0,0 +1,16 @@ +config: + AUTOSUBMIT_VERSION: 4.0.0b0 + MAXWAITINGJOBS: # Should it be the total amount of chunk? 
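+  # write_autosubmit_confs() fills this in with the total number of chunks,
+  # capped at 366 (the Nord3 limit on queued jobs)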
+ TOTALJOBS: + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +#wrappers: +# wrapper_sim: +# TYPE: "vertical" +# JOBS_IN_WRAPPER: "SIM" +mail: + NOTIFICATIONS: False + TO: +storage: + TYPE: "pkl" + COPY_REMOTE_LOGS: True diff --git a/inst/chunking/Autosubmit/expdef.yml b/inst/chunking/Autosubmit/expdef.yml new file mode 100644 index 0000000000000000000000000000000000000000..624040d8f86a319602ab9b2f7920f7d17660b1ab --- /dev/null +++ b/inst/chunking/Autosubmit/expdef.yml @@ -0,0 +1,33 @@ +default: + EXPID: #a659 + HPCARCH: #nord3v2, local +experiment: + DATELIST: 20220401 + MEMBERS: "fc0" + CHUNKSIZEUNIT: month + CHUNKSIZE: 4 + NUMCHUNKS: 2 + CHUNKINI: '' + CALENDAR: standard +project: + PROJECT_TYPE: local + PROJECT_DESTINATION: '' +git: + PROJECT_ORIGIN: '' #https://xxx + PROJECT_BRANCH: '' #master + PROJECT_COMMIT: '' + PROJECT_SUBMODULES: '' + FETCH_SINGLE_BRANCH: True +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +local: + PROJECT_PATH: #'/home/Earth/aho/startR_local_autosubmit/' +project_files: + FILE_PROJECT_CONF: '' + FILE_JOBS_CONF: '' + JOB_SCRIPTS_TYPE: '' +rerun: + RERUN: FALSE + RERUN_JOBLIST: '' +#Q: Are these all needed and correct? diff --git a/inst/chunking/Autosubmit/jobs.yml b/inst/chunking/Autosubmit/jobs.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ff4d0b284834488ab71053ffb763b6d3e76b93e --- /dev/null +++ b/inst/chunking/Autosubmit/jobs.yml @@ -0,0 +1,9 @@ +JOBS: + CHUNK_0: + PLATFORM: #LOCAL + RUNNING: once + WALLCLOCK: #00:05 + THREADS: + FILE: startR_autosubmit.sh #templates/sleep_5.sh +# DIM: +# DIM_N: diff --git a/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R new file mode 100644 index 0000000000000000000000000000000000000000..8762eeb01bf10f7de2627645971395659839ee03 --- /dev/null +++ b/inst/chunking/Autosubmit/load_process_save_chunk_autosubmit.R @@ -0,0 +1,136 @@ +lib_dir <- +if (!is.null(lib_dir)) { + if (!dir.exists(lib_dir)) { + stop("The specified 'lib_dir' does not exist.") + } + .libPaths(new = lib_dir) +} +library(startR) + +out_dir <- + +debug <- +start_calls <- +start_calls_attrs <- +param_dimnames <- +fun <- +params <- +threads_load <- +threads_compute <- + +task_path <- commandArgs(TRUE)[2] + +args <- as.integer(commandArgs(TRUE)[-c(1, 2)]) + +total_specified_dims <- length(args) / 2 +chunk_indices <- args[((1:total_specified_dims) - 1) * 2 + 1] +names(chunk_indices) <- param_dimnames +chunks <- as.list(args[((1:total_specified_dims) - 1) * 2 + 2]) +names(chunks) <- param_dimnames + +t_begin_load <- Sys.time() +data <- vector('list', length(start_calls)) +# Add data names if data input has names +if (!is.null(names(start_calls_attrs))) { + names(data) <- names(start_calls_attrs) +} +for (input in 1:length(data)) { + start_call <- start_calls[[input]] + call_dims <- names(start_calls_attrs[[input]][['Dimensions']]) + dims_to_alter <- which(call_dims %in% param_dimnames) + names_dims_to_alter <- call_dims[dims_to_alter] + # If any dimension comes from split dimensions + split_dims <- start_calls_attrs[[input]][['SplitDims']] + for (k in 1:length(split_dims)) { + if (any(names(split_dims[[k]]) %in% names_dims_to_alter)) { + chunks_split_dims <- rep(1, length(split_dims[[k]])) + names(chunks_split_dims) <- names(split_dims[[k]]) + chunks_indices_split_dims <- chunks_split_dims + split_dims_to_alter <- which(names(split_dims[[k]]) %in% names_dims_to_alter) + chunks_split_dims[split_dims_to_alter] <- unlist(chunks[names(split_dims[[k]])[split_dims_to_alter]]) + 
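+      # chunks_split_dims holds the number of chunks of each split dimension;
+      # chunks_indices_split_dims marks which of those chunks this task loads.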
+      chunks_indices_split_dims[split_dims_to_alter] <- chunk_indices[names(split_dims[[k]])[split_dims_to_alter]]
+      start_call[[names(split_dims)[k]]] <- startR:::.chunk(chunks_indices_split_dims, chunks_split_dims,
+                                                            eval(start_call[[names(split_dims)[k]]]))
+      dims_to_alter_to_remove <- which(names_dims_to_alter %in% names(split_dims[[k]]))
+      if (length(dims_to_alter_to_remove) > 0) {
+        dims_to_alter <- dims_to_alter[-dims_to_alter_to_remove]
+        names_dims_to_alter <- names_dims_to_alter[-dims_to_alter_to_remove]
+      }
+    }
+  }
+  if (length(dims_to_alter) > 0) {
+    for (call_dim in names_dims_to_alter) {
+      start_call[[call_dim]] <- startR:::.chunk(chunk_indices[call_dim], chunks[[call_dim]],
+                                                eval(start_call[[call_dim]]))
+    }
+  }
+  if (!('num_procs' %in% names(start_call))) {
+    start_call[['num_procs']] <- threads_load
+  }
+  # Create a name for the temporary file using the chunk numbers:
+  ## an ecFlow name looks like "_4737920362_1_1_1_1_1_1_"
+  ## an autosubmit name looks like "a659_1_1_1_1_1_1"
+
+  nameMemoryObject <- paste0(task_path, '_', paste(chunk_indices, collapse = '_'))  # task_path is actually the EXPID
+
+  start_call[['ObjectBigmemory']] <- nameMemoryObject
+  data[[input]] <- tryCatch(eval(start_call),
+    # Handler when an error occurs:
+    error = function(e) {
+      message("The data cannot be loaded.")
+      message("See the original error message:")
+      message(e)
+      message("\n Current files in /dev/shm:")
+      noreturn <- lapply(list.files("/dev/shm"), function (x) {
+                    info <- file.info(paste0("/dev/shm/", x))
+                    message(paste("file:", rownames(info),
+                                  "size:", info$size,
+                                  "uname:", info$uname))})
+      message(getwd())
+      file.remove(nameMemoryObject)
+      file.remove(paste0(nameMemoryObject, ".desc"))
+      message(paste("Files", nameMemoryObject, "have been removed."))
+      stop("The job has failed while loading data. See original error reported above.")
+    })
+  warning(attributes(data[[input]])$ObjectBigmemory)
+}
+t_end_load <- Sys.time()
+t_load <- as.numeric(difftime(t_end_load, t_begin_load, units = 'secs'))
+
+t_begin_compute <- Sys.time()
+if (!is.null(attr(fun, 'UseLibraries'))) {
+  for (i in seq_along(attr(fun, 'UseLibraries'))) {
+    require(attr(fun, 'UseLibraries')[i], character.only = TRUE)
+  }
+}
+chunk_indices_apply <- setNames(as.integer(chunk_indices), names(chunk_indices))
+chunk_indices_apply <- chunk_indices_apply[names(chunks)[which(chunks > 1)]]
+Apply <- multiApply::Apply
+res <- do.call("Apply",
+               c(
+                 list(data,
+                      target_dims = attr(fun, 'TargetDims'),
+                      fun = fun,
+                      output_dims = attr(fun, 'OutputDims'),
+                      use_attributes = attr(fun, 'UseAttributes'),
+                      extra_info = list(chunk_indices = chunk_indices_apply),
+                      ncores = threads_compute),
+                 params
+               )
+              )
+rm(data)
+gc()
+
+for (component in names(res)) {
+  filename <- paste0(component, '__')
+  for (i in 1:total_specified_dims) {
+    filename <- paste0(filename, param_dimnames[i], '_', chunk_indices[i], '__')
+  }
+  # Saving in a temporary file, then renaming. This way, the polling mechanism
+  # won't transfer back results before the save is completed.
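+  # The final name encodes each chunked dimension and its chunk index, e.g.,
+  # 'output1__sdate_1__time_2__.Rds' (dimension names here are illustrative).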
+  saveRDS(res[[component]], file = paste0(out_dir, '/', filename, '.Rds.tmp'))
+  file.rename(paste0(out_dir, '/', filename, '.Rds.tmp'),
+              paste0(out_dir, '/', filename, '.Rds'))
+}
+rm(res)
+gc()
diff --git a/inst/chunking/Autosubmit/platforms.yml b/inst/chunking/Autosubmit/platforms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f8d8f70c11902875dd39a90b52c11a0080c2ef59
--- /dev/null
+++ b/inst/chunking/Autosubmit/platforms.yml
@@ -0,0 +1,14 @@
+Platforms:
+  nord3:
+    TYPE: SLURM
+    HOST: nord4.bsc.es #Q: Should we have more login nodes?
+    PROJECT: bsc32
+    ADD_PROJECT_TO_HOST: "false"
+    USER: #bsc32734
+    PROCESSORS_PER_NODE: #16
+    SERIAL_QUEUE: debug
+    QUEUE: bsc_es
+    SCRATCH_DIR: /gpfs/scratch
+    CUSTOM_DIRECTIVES: # "['#SBATCH --exclusive']" "['#SBATCH --constraint=medmem']"
+#    MAX_WALLCLOCK: '48:00'
+#Q: ARE THESE SETTINGS CORRECT?
diff --git a/inst/chunking/Autosubmit/startR_autosubmit.sh b/inst/chunking/Autosubmit/startR_autosubmit.sh
new file mode 100644
index 0000000000000000000000000000000000000000..63d9e6124b510a4b5a93ef529c71be9a3fec0264
--- /dev/null
+++ b/inst/chunking/Autosubmit/startR_autosubmit.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+######## AUTOSUBMIT INPUTS #######
+proj_dir=%PROJDIR%
+task_path=%DEFAULT.EXPID%
+chunknum=%JOBNAME%        # e.g., a68h_CHUNK_0
+chunknum="${chunknum:5}"  # e.g., CHUNK_0
+##################################
+
+# Modified by write_bash.R
+# e.g., chunk_args=(%JOBS."${chunknum}".dat% %JOBS."${chunknum}".dat_N% %JOBS."${chunknum}".var% %JOBS."${chunknum}".var_N% %JOBS."${chunknum}".sdate% %JOBS."${chunknum}".sdate_N%)
+chunk_args=
+
+include_init_commands
+include_module_load
+
+## Should move to the path that has load_process_save_chunk_autosubmit.R
+#cd ${proj_dir}
+
+# move to run_dir
+cd_run_dir
+
+# e.g., Rscript load_process_save_chunk_autosubmit.R --args $task_path 1 1 1 1 2 2 1 1 1 2 1 2
+Rscript ${proj_dir}/load_process_save_chunk_autosubmit.R --args ${task_path} ${chunk_args[@]}
diff --git a/inst/chunking/Chunk.ecf b/inst/chunking/ecFlow/Chunk.ecf
similarity index 82%
rename from inst/chunking/Chunk.ecf
rename to inst/chunking/ecFlow/Chunk.ecf
index 60bd051a657d28ef957876c28c8ab2a45686f579..5a265fb0db82b49cce435e3d04306ada59b51ac2 100644
--- a/inst/chunking/Chunk.ecf
+++ b/inst/chunking/ecFlow/Chunk.ecf
@@ -12,7 +12,7 @@ set -vx
cd %REMOTE_ECF_HOME%
task_path=%REMOTE_ECF_HOME%/%ECF_NAME%
-Rscript load_process_save_chunk.R --args $task_path insert_indices
+Rscript load_process_save_chunk_ecflow.R --args $task_path insert_indices
#include_transfer_back_and_rm
#clean temporal folder
diff --git a/inst/chunking/clean_devshm.sh b/inst/chunking/ecFlow/clean_devshm.sh
similarity index 100%
rename from inst/chunking/clean_devshm.sh
rename to inst/chunking/ecFlow/clean_devshm.sh
diff --git a/inst/chunking/head.h b/inst/chunking/ecFlow/head.h
similarity index 100%
rename from inst/chunking/head.h
rename to inst/chunking/ecFlow/head.h
diff --git a/inst/chunking/load_process_save_chunk.R b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R
similarity index 98%
rename from inst/chunking/load_process_save_chunk.R
rename to inst/chunking/ecFlow/load_process_save_chunk_ecflow.R
index b7b73a9ff027937015d3e8b4400c1080c990c43a..1bc5d6deabd43f33c4e51892812cb7547a049808 100644
--- a/inst/chunking/load_process_save_chunk.R
+++ b/inst/chunking/ecFlow/load_process_save_chunk_ecflow.R
@@ -37,6 +37,10 @@ t_job_setup <- as.numeric(difftime(t_end_job_setup, t_begin_job_setup, units = '
t_begin_load <- Sys.time()
data <- vector('list',
               length(start_calls))
+# Add data names if data input has names
+if (!is.null(names(start_calls_attrs))) {
+  names(data) <- names(start_calls_attrs)
+}
for (input in 1:length(data)) {
  start_call <- start_calls[[input]]
  call_dims <- names(start_calls_attrs[[input]][['Dimensions']])
diff --git a/inst/chunking/lsf.h b/inst/chunking/ecFlow/lsf.h
similarity index 100%
rename from inst/chunking/lsf.h
rename to inst/chunking/ecFlow/lsf.h
diff --git a/inst/chunking/pbs.h b/inst/chunking/ecFlow/pbs.h
similarity index 100%
rename from inst/chunking/pbs.h
rename to inst/chunking/ecFlow/pbs.h
diff --git a/inst/chunking/slurm.h b/inst/chunking/ecFlow/slurm.h
similarity index 100%
rename from inst/chunking/slurm.h
rename to inst/chunking/ecFlow/slurm.h
diff --git a/inst/chunking/tail.h b/inst/chunking/ecFlow/tail.h
similarity index 100%
rename from inst/chunking/tail.h
rename to inst/chunking/ecFlow/tail.h
diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md
index 70b29a61b9d73cb7db017c3da9ed8ebf10c15be3..7038ad79e933adc4763a23e8878656d478c7078d 100644
--- a/inst/doc/practical_guide.md
+++ b/inst/doc/practical_guide.md
@@ -14,13 +14,15 @@ If you would like to start using startR rightaway on the BSC infrastructure, you
 2. [**Step() and AddStep()**](#4-2-step-and-addstep)
 3. [**Compute()**](#4-3-compute)
    1. [**Compute() locally**](#4-3-1-compute-locally)
-   2. [**Compute() on HPCs**](#4-3-2-compute-on-hpcs)
+   2. [**Compute() on HPCs with ecFlow**](#4-3-2-compute-on-hpcs-with-ecflow)
+   3. [**Compute() on HPCs with Autosubmit**](#4-3-3-compute-on-hpcs-with-autosubmit)
 4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui)
 5. [**Additional information**](#5-additional-information)
    1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores)
    2. [**How to clean a failed execution**](#5-2-how-to-clean-a-failed-execution)
    3. [**Visualizing the profiling of the execution**](#5-3-visualizing-the-profiling-of-the-execution)
    4. [**Pending features**](#5-4-pending-features)
+   5. [**ecFlow server and port**](#5-5-ecflow-server-and-port)
 6. [**Other examples**](#6-other-examples)
 7. [**Compute() cluster templates**](#7-compute-cluster-templates)
@@ -370,8 +372,18 @@ ### 4-3. Compute()

Once the data sources are declared and the workflow is defined, you can proceed to specify the execution parameters (including which platform to run on) and trigger the execution with the `Compute()` function.
+The execution can run locally (only on the machine where the R session is running) or on different HPCs (Nord3-v2, CTE-Power9 and other HPCs).

-Next, a few examples are shown with `Compute()` calls to trigger the processing of a dataset locally (only on the machine where the R session is running) and different HPCs (Nord3-v2, CTE-Power9 and other HPCs). However, let's first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy.
+The Compute() parameters common to local and remote execution are:
+- `wf`: The workflow defined by the previous steps.
+- `chunks`: The dimensions along which to split the data and how many chunks to make for each of them.
+startR will automatically chunk the data for you. See more details in section [#5-1](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores).
+- `threads_load`: The number of parallel execution processes to be created for the data retrieval stage.
It is used as the Start() parameter "num_procs" if "num_procs" is not specified in the Start() call.
+- `threads_compute`: The number of parallel execution processes to be created for the computation. It is used as the multiApply::Apply parameter "ncores".
+
+Using more than 2 threads for the retrieval will usually be detrimental, since two threads can already make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them.
+
+In the following example, we first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy.

```r
library(startR)
@@ -525,7 +537,7 @@ dim(res$output1)
     2      1      1      1    640   1296
```

-In addition to performing the computation in chunks, you can adjust the number of execution threads to use for the data retrieval stage (with `threads_load`) and for the computation (with `threads_compute`). Using more than 2 threads for the retrieval will usually be perjudicial, since two will already be able to make full use of the bandwidth between the workstation and the data repository. The optimal number of threads for the computation will depend on the number of processors in your machine, the number of cores they have, and the number of threads supported by each of them.
+Run Compute() with the desired chunks and resource settings.

```r
res <- Compute(wf,
@@ -566,9 +578,12 @@ res <- Compute(wf,
*                                  max: 8.03660178184509
```

-#### 4-3-2. Compute() on HPCs
+#### 4-3-2. Compute() on HPCs with ecFlow

-In order to run the computation on a HPC, you will need to make sure the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported).
+We can use a workflow manager (ecFlow or Autosubmit) to dispatch computation jobs on an HPC.
+To use Autosubmit, check the next section.
+You will need to make sure that the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide.
+If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload manager used by that HPC (Slurm, PBS and LSF supported).

You will need to add two parameters to your `Compute()` call: `cluster` and `ecflow_suite_dir`.

@@ -607,7 +622,7 @@ The cluster components and options are explained next:

- `temp_dir`: directory on the HPC where to store temporary files. Must be accessible from the HPC login node and all HPC nodes.
- `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module.
- `r_module`: name of the UNIX environment module to be used for R. If not specified, 'module load R' will be used.
-- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread.
+- `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each core may be capable of supporting more than one computing thread.
- `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`.
- `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system.
- `extra_queue_params`: list of character strings with additional queue headers for the jobs to be submitted to the HPC. Mainly used to specify the amount of memory to book for each job (e.g. '#SBATCH --mem-per-cpu=30000'; __NOTE: this line does not work on Nord3v2__), to request special queuing (e.g. '#SBATCH --qos=bsc_es'), or to request use of specific software (e.g. '#SBATCH --reservation=test-rhel-7.5').
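+
+For quick reference, a complete call for a Slurm-based HPC could look as follows. This is a minimal sketch: the host, the directories, the module name and the resource values are illustrative and must be adapted to your account.
+
+```r
+res <- Compute(wf,
+               chunks = list(latitude = 2, longitude = 2),
+               threads_load = 2,
+               threads_compute = 4,
+               cluster = list(queue_host = 'nord3',  # illustrative values
+                              queue_type = 'slurm',
+                              temp_dir = '/gpfs/scratch/bsc32/bsc32xxx/startR_hpc/',
+                              r_module = 'R/4.1.2-foss-2019b',
+                              cores_per_job = 4,
+                              job_wallclock = '01:00:00',
+                              max_jobs = 4),
+               ecflow_suite_dir = '/home/Earth/aho/startR_local/',  # any local folder for temporary suite files
+               wait = TRUE)
+```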
@@ -689,6 +704,64 @@ As mentioned above in the definition of the `cluster` parameters, it is strongly recommended

You can find the `cluster` configuration for other HPCs at the end of this guide [Compute() cluster templates](#compute-cluster-templates)

+#### 4-3-3. Compute() on HPCs with Autosubmit
+
+To use Autosubmit as the workflow manager, add the following parameters to your Compute() call:
+`cluster`, `autosubmit_suite_dir`, and `autosubmit_server`.
+
+`autosubmit_suite_dir` is the path where to store the temporary files generated for
+Autosubmit to establish the workflow. It must be accessible from both the workstation and the Autosubmit machine.
+
+`autosubmit_server` is the login node of the Autosubmit machine, i.e., 'bscesautosubmit01' or 'bscesautosubmit02'.
+
+The parameter `cluster` expects a list of components that provide the configuration of the Autosubmit machine. For now, the supported platforms are 'local' (run on the Autosubmit machine) and 'nord3' (Autosubmit submits jobs to Nord3).
+You can see one example of cluster configuration below.
+
+```r
+  res <- Compute(wf, chunks = list(sdate = 2),
+                 threads_compute = 4, threads_load = 2,
+                 cluster = list(
+                   queue_host = 'nord3',
+                   expid = ,
+                   hpc_user = "bsc32xxx",
+                   r_module = "R/4.1.2-foss-2019b",
+                   CDO_module = "CDO/1.9.8-foss-2019b",
+                   autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
+                   cores_per_job = 4,
+                   job_wallclock = '01:00:00',
+                   max_jobs = 4
+                 ),
+                 workflow_manager = 'autosubmit',
+                 autosubmit_suite_dir = "/home/Earth//startR_local_autosubmit/",
+                 autosubmit_server = 'bscesautosubmit01',
+                 wait = TRUE
+  )
+```
+
+The cluster components and options are explained next:
+
+- `queue_host`: Must match the platform name in the Autosubmit configuration file _platforms.yml_, or be 'local'. The currently provided platform is 'nord3'.
+- `expid`: The Autosubmit experiment to run the computation. You can create the experiment beforehand, or let startR create one for you by not specifying this component.
+As good practice, note down the expid if it is automatically created by startR, and re-use or delete it afterwards.
+- `hpc_user`: Your user ID on the HPC (e.g., "bsc32xxx"). It is required if `queue_host` is not 'local'.
+- `data_dir`: The path to the data repository if the data is not shared.
+- `lib_dir`: directory on the HPC where the startR R package and other required R packages are installed, accessible from all HPC nodes. These installed packages must be compatible with the R module specified in `r_module`. This parameter is optional; only required when the libraries are not installed in the R module.
+- `init_commands`: The initial commands in the bash script to run before the R script starts. For example, the modules required by the computation can be loaded here.
+- `r_module`: Name of the UNIX environment module to be used for R. If not specified, `module load R` will be used.
+- `CDO_module`: Name of the UNIX environment module to be used for CDO. If not specified, it is NULL and no CDO module will be loaded. Make sure to assign it if `transform` is required in Start().
+- `autosubmit_module`: The name of the Autosubmit module. If not specified, `module load autosubmit` will be used.
+- `cores_per_job`: Number of computing cores to be requested when submitting the job for each chunk to the HPC queue.
+It corresponds to the parameter "THREADS" in _jobs.yml_ and "PROCESSORS_PER_NODE" in _platforms.yml_ (see the sketch after this list).
+- `job_wallclock`: Amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the HPC queue.
+- `max_jobs`: Maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system.
+- `extra_queue_params`: List of character strings with additional queue headers for the jobs
+to be submitted to the HPC. For example, to constrain the job to a medmem node ('#SBATCH --constraint=medmem') or to request exclusive mode ('#SBATCH --exclusive').
+- `polling_period`: When the connection is unidirectional, the workstation will ask the HPC login node for results every `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning.
+- `special_setup`: Name of the machine if the computation requires a special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4').
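+
+For reference, with the example call above (`queue_host = 'nord3'`, `cores_per_job = 4`, `job_wallclock = '01:00:00'`, `chunks = list(sdate = 2)`), the entries that startR fills into the generated _jobs.yml_ would look roughly like the following sketch (only the first of the two chunk jobs is shown):
+
+```yaml
+JOBS:
+  CHUNK_0:
+    PLATFORM: nord3
+    RUNNING: once
+    WALLCLOCK: '01:00'   # truncated from job_wallclock '01:00:00'
+    THREADS: 4           # cores_per_job
+    FILE: startR_autosubmit_0.sh
+    sdate: 1             # index of this chunk along the 'sdate' dimension
+    sdate_N: 2           # total number of chunks along 'sdate'
+```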
+
+After the Compute() call is executed, you can monitor the status on the [Autosubmit GUI](https://earth.bsc.es/autosubmitapp/).
+
+
### 4-4. Collect() and the EC-Flow GUI

Usually, in use cases where large data inputs are involved, it is convenient to add the parameter `wait = FALSE` to your `Compute()` call. With this parameter, `Compute()` will immediately return an object with information about your startR execution. You will be able to store this object onto disk. After doing that, you will not need to worry in case your workstation turns off in the middle of the computation. You will be able to close your R session, and collect the results later on with the `Collect()` function.

@@ -722,9 +795,14 @@ module load ecFlow
ecflow_ui &
```

-After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there. However, if it is the first time you are using the EC-Flow GUI with startR, you will need to register the EC-Flow server that has been started automatically by `Compute()`. You can open the top menu "Manage servers" > "New server" > set host to 'localhost' > set port to '5678' > save.
+After doing that, a window will pop up. You will be able to monitor the status of your EC-Flow suites there.
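+
+While the suite runs, the usual deferred-collection pattern looks like the following minimal sketch. It assumes `wf` and the `cluster` list are defined as in section 4-3-2; the file names are illustrative.
+
+```r
+res_info <- Compute(wf,
+                    chunks = list(latitude = 2, longitude = 2),
+                    cluster = cluster,   # the cluster list, defined as in section 4-3-2
+                    ecflow_suite_dir = '/home/Earth/aho/startR_local/',
+                    # ecflow_server = c(port = 5678),  # optionally pick a free port (see section 5-5)
+                    wait = FALSE)
+saveRDS(res_info, file = 'startR_exec_info.Rds')
+
+# Later, possibly from a fresh R session:
+res_info <- readRDS('startR_exec_info.Rds')
+res <- Collect(res_info, wait = TRUE)
+```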
+If it is the first time you are using the EC-Flow GUI with startR,
+you will need to register the EC-Flow server that has been started automatically by `Compute()`.
+You can open the top menu "Manage servers" > "Add server" > enter a recognizable 'Name' for the server > set 'Host' to your workstation (e.g., bscearthxxx) or 'localhost' > set 'Port' to '5678' > save.
+See more information about the ecFlow server and port in [#5-5](#5-5-ecflow-server-and-port).
+
+Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`. _(NOTE: 'host' is not supported for now. You can use `ecflow_server = c(port = xxxx)` to change the port number.)_

-Note that the host and port can be adjusted with the parameter `ecflow_server` in `Compute()`, which must be provided in the form `c(host = 'hostname', port = port_number)`.

After registering the EC-Flow server, an expandable entry will appear, where you can see listed the jobs to be executed, one for each chunk, with their status represented by a colour. Gray means pending, blue means queuing, green means in progress, and yellow means completed.

@@ -844,6 +922,15 @@ You can click on the image to expand it.

- Adding feature in `Start()` to read sparse grid points.
- Allow for chunking along "essential" (a.k.a. "target") dimensions.

+### 5-5. ecFlow server and port
+
+You cannot start two ecFlow servers with the same port number on the same machine;
+that is, a port number on a given workstation cannot be shared. For example, if port '5678' on workstation 'bscearth123' is taken by user A,
+user B cannot ssh to 'bscearth123' and use port '5678'. Instead, user B can choose a different port number and pass it through 'ecflow_server' in Compute(),
+or work on another workstation where port '5678' is still available.
+
+You can check the host-port pairs you have used in `~/.ecflowrc/servers`. To stop using a server, go to the ecFlow UI and right-click the server > halt > checkpoint > terminate.
+
## 6. Other examples

You can find more use cases in [usecase.md](inst/doc/usecase.md).

diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R
index 6740a21dc31c8916dacdfc48750a0bafabf9b285..8f60413d504cd92d5e9114a3c863972863dcd237 100644
--- a/inst/doc/usecase/ex1_13_implicit_dependency.R
+++ b/inst/doc/usecase/ex1_13_implicit_dependency.R
@@ -55,6 +55,7 @@ summary(exp)

#=============================================================================
# Case 2: 'region' depends on 'sdate'
+#NOTE: Exp "a35b" has been deleted. This example cannot be run now.
path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/',
               'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/',
               '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc')
diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd
index 024ce32de1194ca691976a6207f98f830aa7233b..5ced7cd78044bd65aa814eaa62e3d9b6769bad32 100644
--- a/man/CDORemapper.Rd
+++ b/man/CDORemapper.Rd
@@ -51,7 +51,7 @@ perform the interpolation, hence CDO is required to be installed.
data_path <- system.file('extdata', package = 'startR')
path_obs <- file.path(data_path, 'obs/monthly_mean/$var$/$var$_$sdate$.nc')
sdates <- c('200011')
- \donttest{
+ \dontrun{
 data <- Start(dat = list(list(path = path_obs)),
               var = 'tos',
               sdate = sdates,
diff --git a/man/Collect.Rd b/man/Collect.Rd
index 97b529b07b001dd96d68c7de8db83802e8503ce1..d90cacaf8367095c5f4505fa8371151a1fdf4060 100644
--- a/man/Collect.Rd
+++ b/man/Collect.Rd
@@ -15,25 +15,27 @@ the RDS file.}
Collect() call to finish (TRUE) or not (FALSE). If TRUE, it will be a
blocking call, in which Collect() will retrieve information from the HPC,
including signals and outputs, each polling_period seconds. The the status
-can be monitored on the EC-Flow GUI. Collect() will not return until the
-results of all chunks have been received. If FALSE, Collect() will crash with
-an error if the execution has not finished yet, otherwise it will return the
-merged array. The default value is TRUE.}
+can be monitored on the workflow manager GUI. Collect() will not return
+until the results of all the chunks have been received. If FALSE, Collect()
+returns an error if the execution has not finished, otherwise it will return
+the merged array. The default value is TRUE.}

-\item{remove}{A logical value deciding whether to remove of all data results
-received from the HPC (and stored under 'ecflow_suite_dir', the parameter in
-Compute()) after being collected. To preserve the data and Collect() it as
-many times as desired, set remove to FALSE. The default value is TRUE.}
+\item{remove}{A logical value deciding whether to remove all chunk results
+received from the HPC after the data are collected, as well as the local job
+folder under 'ecflow_suite_dir' or 'autosubmit_suite_dir'. To preserve the
+data and Collect() them as many times as desired, set remove to FALSE. The
+default value is TRUE.}
}
\value{
A list of merged data array.
}
\description{
The final step of the startR workflow after the data operation. It is used when
-the parameter 'wait' of Compute() is FALSE, and the functionality includes
-updating the job status shown on the EC-Flow GUI and collecting all the chunks
-of results as one data array when the execution is done. See more details on
-\href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab}.
+the parameter 'wait' of Compute() is FALSE. It combines all the chunks of the
+results as one data array when the execution is done. See more details on
+\href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{practical guide}.
+Collect() calls Collect_ecflow() or Collect_autosubmit() according to the
+chosen workflow manager.
}
\examples{
data_path <- system.file('extdata', package = 'startR')
diff --git a/man/Compute.Rd b/man/Compute.Rd
index 5b03abd18895ba0332f7245be2046384486745ae..270846a0790c401a99ba8167cbc125327404b334 100644
--- a/man/Compute.Rd
+++ b/man/Compute.Rd
@@ -7,11 +7,14 @@ Compute(
  workflow,
  chunks = "auto",
+  workflow_manager = "ecFlow",
  threads_load = 1,
  threads_compute = 1,
  cluster = NULL,
  ecflow_suite_dir = NULL,
  ecflow_server = NULL,
+  autosubmit_suite_dir = NULL,
+  autosubmit_server = NULL,
  silent = FALSE,
  debug = FALSE,
  wait = TRUE
@@ -28,11 +31,14 @@ those not required as the target dimension in function Step(). The default
value is 'auto', which lists all the non-target dimensions and each one has
one chunk.}

-\item{threads_load}{An integer indicating the number of execution threads to
-use for the data retrieval stage. The default value is 1.}
+\item{workflow_manager}{Can be NULL, 'ecFlow' or 'Autosubmit'. The default is
+'ecFlow'.}

-\item{threads_compute}{An integer indicating the number of execution threads
-to use for the computation. The default value is 1.}
+\item{threads_load}{An integer indicating the number of parallel execution
+processes to use for the data retrieval stage. The default value is 1.}
+
+\item{threads_compute}{An integer indicating the number of parallel execution
+processes to use for the computation. The default value is 1.}

\item{cluster}{A list of components that define the configuration of the
machine to be run on. The comoponents vary from the different machines.
diff --git a/man/Start.Rd b/man/Start.Rd
index 3bdae42cdd792b32c68cd45656ab47e1f20c2120..7cdc9f83ca388f4912c829812e53fccf4208414c 100644
--- a/man/Start.Rd
+++ b/man/Start.Rd
@@ -684,8 +684,8 @@ value is FALSE.}
\item{num_procs}{An integer of number of processes to be created for the
parallel execution of the retrieval/transformation/arrangement of the
multiple involved files in a call to Start(). If set to NULL,
-takes the number of available cores (as detected by detectCores() in
-the package 'future'). The default value is 1 (no parallel execution).}
+takes the number of available cores (as detected by future::detectCores).
+The default value is 1 (no parallel execution).}

\item{ObjectBigmemory}{a character string to be included as part of the
bigmemory object name. This parameter is thought to be used internally by the
diff --git a/tests/testthat/test-AddStep-DimNames.R b/tests/testthat/test-AddStep-DimNames.R
index 2fe6b3976c570c71356c459d3c3a6ec505ac296b..647ca2fc646c4e04510c1167401cc6098bd80e59 100644
--- a/tests/testthat/test-AddStep-DimNames.R
+++ b/tests/testthat/test-AddStep-DimNames.R
@@ -3,7 +3,7 @@ context("Error with bad dimensions tests.")

test_that("Single File - Local execution", {
suppressWarnings(
- data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc',
+ data <- Start(dataset = '/esarchive/scratch/aho/startR_unittest_files/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc',
               var = 'tas',
               sdate = '2000',
               month = indices(1),
diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R
index 28df2340a6883b72a9a859ca783791a7740c7ee3..b1479e2d6ad4dc3755402e96c5a77e5e864b7afe 100644
--- a/tests/testthat/test-Compute-CDORemap.R
+++ b/tests/testthat/test-Compute-CDORemap.R
@@ -3,6 +3,7 @@ context("Compute use CDORemap")

test_that("ex2_3", {
 repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
+ repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos)

suppressWarnings(
 data <- Start(dat = repos,
diff --git a/tests/testthat/test-Compute-NumChunks.R b/tests/testthat/test-Compute-NumChunks.R
index 9e626e44fdac8790aecda70c00071fc25f17ab38..5d9a77573004424fa29481180ca08e152f02b693 100644
--- a/tests/testthat/test-Compute-NumChunks.R
+++ b/tests/testthat/test-Compute-NumChunks.R
@@ -3,8 +3,11 @@ context("Number of chunks tests.")

test_that("Single File - Local execution", {

+path <- '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc'
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
suppressWarnings(
-data <- Start(dataset = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$sdate$$month$.nc',
+data <- Start(dataset = path,
              var = 'tas',
              sdate = '2000',
              month = indices(1),
diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R
index ce92b94e39f5a0f2bcdde360518592adf07a7605..9c7876484781895de160fadc46ac7af89e5b778a 100644
--- a/tests/testthat/test-Compute-chunk_depend_dim.R
+++ b/tests/testthat/test-Compute-chunk_depend_dim.R
@@ -12,6 +12,8 @@ path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/',
               'HadGEM3-GC31-MM/dcppA-hindcast/',
               'r1i1p1f2/Omon/tos/gn/v20200417/',
               '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc')
+path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
+
sdates <- c('2016', '2017', '2018')

# retrieve = T for verification
@@ -149,7 +151,9 @@ test_that("2.a. depending dim is indices(); depended dim is indices()", {
suppressWarnings(
data <- Start(dat = path,
              var = 'tos',
-              sdate = indices(57:59), # 2016, 2017, 2018
+#NOTE: sdate was indices(57:59) when the path pointed to /esarchive/. Now that the path
+# is under the scratch copy, the first sdate is 2016.
+              sdate = indices(1:3), # 2016, 2017, 2018
              chunk = indices(2:4),
              chunk_depends = 'sdate',
              time = 'all',
@@ -204,7 +208,9 @@ expect_error(
suppressWarnings(
Start(dat = path,
      var = 'tos',
-      sdate = indices(57:59), # 2016, 2017, 2018
+#NOTE: sdate was indices(57:59) when the path pointed to /esarchive/. Now that the path
+# is under the scratch copy, the first sdate is 2016.
+      sdate = indices(1:3), # 2016, 2017, 2018
      chunk = chunks,
      chunk_depends = 'sdate',
      time = 'all',
diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R
index 09da16096467e861dc5035840f346d3d46ce0275..a40f74501cfebcfd50c894d2a3f52c8d452c2036 100644
--- a/tests/testthat/test-Compute-chunk_split_dim.R
+++ b/tests/testthat/test-Compute-chunk_split_dim.R
@@ -8,6 +8,7 @@ test_that("1. The files are not repeated", {
repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/',
                    'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/',
                    '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc')
+repos_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_exp)

suppressWarnings(
exp <- Start(dat = repos_exp,
@@ -32,6 +33,8 @@ dates <- attr(exp, 'Variables')$common$time
#    4     3

repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc'
+repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs)
+
suppressWarnings(
obs <- Start(dat = repos_obs,
             var = 'tas',
@@ -116,13 +119,16 @@ c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2)

test_that("2. 
The files are repeated", { -ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') -obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) +obs_path <- paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) + sdates.seq <- c("20161222","20161229", "20170105","20170112") suppressWarnings( hcst <- Start(dat = ecmwf_path_hc, - var = 'sfcWind', + var = 'tas', sdate = sdates.seq, syear = indices(1:2), #'all', time = 'all', @@ -207,15 +213,15 @@ aperm(res3, c(7, 2, 3, 4, 1, 5, 6)) ) expect_equal( dim(res1), -c(time = 47, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) +c(time = 46, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) ) expect_equal( dim(res2), -c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 47) +c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 46) ) expect_equal( dim(res3), -c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 47) +c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 46) ) diff --git a/tests/testthat/test-Compute-extra_params.R b/tests/testthat/test-Compute-extra_params.R index 9b42e43c126fe53e0f3f015c9247a724ba1a02d8..02eab307b056f1887870d9e91aed8c7e7413a871 100644 --- a/tests/testthat/test-Compute-extra_params.R +++ b/tests/testthat/test-Compute-extra_params.R @@ -7,6 +7,8 @@ test_that("ex2_6", { # Prepare sdates and paths #========================= dataset <- "/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc" + dataset <- paste0('/esarchive/scratch/aho/startR_unittest_files/', dataset) + sdates <- paste0(1981:1982, rep(10:12, 2)) #=================== # Get daily winds diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index 13d2a44435799415b4091e0a8c6710ff05e78f1d..7ebc6f55f6cda555434ff0b53983cccfd04ce4f8 100644 --- a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -6,7 +6,9 @@ context("Compute()/ByChunks(): dimension consistence check") test_that("ex2_11", { path.exp <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc' +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) var <- 'tos' y1 <- 1981 diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 00a5c1d7f1b0afc5b39dc9addd970202866a8ecb..c76793d268706af0d89c9dda101d74c9ef7a6d05 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -7,6 +7,8 @@ test_that("1. 
ex2_13", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/', '$var$_*_s$sdate$-$member$_gn_$aux$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dataset = path, var = 'tos', diff --git a/tests/testthat/test-Compute-timedim.R b/tests/testthat/test-Compute-timedim.R index 80d96ff0bf2ade58ae1b30c9f2cadd1c6e6e8e36..d63ae6cd981cfb6851ccbee0e29e6623ce4a8ecf 100644 --- a/tests/testthat/test-Compute-timedim.R +++ b/tests/testthat/test-Compute-timedim.R @@ -3,6 +3,7 @@ context("Compute on time dimension") test_that("ex2_1", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index e6363f424629366284f1836f66dee47dde1d0b24..a7a67ddb93417dc0f89ab650593313fe6629ae1e 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -4,6 +4,8 @@ test_that("1. Chunk along non-lat/lon dim", { #skip_on_cran() path <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$fyear$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( data <- Start(dat = path, var = 'tos', @@ -54,8 +56,11 @@ test_that("2. chunk along lon", { #NOTE: the results are not identical when exp has extra cells = 2 +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index 34ddf4854c8b7823eccb254098f398fd89273add..37decfc538152e6f781227c64dc1de33a746bb2b 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -25,6 +25,7 @@ test_that("1. global", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) #----------------------------------- # crop = region @@ -149,6 +150,7 @@ test_that("2. regional, no border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( @@ -248,6 +250,7 @@ test_that("3. regional, at lon border", { #skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) # crop = region suppressWarnings( diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index e6b6c26c08711a6c73970df5330e33fce1aba80f..191d6519c33b74b6963d3e75f1a654d7fa1fdd47 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -17,8 +17,11 @@ lats.max <- 90 # crop = region #NOTE: res1 and res3 differ if extra_cells = 2. 
But if retrieve = T, extra_cells = 2 or 8 is equal. +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -95,6 +98,8 @@ tolerance = 0.001 # crop = region, selector is values(c()) library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) + file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -106,8 +111,11 @@ lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -152,8 +160,12 @@ lons.min <- -180 lons.max <- 179.9 lats.min <- -90 lats.max <- 90 + +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -242,8 +254,11 @@ lats.min <- 20 lats.max <- 40 # crop = region +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', #paste0(2000:2001, '0101'), ensemble = indices(1), #'all', @@ -326,8 +341,11 @@ lats.min <- 21 lats.max <- 41 # crop = region +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', #paste0(2000:2001, '0101'), ensemble = indices(1), #'all', @@ -427,8 +445,11 @@ lats.max <- 40 #NOTE: transform_extra_cells = 8 the results are not equal # crop = region +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -520,8 +541,11 @@ tolerance = 0.001 # crop = region, CircularSort(-180, 180) #NOTE: transform_extra_cells = 8 the results are not equal +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git 
a/tests/testthat/test-Compute-two_data.R b/tests/testthat/test-Compute-two_data.R index 9cb7145dc05c5c41b7935342c5b84a5d88147391..735735fb81d66586c366cf85bc528165ff73856b 100644 --- a/tests/testthat/test-Compute-two_data.R +++ b/tests/testthat/test-Compute-two_data.R @@ -5,6 +5,8 @@ test_that("ex2_7", { # exp data repos <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/', '$var$_f6h/$var$_$sdate$.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + sdates <- sapply(2013:2014, function(x) paste0(x, sprintf('%02d', 1:12), '01')) suppressWarnings( @@ -24,6 +26,8 @@ suppressWarnings( # obs data repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/monthly_mean/', '$var$_f6h/$var$_$sdate$.nc') + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + sdates_obs <- (sapply(2012:2013, function(x) paste0(x, sprintf('%02d', 1:12)))) suppressWarnings( obs <- Start(dat = repos_obs, diff --git a/tests/testthat/test-Compute-use_attribute.R b/tests/testthat/test-Compute-use_attribute.R index 2ca73a770f040f19c650960e1be8e8c97b6e294e..7ec3dc2445918b5e6b7c3da1d27677b7a77edf67 100644 --- a/tests/testthat/test-Compute-use_attribute.R +++ b/tests/testthat/test-Compute-use_attribute.R @@ -3,6 +3,7 @@ context("Compute use attributes") test_that("ex2_2", { repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index a3d95866a79d9fda53e2c6a594122eb8d7c6c743..c561abd93f63b8332a6c0d032e26abcdbfb574c9 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,6 +1,7 @@ context("DCPP successfull retrieved for depends and across parameters.") test_that("Chunks of DCPP files- Local execution", { path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' + path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) sdates <- c('2017', '2018') suppressWarnings( @@ -19,24 +20,31 @@ suppressWarnings( ) # [sdate = 2, chunk = 3] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + suppressWarnings( -dat_2018_chunk3 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc', +dat_2018_chunk3 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) expect_equal(dat[1,1,2,25:36,,], dat_2018_chunk3[1,1,,,]) # [sdate = 1, chunk = 2] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -dat_2017_chunk2 <- Start(dat = 
'/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc', +dat_2017_chunk2 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) expect_equal(dat[1,1,1,13:24,,], dat_2017_chunk2[1,1,,,]) # [sdate = 2, chunk = 1] +path <- "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -dat_2018_chunk1 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc', +dat_2018_chunk1 <- Start(dat = path, var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index da63e53a3ef0966bf17aab6ee7c9f8af46cf92f3..0ee4c5e7b971170a184c92a5270e7d08db216401 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -5,6 +5,7 @@ test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc') +path_hadgem3 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_hadgem3) sdate <- c('2000', '2001') fyear_hadgem3 <- indices(1) @@ -48,6 +49,7 @@ expect_equal( test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_bcc_csm2) suppressWarnings( data <- Start(dat = path_bcc_csm2, @@ -82,6 +84,7 @@ test_that("3. standard, daily, unit = 'days since 1850-1-1 00:00:00'", { 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc') + path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm) var <- 'tasmax' sdate <- '2000' @@ -122,6 +125,8 @@ test_that("4. standard, monthly, unit = 'days since 1850-1-1 00:00:00'", { 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/Amon/$var$/gn/v20200320/', '$var$_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_200011-201012.nc') + path_mpi_esm <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_mpi_esm) + sdate <- '2000' fyear_mpi_esm <- paste0(sdate, '1101-', as.numeric(sdate) + 10, '1231') @@ -154,6 +159,7 @@ expect_equal( test_that("5. proleptic_gregorian, 6hrly, unit = 'hours since 2000-11-01 00:00:00'", { repos_obs <- paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', '$var$/$var$_199405.nc') + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) date <- paste0('1994-05-', sprintf('%02d', 1:31), ' 00:00:00') date <- as.POSIXct(date, tz = 'UTC') # attr(date, 'tzone') <- 'UTC' @@ -189,6 +195,8 @@ expect_equal( test_that("6. 
standard, monthly, unit = 'months since 1850-01-01 00:00:00'", { repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' + repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) + suppressWarnings( obs <- Start(dat = repos_obs, var = 'tos', @@ -246,6 +254,8 @@ c("1960-11-16 00:00:00", "1960-12-16 12:00:00", "1961-01-16 12:00:00", "1961-02- test_that("7. proleptic_gregorian, monthly, unit = 'days since 1850-1-1 00:00:00'", { repos <- '/esarchive/exp/mpi-esm-lr/cmip5-historical_i0p1/monthly_mean/$var$/$var$_$sdate$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( data <- Start(dat = repos, var = 'tas', @@ -278,6 +288,7 @@ suppressWarnings( test_that("8. gregorian, 3hrly, unit = 'days since 1850-1-1'", { repos <- '/esarchive/exp/CMIP5/historical/ecearth/cmip5-historical_i0p1/$var$_3hr_EC-EARTH_historical_r6i1p1_$period$.nc' + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'vas', diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 18d1b9f92024074ec756e2a938d18fe472efb5e8..e4e4adcb8362c982acf9c4d3b2fbf9f3d4814038 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -6,6 +6,7 @@ context("Start() using values() to define dependency relations") path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) sdates <- c('2016', '2017', '2018') chunks <- array(dim = c(chunk = 3, sdate = 3)) diff --git a/tests/testthat/test-Start-first_file_missing.R b/tests/testthat/test-Start-first_file_missing.R index 392841aa0a44121807d74d27df6456615f252f97..9c232e699a585b6b9586dc6172a8deb618570d46 100644 --- a/tests/testthat/test-Start-first_file_missing.R +++ b/tests/testthat/test-Start-first_file_missing.R @@ -7,6 +7,7 @@ context("Start() retrieves files that the first file is missing") # the data. The parameter 'metadata_dims' can also be used in this case. 
file <- "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/$var$_f24h/$var$_$file_date$.nc" +file <- paste0('/esarchive/scratch/aho/startR_unittest_files/', file) var <- 'tas' sdates1 <- c('20130611') #exists sdates2 <- c('20130618') #does not exist @@ -119,7 +120,7 @@ data <- Start(dat = file, ) expect_equal( as.vector(attr(data, 'NotFoundFiles')), - c("/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA) + c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc", NA) ) }) @@ -149,7 +150,7 @@ data <- Start(dat = file, ) expect_equal( as.vector(attr(data, 'NotFoundFiles')), - c(NA, "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc") + c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20130618.nc") ) }) diff --git a/tests/testthat/test-Start-global-lon-across_meridian.R b/tests/testthat/test-Start-global-lon-across_meridian.R index 34c861f1cb568d9254e582c9a796d50a85dc0d94..0360629fe342dddbce1ea6fe1fe5bcca45a97064 100644 --- a/tests/testthat/test-Start-global-lon-across_meridian.R +++ b/tests/testthat/test-Start-global-lon-across_meridian.R @@ -5,6 +5,7 @@ context("Start() across_meridia global lon length check") test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- 'tas' lon.min <- 0 lon.max <- 359.723 #360 diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 10e5545fcb80f3af7eefed32a8f00dadd65feacc..4e89190b571f5d2fb58ea5d95354bc7f36502614 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -8,78 +8,79 @@ #--------------------------------------------------- context("Start() implicit dependency by selector dimension") - -test_that("1. region with different index between files", { - -path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', - 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', - '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') - -# two sdates have different index for Nino3. 
-region <- array('Nino3', dim = c(sdate = 2, region = 1)) - -suppressWarnings( -data <- Start(dat = path, - var = 'tosmean', - sdate = c('1993', '2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = region, - time = 'all', - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = 'sdate'), - retrieve = T) -) -suppressWarnings( -data1 <- Start(dat = path, - var = 'tosmean', - sdate = c('1993'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) -suppressWarnings( -data2 <- Start(dat = path, - var = 'tosmean', - sdate = c('2013'), - chunk = indices(1:2), - chunk_depends = 'sdate', - region = 'Nino3', - time = 'all', #c(1:length(forecast_month)), - time_across = 'chunk', - merge_across_dims = TRUE, - return_vars = list(time = c('sdate', 'chunk'), - region = NULL), - retrieve = T) -) - -expect_equal( -dim(data), -c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) -) -expect_equal( -data[1, 1, 1, 1, ], -data1[1, 1, 1, 1, ] -) -expect_equal( -data[1, 1, 2, 1, ], -data2[1, 1, 1, 1, ] -) - - -}) +#NOTE: The files don't exist anymore. +#test_that("1. region with different index between files", { +# +#path <- paste0('/esarchive/exp/ecearth/a35b/diags/DCPP/EC-Earth-Consortium/', +# 'EC-Earth3-HR/dcppA-hindcast/r1i1p1f1/Omon/$var$_mixed/gn/v20201107/', +# '$var$_Omon_EC-Earth3-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$chunk$.nc') +# +## two sdates have different index for Nino3. +#region <- array('Nino3', dim = c(sdate = 2, region = 1)) +# +#suppressWarnings( +#data <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993', '2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = region, +# time = 'all', +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = 'sdate'), +# retrieve = T) +#) +#suppressWarnings( +#data1 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('1993'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +#suppressWarnings( +#data2 <- Start(dat = path, +# var = 'tosmean', +# sdate = c('2013'), +# chunk = indices(1:2), +# chunk_depends = 'sdate', +# region = 'Nino3', +# time = 'all', #c(1:length(forecast_month)), +# time_across = 'chunk', +# merge_across_dims = TRUE, +# return_vars = list(time = c('sdate', 'chunk'), +# region = NULL), +# retrieve = T) +#) +# +#expect_equal( +#dim(data), +#c(dat = 1, var = 1, sdate = 2, region = 1, time = 2) +#) +#expect_equal( +#data[1, 1, 1, 1, ], +#data1[1, 1, 1, 1, ] +#) +#expect_equal( +#data[1, 1, 2, 1, ], +#data2[1, 1, 1, 1, ] +#) +# +# +#}) test_that("2. time depends on sdate", { repos <- '/esarchive/exp/ecmwf/system4_m1/daily_mean/$var$_f24h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) sdates <- paste0(2001:2003, '0501') tmp <- as.POSIXct(sapply(2001:2003, function(x) paste0(x, '-05-', sprintf('%02d', 1:31))), tz = 'UTC') tmp <- array(tmp, dim = c(time = 31, sdate = 3)) @@ -122,6 +123,7 @@ test_that("3. 
region depends on member and sdate", { reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +path_SR <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_SR) suppressWarnings( data <- Start(dat = path_SR, var = 'tosmean', diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 3788af0f48c41729cf56bcc2a00049bda3561c7e..fcae53ed87c0418ba3103ae2873e7e001e234c06 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -8,6 +8,7 @@ context("Start() implicit inner dimension") test_that("1. time = 1", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312") diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 82e5cb1dc1183e3092460c59c6499549d925abd0..b225a0a1f499aed02dcd5638c954c032948ce973 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -6,12 +6,15 @@ context("List of indices and vector of indices") +repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + test_that("1. transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -36,7 +39,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -74,7 +77,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -93,7 +96,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -194,7 +197,7 @@ test_that("4. 
no transform, indices reverse", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -215,7 +218,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = repos, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index b448f895b44d150ea0bb519b288a8ee78d4c6ee1..6a796a24aba0bb4a19da61c6d41e221b2ecaf710 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -7,9 +7,10 @@ test_that("1. inconsistent member length", { # system3: 40 members. repos <- list(list(name = 'system5c3s', - path = "/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc"), list(name = 'system3_m1-c3s', - path = "/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + path = "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/cmcc/system3_m1-c3s/monthly_mean/g500_f12h/$var$_$sdate$.nc")) + # largest_dims_length = FALSE suppressWarnings( @@ -140,6 +141,7 @@ test_that("2. inconsistent time length, merge_across_dims = T", { path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/EC-Earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/', '$member$/Amon/$var$/gr/v20210309/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dataset = path, diff --git a/tests/testthat/test-Start-line_order-consistency.R b/tests/testthat/test-Start-line_order-consistency.R index 6b797a89ead97863062c2fec52e7e8fce9f4930d..8bf4564d5661a94cf4d54873c6b7bb684dba948c 100644 --- a/tests/testthat/test-Start-line_order-consistency.R +++ b/tests/testthat/test-Start-line_order-consistency.R @@ -4,6 +4,7 @@ context("Start() line order consistency check") variable <- "tas" obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_$file_date$.nc" + obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) dates_file <- "201702" lats.min <- -90 diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 4251c71d040ec4bdd8b4f19743ce8eb017a87a0a..569a28e8e65abfb3ac9460cccb149d83f55454f2 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -2,6 +2,7 @@ context("Start() metadata_dims check") test_that("1. One data set, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = list(list(name = 'system5_m1', path = repos)), var = 'tas', @@ -81,7 +82,9 @@ suppressWarnings( test_that("2. 
Two data sets, one var", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -186,6 +189,7 @@ test_that("3. One data set, two vars", { repos <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_185001-185012.nc') + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) var <- c('tas', 'clt') suppressWarnings( data <- Start(dat = repos, @@ -271,7 +275,9 @@ suppressWarnings( test_that("4. Two data sets, two vars", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -527,7 +533,9 @@ suppressWarnings( test_that("5. Specify metadata_dims with another file dimension", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), list(name = 'system5_m1', path = repos)), @@ -733,6 +741,7 @@ suppressWarnings( test_that("6. One data set, two vars from one file", { mask_path <- '/esarchive/autosubmit/con_files/mask.regions.Ec3.0_O1L46.nc' +mask_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', mask_path) suppressWarnings( data <- Start(repos = mask_path, var = c('nav_lon', 'nav_lat'), @@ -781,8 +790,10 @@ data <- Start(repos = mask_path, test_that("7. Two data sets, while one is missing", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) # incorrect path. 
Therefore repos2 doesn't have any valid files repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f2h/$var$_$sdate$.nc" # correct one is _f6h + repos2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos2) var <- 'tas' suppressWarnings( data <- Start(dat = list(list(name = 'system4_m1', path = repos2), @@ -824,7 +835,7 @@ suppressWarnings( ) expect_equal( attr(data, 'Files'), - array(c(NA, "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c(NA, "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) @@ -865,7 +876,7 @@ suppressWarnings( ) expect_equal( attr(dataF, 'ExpectedFiles'), - array(c("/esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f2h/tas_20170101.nc", "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20170101.nc"), dim = c(dat = 2, var = 1, sdate = 1)) ) @@ -875,12 +886,12 @@ suppressWarnings( test_that("8. Two data sets, both have files but the first file is missing", { path_list <- list( MPI = list(name = 'MPI_ESM', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc')), Had = list(name = 'HadGEM3', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', + path = paste0('/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200417/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc'))) @@ -927,9 +938,9 @@ data <- Start(dataset = path_list, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", + array(c("/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", NA, NA, NA, NA, NA, NA, - "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), + "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200417/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20181230.nc"), dim = c(dataset = 2, var = 1, member = 2, sdate = 1, chunk = 2)) ) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index cfd7dfb1d3559a7b67ed0afdfe8633998412eae2..13cac4775c49525fb08ece5ee4742f61a1b9a842 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -6,6 +6,7 
@@ context("Start() metadata filedim dependency") # Preparation: Get the time values repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index 7e9c2801dcf7e659fd06741504122574c80d16a3..92e831b6c36ac29c65b0975c3413bd443e2f8b1d 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -3,8 +3,11 @@ context("Start() metadata reshaping") test_that("1. time across fyear, fyear depends on sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -54,7 +57,7 @@ as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12 # retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -82,8 +85,11 @@ dates test_that("2. time across fyear, only one sdate", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -118,7 +124,7 @@ as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12 #retrieve = FALSE suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -145,8 +151,11 @@ dates test_that("3. 
time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -193,7 +202,7 @@ as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03 suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -228,9 +237,10 @@ datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), substr, 1, 7)))) +repos <- "/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( - data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + data <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -277,8 +287,7 @@ as.vector(datess) ) suppressWarnings( - dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), + dataF <- Start(dat = repos, var = 'tas', file_date = dates_file, time = values(datess), #[time = 31, sdate = 8] @@ -310,8 +319,12 @@ dates test_that("5. test 1 but merge_across_dims_narm = F", { + +repos <- "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +data <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -361,7 +374,7 @@ c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962- ) suppressWarnings( -dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', +dataF <- Start(dat = repos, var = 'tasmin', lat = indices(1), lon = indices(1), @@ -392,9 +405,11 @@ test_that("6. 
split time dim only", { datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_199407.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +data <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -439,8 +454,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_199407.nc'), +dataF <- Start(dat = repos, var = 'tas', # file_date = '199407', time = values(datess), #[time = 7, week = 2] @@ -470,9 +484,10 @@ datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), origin = '1970-01-01', tz = 'UTC') +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -518,8 +533,7 @@ as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = c('199407', '199408'), time = values(datess), #[time = 31, month = 2] @@ -549,9 +563,10 @@ test_that("8. split sdate dim", { file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), dim = c(syear = 3, smonth = 2)) +repos <- '/esarchive/recon/ecmwf/erainterim/6hourly/$var$/$var$_$file_date$.nc' +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -596,8 +611,7 @@ seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00: ) suppressWarnings( -dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +dataF <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -617,8 +631,7 @@ dates # no return_vars suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -636,8 +649,7 @@ names(attributes(data)$Variables$common), ) suppressWarnings( -data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', - '$var$/$var$_$file_date$.nc'), +data <- Start(dat = repos, var = 'tas', file_date = file_date, #[syear = 3, smonth = 2] time = indices(1:2), @@ -660,8 +672,11 @@ test_that("9. 
split file dim that contains 'time', and 'time' inner dim is implicit", { dates_arr <- array(c(paste0(1961, '0', 1:5), paste0(1962, '0', 1:5)), dim = c(time = 5, syear = 2)) +repos <- "/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + suppressWarnings( -data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +data <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -704,7 +719,7 @@ as.vector(dates_arr) suppressWarnings( -dataF <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', +dataF <- Start(dat = repos, var = 'tas', file_date = dates_arr, # [syear, time] split_multiselected_dims = TRUE, @@ -735,8 +750,10 @@ y2 <- seq(a, b, by = 'days') y2 <- y2[-3] # remove 28 Feb time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +repos <- "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc" +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( -data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +data <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] @@ -772,7 +789,7 @@ as.vector(time_array) ) suppressWarnings( -dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", +dataF <- Start(dat = repos, var = "tas", file_date = paste0(1994:1995, '1101'), #1996 is leap year time = time_array, #[time = 3, file_date = 2] diff --git a/tests/testthat/test-Start-multiple-sdates.R b/tests/testthat/test-Start-multiple-sdates.R index d0c4bd38b55050d51b0c2f25487ed7a168ce5529..6467a8420ff14a540912cb04c526fa58b58b8152 100644 --- a/tests/testthat/test-Start-multiple-sdates.R +++ b/tests/testthat/test-Start-multiple-sdates.R @@ -7,10 +7,12 @@ context("Start() multiple sdate with split + merge dim") # It might happen when reading experimental data with many start dates, # and the corresponding observations are required to have the same data structure.
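# [Illustrative sketch, added for reference] The two comment lines above
# describe the split + merge use case these tests exercise. A minimal sketch
# under stated assumptions: 'obs_path' and 'tas' are hypothetical stand-ins
# for a monthly-file archive, not the fixtures the tests below actually read.
library(startR)
obs_path <- '/illustrative/obs/$var$/$var$_$file_date$.nc'  # hypothetical
# Verification dates shaped [time = 3, sdate = 2]: one column per start date.
tmp <- as.POSIXct(c('2013-11-15', '2013-12-15', '2014-01-15',
                    '2013-12-15', '2014-01-15', '2014-02-15'), tz = 'UTC')
dates <- as.POSIXct(array(tmp, dim = c(time = 3, sdate = 2)),
                    origin = '1970-01-01', tz = 'UTC')
# One monthly file per unique YYYYMM among the requested dates.
file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7))))
obs <- Start(dat = obs_path,
             var = 'tas',
             file_date = file_date,
             latitude = 'all', longitude = 'all',
             time = values(dates),            # [time, sdate] selector array
             time_across = 'file_date',       # 'time' continues across files
             merge_across_dims = TRUE,        # merge the per-file pieces back,
             split_multiselected_dims = TRUE, # then split to [time, sdate]
             return_vars = list(time = 'file_date'),
             retrieve = TRUE)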
-ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc') +ecmwf_path_hc <- paste0('/esarchive/scratch/aho/startR_unittest_files/', ecmwf_path_hc) obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +obs_path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs_path) -var_name <- 'sfcWind' +var_name <- 'tas' var100_name <- 'windagl100' sdates.seq <- c("20161222","20161229","20170105","20170112") @@ -55,7 +57,7 @@ obs <- Start(dat = obs_path, ) expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], @@ -131,7 +133,7 @@ obs <- Start(dat = obs_path, expect_equal( dim(obs), - c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 47) + c(dat = 1, var = 1, latitude = 1, longitude = 1, sdate = 4, syear = 20, time = 46) ) expect_equal( obs[1, 1, 1, 1, 1, 1, 8:15], diff --git a/tests/testthat/test-Start-path_glob_permissive.R b/tests/testthat/test-Start-path_glob_permissive.R index ddd69be18d24b30380ecfd161bb3a7c63d78aa8d..e32d0b3f67fae8165a8d2b1b98314df5a43bf6d0 100644 --- a/tests/testthat/test-Start-path_glob_permissive.R +++ b/tests/testthat/test-Start-path_glob_permissive.R @@ -8,6 +8,7 @@ years <- paste0(c(1960:1961), '01-', c(1960:1961), '12') repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/*/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -30,10 +31,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + 
"/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. @@ -48,6 +49,7 @@ data <- Start(dat = repos, repos <- paste0('/esarchive/exp/ecearth/$expid$/diags/CMIP/EC-Earth-Consortium/', 'EC-Earth3/historical/$member$/Omon/$var$/gn/v*/', '$var$_Omon_EC-Earth3_historical_*_gn_$year$.nc') +repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) suppressWarnings( data <- Start(dat = repos, var = 'tosmean', @@ -72,10 +74,10 @@ data <- Start(dat = repos, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", - "/esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", - "/esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), + array(c("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196001-196012.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1st/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r7i1p1f1/Omon/tosmean/gn/v20190302/tosmean_Omon_EC-Earth3_historical_r7i1p1f1_gn_196101-196112.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecearth/a1sx/diags/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r10i1p1f1/Omon/tosmean/gn/v20190308/tosmean_Omon_EC-Earth3_historical_r10i1p1f1_gn_196101-196112.nc"), dim = c(dat = 1, var = 1, expid = 2, year = 2, member = 1)) ) # NOTE: in R_3.2.0, the following test doesn't have dimension. In R_3.6.2 it does. @@ -138,6 +140,7 @@ test_that("2. 
tag at the end", { path <- "/esarchive/exp/ecmwf/system4_m1/6hourly/$var$/$var$_$year$0*.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( data <- Start(dat = path, var = "tas", @@ -152,8 +155,8 @@ data <- Start(dat = path, expect_equal( as.list(attr(data, 'ExpectedFiles')), -list("/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", - "/esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") +list("/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19940501.nc", + "/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system4_m1/6hourly/tas/tas_19950101.nc") ) }) diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index 2fe5de95b5f1c1dded90ad24f868e864c1704c92..0ac770167a58a63c5d4b7d26b5c8319464dc6abb 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -13,6 +13,7 @@ context("Start() lat Reorder test") ############################################## path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:360] @@ -884,8 +885,10 @@ test_that("4-x-2-12-123-2-1-x", { # 1-1. no Sort(), NULL ## lat should be descending +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) suppressWarnings( -exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -904,7 +907,7 @@ exp1_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 1-2. Sort(), NULL ## lat should be ascending suppressWarnings( -exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -923,7 +926,7 @@ exp1_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 1-3. Sort(drcreasing = T), NULL ## lat should be descending suppressWarnings( -exp1_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1_3 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -971,7 +974,7 @@ as.vector(attr(exp1_3, 'Variables')$common$latitude) # 2-1. no Sort(), 'dat' ## lat should be descending suppressWarnings( -exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -990,7 +993,7 @@ exp2_1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 2-2. Sort(), 'dat' ## lat should be ascending suppressWarnings( -exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -1009,7 +1012,7 @@ exp2_2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$v # 2-3. 
Sort(drcreasing = T), NULL ## lat should be descending suppressWarnings( -exp2_3 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2_3 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 6ca7b15c3c006db1137295003ce69ff8bc001bea..af9c2db255c7c5e8e3c684165cd7c3b62db58c3c 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -16,6 +16,7 @@ context("Start() lat Reorder test") ############################################## #path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values ############################################## diff --git a/tests/testthat/test-Start-reorder-lon-180to180.R b/tests/testthat/test-Start-reorder-lon-180to180.R index aa209b8353300d849203c37594e5671fec76b1e9..e0a066c3f9d4062716b045f6a38036142b241ecd 100644 --- a/tests/testthat/test-Start-reorder-lon-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-180to180.R @@ -15,6 +15,7 @@ context("Start() lon Reorder non-transform -180to180 test") ## Origin longitude in file: [-179.71875:180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index 4351aa438689ac88f387b95346cd05d891168362..46da00e441253b53d23754c07fa554c1e7bc50af 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -14,6 +14,7 @@ context("Start() lon Reorder transform -180to180 test") # 3-2 ## Origin longitude in file: [-179.71875:180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index 3d2047ea27f31f98e84928e0cbcb7ecfef186df2..e05c73109983f01ce54cbddc9199a8b0e30743d0 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -14,6 +14,8 @@ context("Start() lon Reorder transform 0to360 test") # 3-2 ## Origin longitude in file: [0:360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) + variable <- 'psl' sdate <- '19821201' diff --git a/tests/testthat/test-Start-reorder-lon0to360.R b/tests/testthat/test-Start-reorder-lon0to360.R index 340860aa8f3a9d47fbbb974a741eb368aa2cdd9c..84b0527d5b11ccf48122e3d34d9fe0df968103d1 100644 --- a/tests/testthat/test-Start-reorder-lon0to360.R +++ b/tests/testthat/test-Start-reorder-lon0to360.R @@ -13,6 +13,7 @@ context("Start() lon Reorder non-transform 0to360 test") ############################################## # 3-2 path_exp <- 
'/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:359.722222222222] diff --git a/tests/testthat/test-Start-reorder-lon0to360Coarse.R b/tests/testthat/test-Start-reorder-lon0to360Coarse.R index e093a8870cd2640057c2cb025d730488cc9295af..16ad2e0dfdd68962a026c6e01623a310b58f5a5f 100644 --- a/tests/testthat/test-Start-reorder-lon0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon0to360Coarse.R @@ -13,6 +13,7 @@ context("Start() lon Reorder non-transform 0to360 test") ############################################## # 3-2 path_exp <- '/esarchive/exp/ncar/cesm-dple/monthly_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) ## Origin longitude in file: [0:358.75] step 1.25 degrees #288 values ## latitude: -90 o 90 {-90, -89.05759 ...} #192 values diff --git a/tests/testthat/test-Start-reorder-retrieve.R b/tests/testthat/test-Start-reorder-retrieve.R index 42a79ce343a0dad629e0b6a5f1577cc98ad03e47..28d8c7933aa3ed237bfc9523fd5aa277809613ab 100644 --- a/tests/testthat/test-Start-reorder-retrieve.R +++ b/tests/testthat/test-Start-reorder-retrieve.R @@ -7,6 +7,7 @@ context("Start() lon Reorder non-transform retrieve test") test_that("original range 0to360", { ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -2 lons.max <- 2 @@ -86,6 +87,7 @@ res2 <- Start(dat = path_exp, test_that("original range -180to180", { ## Origin longitude in file: [0:359.722222222222] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) variable <- 'tas' sdate <- '199212' diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R index b8279de2fdf1c2d8c7622da51ce19bb239ad9824..fce2dc41818da4ee1412cc6823f051733b8722b3 100644 --- a/tests/testthat/test-Start-reorder_all.R +++ b/tests/testthat/test-Start-reorder_all.R @@ -7,6 +7,7 @@ context("No transform, reorder test: 'all'") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -23,6 +24,7 @@ NcClose(file) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. 
lat", { # lon_reorder = CircularSort(0, 360) diff --git a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R index b2ca0ace687be4d7ff157cc64e4e357d34b0445e..4027b78241e2e5364abc02cb445e7cb7206c9e58 100644 --- a/tests/testthat/test-Start-reorder_indices.R +++ b/tests/testthat/test-Start-reorder_indices.R @@ -6,6 +6,7 @@ context("No transform, reorder test: indices()") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -22,6 +23,7 @@ NcClose(file) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. lat", { # lon_reorder = CircularSort(0, 360) diff --git a/tests/testthat/test-Start-reshape.R b/tests/testthat/test-Start-reshape.R index 3d576d806115cf4ce0caec983b475f45ad0a37be..fc7acb69d96020e26d347403b0e2ed47502e4811 100644 --- a/tests/testthat/test-Start-reshape.R +++ b/tests/testthat/test-Start-reshape.R @@ -2,7 +2,9 @@ context("Start() reshape parameters check") # This one is more comprehensive than test-Start-split-merge.R path_exp <- '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) path_obs <- '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h-r360x181/$var$_$date$.nc' +path_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_obs) var <- 'tas' sdate <- paste0(1993:1995, '1201') @@ -31,7 +33,7 @@ easy_sdate <- c('199312', paste0(rep(1994:1995, each = 3), c('01', '02', '12')), easy_array <- c() for (i in 1:length(easy_sdate)) { - easy_file <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', + easy_file <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', easy_sdate[i], '.nc')) if (substr(easy_sdate[i], 5, 6) == '02') { sub_time <- 1:28 @@ -400,7 +402,7 @@ exp1 <- Start(dat = path_exp, ) # easyNCDF easy_sdate_exp <- '19931201' -easy_file_exp <- NcOpen(paste0('/esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_', +easy_file_exp <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/exp/ecmwf/system5c3s/daily_mean/tas_f6h/tas_', easy_sdate_exp, '.nc')) easy_exp <- NcToArray(easy_file_exp, vars_to_read = 'tas', dim_indices = list(longitude = c(1), latitude = c(1), ensemble = c(1), @@ -490,7 +492,7 @@ obs2 <- Start(dat = path_obs, retrieve = TRUE) ) # easyNCDF -easy_file_199311 <- NcOpen(paste0('/esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', +easy_file_199311 <- NcOpen(paste0('/esarchive/scratch/aho/startR_unittest_files//esarchive/recon/ecmwf/era5/daily_mean/tas_f1h-r360x181/tas_', '199311', '.nc')) easy_obs_199311 <- NcToArray(easy_file_199311, vars_to_read = 'tas', dim_indices = list(lon = c(1), lat = c(1), time = 1:30)) diff --git a/tests/testthat/test-Start-return_vars_name.R b/tests/testthat/test-Start-return_vars_name.R index 4bf83c66f1be76cd5103d63a3a8d50bdee545695..e97023d86c9bb8fabe72dfda8cbb012b7c4f8c8f 100644 --- a/tests/testthat/test-Start-return_vars_name.R +++ b/tests/testthat/test-Start-return_vars_name.R @@ -3,6 +3,7 @@ context("Start() return_vars name") # be used but 
will be changed back to the inner dim names. repos_obs <- '/esarchive/obs/ukmo/hadisst_v1.1/monthly_mean/$var$/$var$_$date$.nc' +repos_obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos_obs) #--------------------------------------------------------------- diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index 8793296fd06001424a32a0a8369803a0a1831243..d95fa623aefc3cd617e62b1c72c9dcdbd3e9a78f 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -1,7 +1,8 @@ context("Start() split + merge dim and value check") -var_name <- 'sfcWind' -path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc' +var_name <- 'tas' +path.exp <- '/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily_mean/$var$_f6h/$sdate$/$var$_$syear$.nc' +path.exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.exp) suppressWarnings( hcst <- Start(dat = path.exp, @@ -24,7 +25,8 @@ file_date <- sort(unique(gsub('-', '', sapply(as.character(dates), substr, 1, 7)))) path.obs <- '/esarchive/recon/ecmwf/era5/1hourly/$var$/$var$_$file_date$.nc' - +path.obs <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path.obs) +var_name <- "sfcWind" test_that("1. split + merge + narm", { suppressWarnings( @@ -148,6 +150,7 @@ obs <- Start(dat = path.obs, test_that("4. split only", { obs.path <- "/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$file_date$.nc" +obs.path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', obs.path) variable <- "prlr" dates_file <- c("201311","201312","201411","201412") dim(dates_file) <- c(smonth = 2, syear = 2) diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R index 3aa193042caec24d0065c1eb60d987a3fb96aa0a..a05a42a034d5f75052a451cc01323667710747e8 100644 --- a/tests/testthat/test-Start-time_unit.R +++ b/tests/testthat/test-Start-time_unit.R @@ -4,7 +4,7 @@ test_that("1. The data has units like time", { suppressWarnings( -FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', +FD <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', var = 'FD', # units: days time = indices(1), longitude = indices(1), @@ -14,7 +14,7 @@ FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var retrieve = TRUE) ) suppressWarnings( -FD2 <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', +FD2 <- Start(dat = '/esarchive/scratch/aho/startR_unittest_files/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', var = 'FD', # units: days time = indices(1), longitude = indices(1), @@ -39,7 +39,7 @@ test_that("2. 
The metadata variable name is not time", { # VITIGEOOS vari <- "rsds" -anlgs <- paste0("/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", +anlgs <- paste0("/esarchive/scratch/aho/startR_unittest_files/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", "$var$/$var$-vitigeoss-cat","_1999-2018_", "$file_date$.nc") file_date_array <- array(dim = c(sweek = 2, sday = 3)) diff --git a/tests/testthat/test-Start-transform-all.R b/tests/testthat/test-Start-transform-all.R index 8a9ca657f61e7cf32a1f55d4531cdc515c781479..7fbac55aff554a4443403774aef71598a1405830 100644 --- a/tests/testthat/test-Start-transform-all.R +++ b/tests/testthat/test-Start-transform-all.R @@ -9,8 +9,8 @@ context("Transform test target grid: lon and lat = 'all'") # cdo is used to verify the data values # Test 1: original grid 'r360x180' library(easyNCDF) -grid1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64' -path <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180' +grid1 <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CanESM5/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Omon/tos/gr/v20190429/tos_Omon_CanESM5_dcppA-hindcast_s1980-r1i1p2f1_gr_198101-199012.nc' # 'r128x64' +path <- '/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/tos/gr/v20191016/tos_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2015-r1i1p1f1_gr_201511-202512.nc' # 'r360x180' file <- NcOpen(path) arr <- NcToArray(file, @@ -36,6 +36,8 @@ suppressWarnings( #--------------------------------------------------------------- # Test 2: Original grid 'r432x324' path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s2009-r1i1p1f2_gn_201501-201512.nc' # 'r432x324' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(lat = 1:324, lon = 1:432, time = 1:2), @@ -53,6 +55,7 @@ suppressWarnings( #--------------------------------------------------------------- path1 <- '/esarchive/exp/CMIP6/dcppA-hindcast/CESM1-1-CAM5-CMIP5/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Omon/$var$/gr/v20191016/$var$_Omon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s$sdate$-r1i1p1f1_gr_$sdate$11-202512.nc' # 'r360x180' +path1 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path1) test_that("1. 'all'", { suppressWarnings( @@ -108,6 +111,7 @@ test_that("1. 'all'", { #--------------------------------------------------------------- path2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/$var$/gn/v20200417/$var$_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$sdate$11-201512.nc' # 'r432x324' +path2 <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path2) test_that("2. 
test path 2", { suppressWarnings( diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index dee8b4e0582e007057df58d7deeeafbc6d66cd84..90b48b658d2dd2b1556c953e8e6f24b518ba3184 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -26,6 +26,9 @@ context("Transform: check with cdo") # The result of cdo is from CDO version 1.9.8. +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + ############################################## test_that("1. normal regional situation", { @@ -35,7 +38,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -99,7 +102,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -173,7 +176,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -251,7 +254,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -308,7 +311,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -386,7 +389,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -456,7 +459,7 @@ lats.min <- 20 lats.max <- 40 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -517,7 +520,7 @@ lons.min <- 0 lons.max <- 359 suppressWarnings( - exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -617,7 +620,7 @@ lons.min <- 0.5 lons.max <- 359.9 suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -709,4 +712,4 @@ expect_equal( # [8,] 299.2109 300.3170 300.1524 299.6214 298.8563 # [9,] 299.4723 299.9515 299.4566 299.0601 299.5071 # [10,] 299.5299 299.7573 299.0317 299.1104 300.0644 -############################################## \ No newline at end of file +############################################## diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index b41ec0a106162306061b268a0d6e9e66e6a5b60d..3852da98971dd6910cc1e08557bec160545bb4a5 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -9,7 +9,7 @@ 
context("Transform and lat_reorder test: 'all'") #--------------------------------------------------------------- # cdo is used to verify the data values library(easyNCDF) -path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- "/esarchive/scratch/aho/startR_unittest_files/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -27,6 +27,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. 'all'", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 6c3a7976f50b9e3f68dbc072f0321ac0a6d60fc9..f72954529551af600264af51d63581583a435928 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -15,6 +15,7 @@ context("Transform and lat_reorder test: indices") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -32,6 +33,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices(1:640)", { diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index 92490ae6fb03f264c4f04ee0d3a4a503cfa995a6..03331019aee2412307e151046dc5540c3c70e8e2 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -13,6 +13,7 @@ context("Transform and lat_reorder test: values") # cdo is used to verify the data values library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -30,6 +31,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. 
values(list(-90, 90))", { diff --git a/tests/testthat/test-Start-transform-lon-across_meridian.R b/tests/testthat/test-Start-transform-lon-across_meridian.R index f16404671bb44b0080c851f27b7388e55eeb1595..d3c3dfa774f17d826a49cc120524987961a9e135 100644 --- a/tests/testthat/test-Start-transform-lon-across_meridian.R +++ b/tests/testthat/test-Start-transform-lon-across_meridian.R @@ -5,6 +5,8 @@ context("Start() transform across_meridian lon order check") test_that("first test", { repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/$var$_$sdate$.nc" + repos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', repos) + var <- 'tas' lon.min <- 170 lon.max <- 185 #359.723 #360 diff --git a/tests/testthat/test-Start-transform-metadata.R b/tests/testthat/test-Start-transform-metadata.R index ede3c959c22c258b384dc7789ab68fb299d7040c..62d31da5cb774570dda8369acbcb8c1842e33d8d 100644 --- a/tests/testthat/test-Start-transform-metadata.R +++ b/tests/testthat/test-Start-transform-metadata.R @@ -5,6 +5,7 @@ test_that("1. Sort() and CircularSort(0, 360)", { # Original lon is [-180, 180] path_exp <- '/esarchive/recon/ecmwf/era5/original_files/reorder/daily_mean/$var$/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- -10 lons.max <- 10 lats.min <- 10 @@ -140,6 +141,7 @@ test_that("2. Sort(decreasing = TRUE) and CircularSort(-180, 180)", { # Original lon is [0, 360] path_exp <- '/esarchive/exp/ecmwf/system5_m1/daily_mean/$var$_f6h/$var$_$sdate$.nc' +path_exp <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_exp) lons.min <- 190 lons.max <- 200 lats.min <- 10 diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 657cca3589991dc3ad485090ffdc66cf4b1581b7..500168e789afb0bde4847398616b0fc5a3bc828b 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -14,6 +14,7 @@ context("Transform: three selector forms") # cdo is used to verify the data values library(easyNCDF) path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) file <- NcOpen(path) arr <- NcToArray(file, dim_indices = list(time = 1, ensemble = 1, @@ -31,6 +32,7 @@ arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) test_that("1. indices", { diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index ff83441a05299f9db2b804a0d9f8234ae1ecaa12..e2fef3b098b34c842b4e234b3f9b6e8354f856eb 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -6,9 +6,11 @@ test_that("1. 
ex1_8, case 1", { path_tas <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/', '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tas <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tas) path_tos <- paste0('/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/', 'dcppA-hindcast/$member$/Omon/$var$/gr/v20190713/', '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') +path_tos <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path_tos) suppressWarnings( data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), var = c('tas', 'tos'), diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index 76c4f91d2758665a32cf49098353bc6c98dcf703..a84530f9cbb6400b2aa2798ba563104c075eae1f 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -10,6 +10,7 @@ context("List of values and vector of values") # To get lat and lon vectors library(easyNCDF) pathh <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +pathh <- paste0('/esarchive/scratch/aho/startR_unittest_files/', pathh) file <- NcOpen(pathh) lats <- NcToArray(file, dim_indices = list(latitude = 1:35), vars_to_read = 'latitude') @@ -18,11 +19,14 @@ lons <- NcToArray(file, NcClose(file) #------------------------------------------------------------------ +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path) + test_that("1. transform", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -47,7 +51,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values. This one is a weird usage though... suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -85,7 +89,7 @@ test_that("2. no transform", { # lat and lon are lists of indices suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -104,7 +108,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of indices suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -137,7 +141,7 @@ test_that("3. transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -163,7 +167,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var #WRONG!!!!!!!!!! 
# lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -201,7 +205,7 @@ test_that("4. no transform, vector reverse", { # lat and lon are lists of values suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp1 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), @@ -220,7 +224,7 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # lat and lon are vectors of values suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +exp2 <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1),
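Every hunk above applies the same redirection: each hard-coded '/esarchive/...' input path is prefixed with '/esarchive/scratch/aho/startR_unittest_files/', so the unit tests read from a mirrored copy of the sample files rather than the production archive. A minimal sketch of how this repeated two-line pattern could be centralized, assuming a hypothetical helper file tests/testthat/helper-paths.R (testthat sources helper-*.R files automatically before running the tests; this helper is not part of the changeset above):

# Hypothetical helper, not part of this changeset; it factors out the
# prefixing pattern repeated in each test file above.
unittest_path <- function(path,
                          root = '/esarchive/scratch/aho/startR_unittest_files/') {
  # Prepend the unit-test mirror root to an absolute /esarchive path.
  paste0(root, path)
}

# Usage, equivalent to the repeated pattern:
#   path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc'
#   path <- paste0('/esarchive/scratch/aho/startR_unittest_files/', path)
path <- unittest_path('/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')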