diff --git a/.Rbuildignore b/.Rbuildignore index 90018c782d3ac1075895bbfa778c1248ae5259dd..2ef8ba9063900318c7c0be04cc2ac48a636842e4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,16 +6,16 @@ ^README\.md$ #\..*\.RData$ #^vignettes$ -#^tests$ ^inst/doc$ -#^inst/doc/*$ -#^inst/doc/figures/$ -#^inst/doc/usecase/$ -#^inst/PlotProfiling\.R$ -.gitlab-ci.yml +^\.gitlab-ci\.yml$ +## unit tests should be ignored when building the package for CRAN +^tests$ +^inst/PlotProfiling\.R$ + # Suggested by http://r-pkgs.had.co.nz/package.html ^.*\.Rproj$ # Automatically added by RStudio, ^\.Rproj\.user$ # used for temporary files. ^README\.Rmd$ # An Rmarkdown file used to generate README.md ^cran-comments\.md$ # Comments for CRAN submission -^NEWS\.md$ # A news file written in Markdown +#^NEWS\.md$ # A news file written in Markdown +^\.gitlab-ci\.yml$ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ef540fe63c5bdfb2b3ee3bfb3a3e9e6513f1b2a5..c7deb1af5aac18cff5d07efec0fc1b0949cabf83 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,7 @@ stages: build: stage: build script: - - module load R/3.6.1-foss-2015a-bare + - module load R/4.1.2-foss-2015a-bare - module load CDO/1.9.8-foss-2015a - R CMD build --resave-data . - R CMD check --as-cran --no-manual --run-donttest startR_*.tar.gz diff --git a/DESCRIPTION b/DESCRIPTION index af073b1b4205e4f57a6eee4980afcfd01203cff7..6b75f77f6ceeeba0d02b5bc1e04cd701ea4aef5e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.2.0-1 +Version: 2.2.1 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), @@ -8,6 +8,7 @@ Authors@R: c( person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")), person("Pierre-Antoine", "Bretonniere", , "pierre-antoine.bretonniere@bsc.es", role = c("ctb")), person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")), + person("Eva", "Rifa", , "eva.rifarovira@bsc.es", role = "ctb"), person("BSC-CNS", role = c("aut", "cph"))) Description: Tool to automatically fetch, transform and arrange subsets of multi- dimensional data sets (collections of files) stored in local and/or @@ -39,4 +40,5 @@ License: Apache License 2.0 URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues SystemRequirements: cdo ecFlow -RoxygenNote: 7.0.1 +Encoding: UTF-8 +RoxygenNote: 7.2.0 diff --git a/NEWS.md b/NEWS.md index 11f5a30f8636c0e1e50d84fdf2d9b0ffc60a4f29..c1dc90bd1044fbd763970d6e8422bd007bfe3ee4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,16 @@ -# startR v2.2.0-1 (Release date: 2022-04-19) +# startR v2.2.1 (Release date: 2022-11-17) +- Reduce warning messages from CDO. +- Reduce repetitive warning messages from CDORemapper() when single core is used. When multiple cores +are used, there are still repetitive messages. +- Bugfix in Start() about ClimProjDiags::Subset inputs. +- Bugfix when longitude selector range is very close but not global. The transform indices are correctly selected now. + +# startR v2.2.0-2 (Release date: 2022-08-25; internally) +- Use the destination grid to decide which indices to take after interpolation. +- Bugfix when Start() parameter "return_vars" is not used. 
+- Allow netCDF files to not have calendar attributes (force it to be standard calendar) + +# startR v2.2.0-1 (Release date: 2022-04-19; internally) - Bugfix for the case that the variable has units like time, e.g., "days". - Development of metadata reshaping. The metadata should correspond to data if data are reshaped by parameter "merge_across_dims" and "split_multiselected_dims", as well as if data selectors are not continuous indices. - Development of multiple dependency by array selector. An inner dimension indices can vary with multiple file dimensions. diff --git a/R/Compute.R b/R/Compute.R index 0aa94245bd5d5eff3c08fc4d61cb911dac628187..1450b0157b4d5480a8c077c09742f7b02e0e4f12 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -25,7 +25,7 @@ #' to use for the computation. The default value is 1. #'@param cluster A list of components that define the configuration of the #' machine to be run on. The comoponents vary from the different machines. -#' Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more +#' Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more #' details and examples. Only needed when the computation is not run locally. #' The default value is NULL. #'@param ecflow_suite_dir A character string indicating the path to a folder in diff --git a/R/NcDataReader.R b/R/NcDataReader.R index 9c85e331b04d1ce237f7269b2841c225541cc9e6..1c83d0af68c155c35bbe0dfb103342001fe3f594 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -210,6 +210,17 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, } else if (grepl(' since ', units)) { # Find the calendar calendar <- attr(result, 'variables')[[var_name]]$calendar + # Calendar types recognized by as.PCICt() + cal.list <- c("365_day", "365", "noleap", "360_day", "360", "gregorian", "standard", "proleptic_gregorian") + + if (is.null(calendar)) { + warning("Calendar is missing. Use the standard calendar to calculate time values.") + calendar <- 'gregorian' + } else if (!calendar %in% cal.list) { + # if calendar is not recognized by as.PCICt(), forced it to be standard + warning("The calendar type '", calendar, "' is not recognized by NcDataReader(). It is forced to be standard type.") + calendar <- 'gregorian' + } if (calendar == 'standard') calendar <- 'gregorian' parts <- strsplit(units, ' since ')[[1]] @@ -291,6 +302,7 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, result <- result * 30 * 24 * 60 * 60 # day to sec } else { #old code. The calendar is not in any of the above. + #NOTE: Should not have a chance to be used because the calendar types are forced to be standard above already. result <- result * 30.5 result <- result * 24 * 60 * 60 # day to sec } diff --git a/R/Start.R b/R/Start.R index db573a86685a3e3cd363c87cc9803ce807c04921..1251634b83286f0cabc6af45dffe63c14ea05019 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1221,7 +1221,7 @@ Start <- function(..., # dim = indices/selectors, dims_to_check <- debug debug <- TRUE } - + ############################## READING FILE DIMS ############################ # Check that no unrecognized variables are present in the path patterns # and also that no file dimensions are requested to THREDDs catalogs. 
@@ -1409,7 +1409,7 @@ Start <- function(..., # dim = indices/selectors, # Chunk it only if it is defined dim (i.e., list of character with names of depended dim) if (!(length(dat_selectors[[depending_dim_name]]) == 1 && dat_selectors[[depending_dim_name]] %in% c('all', 'first', 'last'))) { - if (sapply(dat_selectors[[depending_dim_name]], is.character)) { + if (any(sapply(dat_selectors[[depending_dim_name]], is.character))) { dat_selectors[[depending_dim_name]] <- dat_selectors[[depending_dim_name]][desired_chunk_indices] } @@ -1947,6 +1947,10 @@ Start <- function(..., # dim = indices/selectors, transformed_common_vars_unorder_indices <- NULL transform_crop_domain <- NULL + # store warning messages from transform + warnings1 <- NULL + warnings2 <- NULL + for (i in 1:length(dat)) { if (dataset_has_files[i]) { indices <- indices_of_first_files_with_data[[i]] @@ -2101,11 +2105,16 @@ Start <- function(..., # dim = indices/selectors, } # Transform the variables - transformed_data <- do.call(transform, c(list(data_array = NULL, - variables = vars_to_transform, - file_selectors = selectors_of_first_files_with_data[[i]], - crop_domain = transform_crop_domain), - transform_params)) + tmp <- .withWarnings( + do.call(transform, c(list(data_array = NULL, + variables = vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]], + crop_domain = transform_crop_domain), + transform_params)) + ) + transformed_data <- tmp$value + warnings1 <- c(warnings1, tmp$warnings) + # Discard the common transformed variables if already transformed before if (!is.null(transformed_common_vars)) { common_ones <- which(names(picked_common_vars) %in% names(transformed_data$variables)) @@ -2661,8 +2670,14 @@ Start <- function(..., # dim = indices/selectors, selector_indices_to_take <- which(selector_file_dim_array == j, arr.ind = TRUE)[1, ] names(selector_indices_to_take) <- names(selector_file_dims) selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take - sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take), - as.list(selector_indices_to_take), drop = 'selected') + # "selector_indices_to_take" is an array if "selector_file_dims" is not 1 (if + # selector is an array with a file_dim dimname, i.e., time = [sdate = 2, time = 4]. 
+ if (!is.null(names(selector_indices_to_take))) { + sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take), + as.list(selector_indices_to_take), drop = 'selected') + } else { + sub_array_of_selectors <- selector_array + } if (debug) { if (inner_dim %in% dims_to_check) { @@ -2681,8 +2696,14 @@ Start <- function(..., # dim = indices/selectors, } else { if (length(names(var_file_dims)) > 0) { var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))] - sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take), - as.list(var_indices_to_take), drop = 'selected') + if (!is.null(names(var_indices_to_take))) { + sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take), + as.list(var_indices_to_take), drop = 'selected') + } else { + # time across some file dim (e.g., "file_date") but doesn't have + # this file dim as dimension (e.g., time: [sdate, time]) + sub_array_of_values <- var_with_selectors + } } else { sub_array_of_values <- var_with_selectors } @@ -2967,12 +2988,16 @@ Start <- function(..., # dim = indices/selectors, inner_dim, sub_array_of_fri) } } - - transformed_subset_var <- do.call(transform, c(list(data_array = NULL, - variables = subset_vars_to_transform, - file_selectors = selectors_of_first_files_with_data[[i]], - crop_domain = transform_crop_domain), - transform_params))$variables[[var_with_selectors_name]] + tmp <- .withWarnings( + do.call(transform, c(list(data_array = NULL, + variables = subset_vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]], + crop_domain = transform_crop_domain), + transform_params))$variables[[var_with_selectors_name]] + ) + transformed_subset_var <- tmp$value + warnings2 <- c(warnings2, tmp$warnings) + # Sorting the transformed variable and working out the indices again after transform. if (!is.null(dim_reorder_params[[inner_dim]])) { transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var) @@ -3046,97 +3071,18 @@ Start <- function(..., # dim = indices/selectors, sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] } - # Chunk sub_array_of_sri if this inner_dim needs to be chunked - #TODO: Potential problem: the transformed_subset_var value falls between - # the end of sub_sub_array_of_values of the 1st chunk and the beginning - # of sub_sub_array_of_values of the 2nd chunk. Then, one sub_array_of_sri - # will miss. 'previous_sri' is checked and will be included if this - # situation happens, but don't know if the transformed result is - # correct or not. - # NOTE: The chunking criteria may not be 100% correct. The current way - # is to pick the sri that larger than the minimal sub_sub_array_of_values - # and smaller than the maximal sub_sub_array_of_values; if it's - # the first chunk, make sure the 1st sri is included; if it's the - # last chunk, make sure the last sri is included. - if (chunks[[inner_dim]]["n_chunks"] > 1) { - sub_array_of_sri_complete <- sub_array_of_sri - if (is.list(sub_sub_array_of_values)) { # list - sub_array_of_sri <- - which(transformed_subset_var >= min(unlist(sub_sub_array_of_values)) & - transformed_subset_var <= max(unlist(sub_sub_array_of_values))) - # if it's 1st chunk & the first sri is not included, include it. 
- if (chunks[[inner_dim]]["chunk"] == 1 & - !(sub_array_of_sri_complete[1] %in% sub_array_of_sri)) { - sub_array_of_sri <- c(sub_array_of_sri_complete[1], sub_array_of_sri) - } - # if it's last chunk & the last sri is not included, include it. - if (chunks[[inner_dim]]["chunk"] == chunks[[inner_dim]]["n_chunks"] & - !(tail(sub_array_of_sri_complete, 1) %in% sub_array_of_sri)) { - sub_array_of_sri <- c(sub_array_of_sri, tail(sub_array_of_sri_complete, 1)) - } +#======================================================== - # Check if sub_array_of_sri perfectly connects to the previous sri. - # If not, inlclude the previous sri. - #NOTE 1: don't know if the transform for the previous sri is - # correct or not. - #NOTE 2: If crop = T, sub_array_of_sri always starts from 1. - # Don't know if the cropping will miss some sri or not. - if (sub_array_of_sri[1] != 1) { - if (!is.null(previous_sub_sub_array_of_values)) { - # if decreasing = F - if (transformed_subset_var[1] < transformed_subset_var[2]) { - previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values)) - } else { - # if decreasing = T - previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values)) - } - if (previous_sri + 1 != sub_array_of_sri[1]) { - sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)] - } - } - } - - } else { # is vector - tmp <- which(transformed_subset_var >= min(sub_sub_array_of_values) & - transformed_subset_var <= max(sub_sub_array_of_values)) - # Ensure tmp and sub_array_of_sri are both ascending or descending - if (is.unsorted(tmp) != is.unsorted(sub_array_of_sri)) { - tmp <- rev(tmp) - } - # Include first or last sri if tmp doesn't have. It's only for - # ""vectors"" because vectors look for the closest value. - #NOTE: The condition here is not correct. The criteria should be - # 'vector' instead of indices. - if (chunks[[inner_dim]]["chunk"] == 1) { - sub_array_of_sri <- unique(c(sub_array_of_sri[1], tmp)) - } else if (chunks[[inner_dim]]["chunk"] == - chunks[[inner_dim]]["n_chunks"]) { # last chunk - sub_array_of_sri <- unique(c(tmp, sub_array_of_sri[length(sub_array_of_sri)])) - } else { - sub_array_of_sri <- tmp - } - # Check if sub_array_of_sri perfectly connects to the previous sri. - # If not, inlclude the previous sri. - #NOTE 1: don't know if the transform for the previous sri is - # correct or not. - #NOTE 2: If crop = T, sub_array_of_sri always starts from 1. - # Don't know if the cropping will miss some sri or not. - if (sub_array_of_sri[1] != 1) { - if (!is.null(previous_sub_sub_array_of_values)) { - # if decreasing = F - if (transformed_subset_var[1] < transformed_subset_var[2]) { - previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values)) - } else { - # if decreasing = T - previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values)) - } - if (previous_sri + 1 != which(sub_array_of_sri[1] == sub_array_of_sri_complete)) { - sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)] - } - } - } - } +# Instead of using values to find sri, directly use the destination grid to count. +#NOTE: sub_array_of_sri seems to start at 1 always (because crop = c(lonmin, lonmax, latmin, latmax) already?) 
+ if (chunks[[inner_dim]]["n_chunks"] > 1) { + sub_array_of_sri <- sub_array_of_sri[get_chunk_indices( + length(sub_array_of_sri), + chunks[[inner_dim]]["chunk"], + chunks[[inner_dim]]["n_chunks"], + inner_dim)] } +#======================================================== ordered_sri <- sub_array_of_sri sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri] @@ -3333,7 +3279,11 @@ Start <- function(..., # dim = indices/selectors, selector_store_position <- chunk } sub_array_of_indices <- transformed_indices[which(indices_chunk == chunk)] - sub_array_of_indices <- transformed_indices[which(indices_chunk == chunk)] + + #NOTE: This 'with_transform' part is probably not tested because + # here is for the inner dim that goes across a file dim, which + # is normally not lat and lon dimension. If in the future, we + # can interpolate time, this part needs to be examined. if (with_transform) { # If the provided selectors are expressed in the world # before transformation @@ -3716,11 +3666,13 @@ Start <- function(..., # dim = indices/selectors, tmp_fun <- function (x, y) { any(names(dim(x)) %in% y) } - inner_dim_has_split_dim <- names(which(unlist(lapply( - picked_common_vars, tmp_fun, names(all_split_dims))))) - if (!identical(inner_dim_has_split_dim, character(0))) { - # If merge_across_dims also, it will be replaced later - saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + if (!is.null(picked_common_vars)) { + inner_dim_has_split_dim <- names(which(unlist(lapply( + picked_common_vars, tmp_fun, names(all_split_dims))))) + if (!identical(inner_dim_has_split_dim, character(0))) { + # If merge_across_dims also, it will be replaced later + saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + } } } } @@ -3785,7 +3737,7 @@ Start <- function(..., # dim = indices/selectors, if (!merge_across_dims & split_multiselected_dims & identical(inner_dim_has_split_dim, character(0))) { final_dims_fake_metadata <- NULL } else { - if (!merge_across_dims & split_multiselected_dims) { + if (!merge_across_dims & split_multiselected_dims & !is.null(picked_common_vars)) { if (any(names(all_split_dims[[1]]) %in% names(dim(picked_common_vars[[inner_dim_has_split_dim]]))) & names(all_split_dims)[1] != inner_dim_has_split_dim) { if (inner_dim_has_split_dim %in% names(final_dims)) { @@ -3803,7 +3755,10 @@ Start <- function(..., # dim = indices/selectors, final_dims_fake, dims_of_merge_dim, all_split_dims) } } - + + # store warning messages from transform + warnings3 <- NULL + # The following several lines will only run if retrieve = TRUE if (retrieve) { @@ -3882,10 +3837,12 @@ Start <- function(..., # dim = indices/selectors, # the appropriate work pieces. work_pieces <- retrieve_progress_message(work_pieces, num_procs, silent) + # NOTE: In .LoadDataFile(), metadata is saved in metadata_folder/1 or /2 etc. Before here, # the path name is created in work_pieces but the path hasn't been built yet. 
if (num_procs == 1) { - found_files <- lapply(work_pieces, .LoadDataFile, + tmp <- .withWarnings( + lapply(work_pieces, .LoadDataFile, shared_matrix_pointer = shared_matrix_pointer, file_data_reader = file_data_reader, synonims = synonims, @@ -3893,9 +3850,15 @@ Start <- function(..., # dim = indices/selectors, transform_params = transform_params, transform_crop_domain = transform_crop_domain, silent = silent, debug = debug) + ) + found_files <- tmp$value + warnings3 <- c(warnings3, tmp$warnings) + } else { cluster <- parallel::makeCluster(num_procs, outfile = "") # Send the heavy work to the workers + ##NOTE: .withWarnings() can't catch warnings like it does above (num_procs == 1). The warnings + ## show below when "bigmemory::as.matrix(data_array)" is called. Don't know how to fix it for now. work_errors <- try({ found_files <- parallel::clusterApplyLB(cluster, work_pieces, .LoadDataFile, shared_matrix_pointer = shared_matrix_pointer, @@ -4000,7 +3963,7 @@ Start <- function(..., # dim = indices/selectors, picked_common_vars[[across_inner_dim]] <- metadata_tmp attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } - if (split_multiselected_dims) { + if (split_multiselected_dims & !is.null(picked_common_vars)) { if (!identical(inner_dim_has_split_dim, character(0))) { metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates @@ -4129,7 +4092,7 @@ Start <- function(..., # dim = indices/selectors, picked_common_vars[[across_inner_dim]] <- metadata_tmp attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr } - if (split_multiselected_dims) { + if (split_multiselected_dims & !is.null(picked_common_vars)) { if (!identical(inner_dim_has_split_dim, character(0))) { metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) # Convert numeric back to dates @@ -4143,6 +4106,16 @@ Start <- function(..., # dim = indices/selectors, } } + # Print the warnings from transform + if (!is.null(c(warnings1, warnings2, warnings3))) { + transform_warnings_list <- lapply(c(warnings1, warnings2, warnings3), function(x) { + return(x$message) + }) + transform_warnings_list <- unique(transform_warnings_list) + for (i in 1:length(transform_warnings_list)) { + .warning(transform_warnings_list[[i]]) + } + } # Change final_dims_fake back because retrieve = FALSE will use it for attributes later if (exists("final_dims_fake_output")) { diff --git a/R/Utils.R b/R/Utils.R index 9bf23e995b4f2c75e262ebc675547b9960feb132..3d4d864a31c93f80529c2730dc3436e0149a3251 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -859,3 +859,13 @@ } return(unlist(new_list)) } + +.withWarnings <- function(expr) { + myWarnings <- NULL + wHandler <- function(w) { + myWarnings <<- c(myWarnings, list(w)) + invokeRestart("muffleWarning") + } + val <- withCallingHandlers(expr, warning = wHandler) + list(value = val, warnings = myWarnings) +} diff --git a/R/zzz.R b/R/zzz.R index 6b6189b3c0697892ba35a3f78eecb0db42832343..381c9d3ac69ce95bc4d1b8aa6c68bde291e5437c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -566,7 +566,7 @@ generate_vars_to_transform <- function(vars_to_transform, picked_vars, transform # Turn indices to values for transform_crop_domain generate_transform_crop_domain_values <- function(transform_crop_domain, picked_vars) { - if (transform_crop_domain == 'all') { + if (any(transform_crop_domain == 'all')) { transform_crop_domain <- c(picked_vars[1], tail(picked_vars, 1)) } else { # 
indices() if (is.list(transform_crop_domain)) { @@ -692,9 +692,11 @@ generate_sub_array_of_fri <- function(with_transform, goes_across_prime_meridian } else if (start_padding < beta) { # left side too close to border, need to go to right side sub_array_of_fri <- c((first_index - start_padding):(last_index + end_padding), (n - (beta - start_padding - 1)):n) + sub_array_of_fri <- unique(sub_array_of_fri) } else if (end_padding < beta) { # right side too close to border, need to go to left side sub_array_of_fri <- c(1: (beta - end_padding), (first_index - start_padding):(last_index + end_padding)) + sub_array_of_fri <- unique(sub_array_of_fri) } } @@ -706,6 +708,7 @@ generate_sub_array_of_fri <- function(with_transform, goes_across_prime_meridian } } } + if (print_warning) { .warning(paste0("Adding parameter transform_extra_cells = ", beta, " to the transformed index excesses ", diff --git a/inst/doc/faq.md b/inst/doc/faq.md index 83065d47e3ecef79af49e958188888d7eb45f4c7..f92b798a94570169ac7a7f1c6bfdf00d3ebb90d1 100644 --- a/inst/doc/faq.md +++ b/inst/doc/faq.md @@ -28,6 +28,8 @@ This document intends to be the first reference for any doubts that you may have 22. [Define the selector when the indices in the files are not aligned](#22-define-the-selector-when-the-indices-in-the-files-are-not-aligned) 23. [The best practice of using vector and list for selectors](#23-the-best-practice-of-using-vector-and-list-for-selectors) 24. [Do both interpolation and chunking on spatial dimensions](#24-do-both-interpolation-and-chunking-on-spatial-dimensions) + 25. [What to do if your function has too many target dimensions](#25-what-to-do-if-your-function-has-too-many-target-dimensions) + 26. [Use merge_across_dims_narm to remove NAs](#26-use-merge_across_dims_narm-to-remove-nas) 2. **Something goes wrong...** @@ -82,12 +84,12 @@ all the possibilities and make the output data structure reasonable all the time Therefore, it is recommended to understand the way Start() rolls first, then you know what you should expect from the output and will not get confused with what it returns to you. -If you want to connet xxx along yyy, the parameter 'merge_across_dims' and 'merge_across_dims_narm' can help you achieve it. -See Example 1. If 'merge_across_dims = TRUE', the chunk dimension will disappear. -'merge_across_dims' simply attaches data one after another, so the NA values (if exist) will be the same places as the unmerged one (see Example 2). +Now we understand the cross relationship between dimensions, we can talk about how to merge them: use the parameters `merge_across_dims` and `merge_across_dims_narm`. +See Example 1. If `merge_across_dims = TRUE`, the chunk dimension will disappear. +`merge_across_dims` simply attaches data one after another, so the NA values (if exist) will be the same places as the unmerged one (see Example 2). -If you want to remove those additional NAs, you can use 'merge_across_dims_narm = TRUE', -then the NAs will be removed when merging into one dimension. (see Example 2). +If you want to remove those additional NAs, you can use `merge_across_dims_narm = TRUE`, +then the NAs will be removed when merging into one dimension. (see Example 2). To know more about `merge_across_dims_narm`, check [How-to-26](#26-use-merge-across-dims-narm-to-remove-nas). You can find more use cases at [ex1_2_exp_obs_attr.R](inst/doc/usecase/ex1_2_exp_obs_attr.R) and [ex1_3_attr_loadin.R](inst/doc/usecase/ex1_3_attr_loadin.R). 
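As a minimal sketch (the path pattern and dimension names here are hypothetical; adapt them to your own files), merging an inner `time` dimension that goes across a `chunk` file dimension, and removing the NAs that Start() adds while reshaping, could look like this:

```r
library(startR)

# Hypothetical path pattern: one file per 'chunk' of the time series
repos <- '/path/to/$var$_$chunk$.nc'

data <- Start(dat = repos,
              var = 'tas',
              chunk = 'all',                   # file dimension
              time = 'all',                    # inner dimension stored across files
              time_across = 'chunk',           # declare the dependency (xxx_across = yyy)
              merge_across_dims = TRUE,        # attach the pieces one after another; 'chunk' disappears
              merge_across_dims_narm = TRUE,   # remove the NAs created by the merge
              lat = 'all',
              lon = 'all',
              retrieve = TRUE)
```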
@@ -463,7 +465,7 @@ data <- Start(dat = repos, If you want to interpolate data by s2dv::CDORemap in function, you need to tell the machine which CDO module to use. Therefore, `CDO_module = 'CDO/1.9.5-foss-2018b'` should be -added in Compute() cluster list. See the example in usecase [ex2_3_cdo.R](inst/doc/usecase/ex2_3_cdo.R). +added in Compute() cluster list. See the example in usecase [ex2_3](inst/doc/usecase/ex2_3_cdo.R). ### 10. The number of members depends on the start date @@ -475,9 +477,9 @@ When trying to load both start dates at once using Start(), the order in which t - `sdates = c('19991101', '19990901')`, the member dimension will be of length 51, showing missing values for the members 26 to 51 in the second start date; - `sdates = c('19990901', '19991101')`, the member dimension will be of length 25, any member will be missing. -To ensure that all the members are retrieved, we can use parameter 'largest_dims_length'. See [FAQ 21](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) for details. +To ensure that all the members are retrieved, we can use parameter `largest_dims_length`. See [FAQ 21](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) for details. -The code to reproduce this behaviour could be found in the Use Cases section, [example 1.4](/inst/doc/usecase/ex1_4_variable_nmember.R). +The code to reproduce this behaviour could be found in the usecase [ex1_4](/inst/doc/usecase/ex1_4_variable_nmember.R). ### 11. Select the longitude/latitude region @@ -850,18 +852,18 @@ same. For example, the member number in one experiment is 25 in the early years increase to 51 later. If you assign `member = 'all'` in Start() call, the returned member dimension length will be 25 only. -The parameter 'largest_dims_length' is for this case. Its default value is `FALSE`, meaning +The parameter `largest_dims_length` is for this case. Its default value is `FALSE`, meaning that Start() can only use the first valid file to decide the dimensions. If it is changed to `TRUE`, Start() will examine all the required files to find the largest length for all the inner dimensions. It is time- and resource-consuming, but useful when you are not sure how the dimensions in all the files look like. -If you know the expected dimension length, it is recommended to assign 'largest_dims_length' +If you know the expected dimension length, it is recommended to assign `largest_dims_length` by a named integer vector, for example, `largest_dims_length = c(member = 51)`. Start() will adopt the provided ones and use the first valid file to decide the rest of dimensions. By this means, the efficiency can be similar to `largest_dims_length = FALSE`. -Find example in [use case ex1_4](/inst/doc/usecase/ex1_4_variable_nmember.R). +Find example in use case [ex1_4](/inst/doc/usecase/ex1_4_variable_nmember.R). ### 22. Define the selector when the indices in the files are not aligned @@ -973,6 +975,24 @@ the usage of those parameters to avoid unecessary errors. We provide some [use cases](inst/doc/usecase/ex2_12_transform_and_chunk.R) showing the secure ways of transformation + chunking. +### 25. What to do if your function has too many target dimensions +Unfortunately, we don't have a perfect solution now before we have multiple steps feature. Talk to maintainers to see how to generate a workaround for your case. + + +### 26. 
Use merge_across_dims_narm to remove NAs +The Start() parameter `merge_across_dims_narm` can be useful when you want to merge two dimensions together (e.g., time across chunk.) If you're not familiar with the usage of `xxx_across = yyy` and `merge_across_dims` yet, check [How-to-2](#2-indicate-dependent-dimension-and-use-merge-parameters-in-start) first. + +First thing to notice is that `merge_across_dims_narm` can only remove **the NAs that are created by Start()** during the reshaping process. +It doesn't remove the NAs in the original data. For example, in Example 2 in How-to-2, the NAs are removed because those NAs are added by Start(). + +Second, if the files don't share the same length of the merged dimension, you need to use `largest_dims_length = T` along. +This parameter tells Start() that it needs to look into each file to know the dimension length. By doing this, Start() knows that the NAs in the files with shorter dimension are added by it, so `merge_across_dims_narm = T` can remove those NAs correctly. +A typical example is reading daily data and merging time dimension together. The 30-day months will have one NA at the end of time dimension, if `merge_across_dims_narm = T` and `largest_dims_length = T` are not used. +Check usecase [ex1_16](/inst/doc/usecase/ex1_16_files_different_time_dim_length.R) for the example script. +See [How-to-21](#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) for more details of `largest_dims_length`. + + + # Something goes wrong... ### 1. No space left on device diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index 378f6486bd7a5cb60fadb35918a15bd056f55379..70b29a61b9d73cb7db017c3da9ed8ebf10c15be3 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -1,27 +1,30 @@ # Practical guide for processing large data sets with startR -This guide includes explanations and practical examples for you to learn how to use startR to efficiently process large data sets in parallel on the BSC's HPCs (CTE-Power 9, Marenostrum 4, ...). See the main page of the [**startR**](README.md) project for a general overview of the features of startR, without actual guidance on how to use it. +This guide includes explanations and practical examples for you to learn how to use startR to efficiently process large data sets in parallel on the BSC's HPCs (Nord3-v2, CTE-Power 9, Marenostrum 4, ...). See the main page of the [**startR**](README.md) project for a general overview of the features of startR, without actual guidance on how to use it. If you would like to start using startR rightaway on the BSC infrastructure, you can directly go through the "Configuring startR" section, copy/paste the basic startR script example shown at the end of the "Introduction" section onto the text editor of your preference, adjust the paths and user names specified in the `Compute()` call, and run the code in an R session after loading the R and ecFlow modules. ## Index -1. [**Motivation**](inst/doc/practical_guide.md#motivation) -2. [**Introduction**](inst/doc/practical_guide.md#introduction) -3. [**Configuring startR**](inst/doc/practical_guide.md#configuring-startr) -4. [**Using startR**](inst/doc/practical_guide.md#using-startr) - 1. [**Start()**](inst/doc/practical_guide.md#start) - 2. [**Step() and AddStep()**](inst/doc/practical_guide.md#step-and-addstep) - 3. [**Compute()**](inst/doc/practical_guide.md#compute) - 1. [**Compute() locally**](inst/doc/practical_guide.md#compute-locally) - 2. 
[**Compute() on CTE-Power 9**](inst/doc/practical_guide.md#compute-on-cte-power-9) - 3. [**Compute() on the fat nodes and other HPCs**](inst/doc/practical_guide.md#compute-on-the-fat-nodes-and-other-hpcs) - 4. [**Collect() and the EC-Flow GUI**](inst/doc/practical_guide.md#collect-and-the-ec-flow-gui) -5. [**Additional information**](inst/doc/practical_guide.md#additional-information) -6. [**Other examples**](inst/doc/practical_guide.md#other-examples) -7. [**Compute() cluster templates**](inst/doc/practical_guide.md#compute-cluster-templates) - -## Motivation +1. [**Motivation**](#1-motivation) +2. [**Introduction**](#2-introduction) +3. [**Configuring startR**](#3-configuring-startr) +4. [**Using startR**](#4-using-startr) + 1. [**Start()**](#4-1-start) + 2. [**Step() and AddStep()**](#4-2-step-and-addstep) + 3. [**Compute()**](#4-3-compute) + 1. [**Compute() locally**](#4-3-1-compute-locally) + 2. [**Compute() on HPCs**](#4-3-2-compute-on-hpcs) + 4. [**Collect() and the EC-Flow GUI**](#4-4-collect-and-the-ec-flow-gui) +5. [**Additional information**](#5-additional-information) + 1. [**How to choose the number of chunks, jobs and cores**](#5-1-how-to-choose-the-number-of-chunks-jobs-and-cores) + 2. [**How to clean a failed execution**](#5-2-how-to-clean-a-failed-execution) + 3. [**Visualizing the profiling of the execution**](#5-3-visualizing-the-profiling-of-the-execution) + 4. [**Pending features**](#5-4-pending-features) +6. [**Other examples**](#6-other-examples) +7. [**Compute() cluster templates**](#7-compute-cluster-templates) + +## 1. Motivation What would you do if you had to apply a custom statistical analysis procedure to a 10TB climate data set? Probably, you would need to use a scripting language to write a procedure which is able to retrieve a subset of data from the file system (it would rarely be possible to handle all of it at once on a single node), code the custom analysis procedure in that language, and apply it carefully and efficiently to the data. Afterwards, you would need to think of and develop a mechanism to dispatch the job mutiple times in parallel to an HPC of your choice, each of the jobs processing a different subset of the data set. You could do this by hand but, ideally, you would rather use EC-Flow or a similar general purpose workflow manager which would orchestrate the work for you. Also, it would allow you to visually monitor and control the progress, as well as keep an easy-to-understand record of what you did, in case you need to re-use it in the future. The mentioned solution, although it is the recommended way to go, is a demanding one and you could easily spend a few days until you get it running smoothly. Additionally, when developing the job script, you would be exposed to the difficulties of efficiently managing the data and applying the coded procedure to it. @@ -43,7 +46,7 @@ _**Note 1**_: The data files do not need to be migrated to a database system, no _**Note 2**_: The HPCs startR is designed to run on are understood as multi-core multi-node clusters. startR relies on a shared file system across all HPC nodes, and does not implement any kind of distributed storage system for now. -## Introduction +## 2. Introduction In order to use startR you will need to follow the configuration steps listed in the "Configuring startR" section of this guide to make sure startR works on your workstation with the HPC of your choice. 
@@ -53,7 +56,7 @@ Afterwards, you will need to understand and use five functions, all of them incl - **Compute()**, for specifying the HPC to be employed, the execution parameters (e.g. number of chunks and cores), and to trigger the computation - **Collect()** and the **EC-Flow graphical user interface**, for monitoring of the progress and collection of results -Next, you can see an example startR script performing the ensemble mean of a small data set on CTE-Power9, for you to get a broad picture of how the startR functions interact and the information that is represented in a startR script. Note that the `queue_host`, `temp_dir` and `ecflow_suite_dir` parameters in the `Compute()` call are user-specific. +Next, you can see an example startR script performing the ensemble mean of a small data set on an HPC cluster such as Nord3-v2 or CTE-Power9, for you to get a broad picture of how the startR functions interact and the information that is represented in a startR script. Note that the `queue_host`, `temp_dir` and `ecflow_suite_dir` parameters in the `Compute()` call are user-specific. ```r library(startR) @@ -79,25 +82,27 @@ step <- Step(fun, wf <- AddStep(data, step) -res <- Compute(wf, - chunks = list(latitude = 2, - longitude = 2), - threads_load = 2, - threads_compute = 4, - cluster = list(queue_host = 'p9login1.bsc.es', - queue_type = 'slurm', - temp_dir = '/gpfs/scratch/bsc32/bsc32473/startR_hpc/', - r_module = 'R/3.5.0-foss-2018b', - job_wallclock = '00:10:00', - cores_per_job = 4, - max_jobs = 4, - bidirectional = FALSE, - polling_period = 10 - ), - ecflow_suite_dir = '/home/Earth/nmanuben/startR_local/') + res <- Compute(wf, + chunks = list(latitude = 2, + longitude = 2), + threads_load = 2, + threads_compute = 4, + cluster = list(queue_host = 'nord4.bsc.es', + queue_type = 'slurm', + temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/', + cores_per_job = 4, + job_wallclock = '00:10:00', + max_jobs = 4, + extra_queue_params = list('#SBATCH --constraint=medmem'), + bidirectional = FALSE, + polling_period = 10 + ), + ecflow_suite_dir = '/home/Earth/aho/startR_local/', + wait = TRUE + ) ``` -## Configuring startR +## 3. Configuring startR At BSC, the only configuration step you need to follow is to set up passwordless connection with the HPC. You do not need to follow the complete list of deployment steps under [**Deployment**](inst/doc/deployment.md) since all dependencies are already installed for you to use. @@ -121,7 +126,17 @@ Specifically, you need to set up passwordless, userless access from your machine After following these steps for the connections in both directions (although from the HPC to the workstation might not be possible), you are good to go. -Do not forget adding the following lines in your .bashrc on CTE-Power if you are planning to run on CTE-Power: +Do not forget adding the following lines in your .bashrc on the HPC machine. 
+ +If you are planning to run it on Nord3-v2, you have to add: +``` +if [ $BSC_MACHINE == "nord3v2" ]; then + module purge + module use /gpfs/projects/bsc32/software/suselinux/11/modules/all + module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment +fi +``` +If you are using CTE-Power: ``` if [[ $BSC_MACHINE == "power" ]] ; then module unuse /apps/modules/modulefiles/applications @@ -135,7 +150,7 @@ alias ctp='ssh -XY username@hostname_or_ip' alias start='module load R CDO ecFlow' ``` -## Using startR +## 4. Using startR If you have successfully gone through the configuration steps, you will just need to run the following commands in a terminal session and a fresh R session will pop up with the startR environment ready to use. @@ -150,7 +165,7 @@ The library can be loaded as follows: library(startR) ``` -### Start() +### 4-1. Start() In order for startR to recognize the data sets you want to process, you first need to declare them. The first step in the declaration of a data set is to build a special path string that encodes where all the involved NetCDF files to be processed are stored. It contains some wildcards in those parts of the path that vary across files. This special path string is also called "path pattern". @@ -311,7 +326,7 @@ _**Note 3**_ _on the order of dimensions_: neither the file dimensions nor the i _**Note 4**_ _on the size of the data in R_: if you check the size of the involved file in the example `Start()` call used above ('/esarchive/exp/ecmwf/system5_m1/6hourly/tas/tas_19930101.nc'), you will realize it only weighs 34GB. Why is the data reported to occupy 134GB then? This is due to two facts: by one side, NetCDF files are usually compressed, and their uncompressed size can be substantially greater. In this case, the uncompressed data would occupy about 72GB. Besides, the variable we are targetting in the example ('tas') is defined as a float variable inside the NetCDF file. This means each value is a 4-byte real number. However, R cannot represent 4-byte real numbers; it always takes 8 bytes to represent a real number. This is why, when float numbers are represented in R, the effective size of the data is doubled. In this case, the 72GB of uncompressed float numbers need to be represented using 132GB of RAM in R. -### Step() and AddStep() +### 4-2. Step() and AddStep() Once the data sources are declared, you can define the operation to be applied to them. The operation needs to be encapsulated in the form of an R function receiving one or more multidimensional arrays with named dimensions (plus additional helper parameters) and returning one or more multidimensional arrays, which should also have named dimensions. For example: @@ -352,11 +367,11 @@ If the step involved more than one data source, a list of data sources could be It is not possible for now to define workflows with more than one step, but this is not a crucial gap since a step function can contain more than one statistical analysis procedure. Furthermore, it is usually enough to perform only the first or two first steps of the analysis workflow on the HPCs because, after these steps, the volume of data involved is reduced substantially and the analysis can go on with conventional methods. -### Compute() +### 4-3. 
Compute() Once the data sources are declared and the workflow is defined, you can proceed to specify the execution parameters (including which platform to run on) and trigger the execution with the `Compute()` function. -Next, a few examples are shown with `Compute()` calls to trigger the processing of a dataset locally (only on the machine where the R session is running) and on two different HPCs (the Earth Sciences fat nodes and CTE-Power9). However, let's first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. +Next, a few examples are shown with `Compute()` calls to trigger the processing of a dataset locally (only on the machine where the R session is running) and different HPCs (Nord3-v2, CTE-Power9 and other HPCs). However, let's first define a `Start()` call that involves a smaller subset of data in order not to make the examples too heavy. ```r library(startR) @@ -406,7 +421,7 @@ Warning messages: ! dimension with pattern specifications. ``` -#### Compute() locally +#### 4-3-1. Compute() locally When only your own workstation is available, startR can still be useful to process a very large dataset by chunks, thus avoiding a RAM memory overload and consequent crash of the workstation. startR will simply load and process the dataset by chunks, one after the other. The operations defined in the workflow will be applied to each chunk, and the results will be stored on a temporary file. `Compute()` will finally gather and merge the results of each chunk and return a single data object, including one or multiple multidimensional data arrays, and additional metadata. @@ -551,34 +566,39 @@ res <- Compute(wf, * max: 8.03660178184509 ``` -#### Compute() on CTE-Power 9 +#### 4-3-2. Compute() on HPCs -In order to run the computation on a HPC, such as the BSC CTE-Power 9, you will need to make sure the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported). +In order to run the computation on a HPC, you will need to make sure the passwordless connection with the login node of that HPC is configured, as shown at the beginning of this guide. If possible, in both directions. Also, you will need to know whether there is a shared file system between your workstation and that HPC, and will need information on the number of nodes, cores per node, threads per core, RAM memory per node, and type of workload used by that HPC (Slurm, PBS and LSF supported). You will need to add two parameters to your `Compute()` call: `cluster` and `ecflow_suite_dir`. The parameter `ecflow_suite_dir` expects a path to a folder in the workstation where to store temporary files generated for the automatic management of the workflow. As you will see later, the EC-Flow workflow manager is used transparently for this purpose. -The parameter `cluster` expects a list with a number of components that will have to be provided a bit differently depending on the HPC you want to run on. You can see next an example cluster configuration that will execute the previously defined workflow on CTE-Power 9. 
+The parameter `cluster` expects a list with a number of components that will have to be provided a bit differently depending on the HPC you want to run on. You can see next an example cluster configuration that will execute the previously defined workflow on Nord3-v2. ```r -res <- Compute(wf, - chunks = list(latitude = 2, - longitude = 2), - threads_load = 2, - threads_compute = 4, - cluster = list(queue_host = 'p9login1.bsc.es', - queue_type = 'slurm', - temp_dir = '/gpfs/scratch/bsc32/bsc32473/startR_hpc/', - r_module = 'R/3.5.0-foss-2018b', - cores_per_job = 4, - job_wallclock = '00:10:00', - max_jobs = 4, - extra_queue_params = list('#SBATCH --mem-per-cpu=3000'), - bidirectional = FALSE, - polling_period = 10 - ), - ecflow_suite_dir = '/home/Earth/nmanuben/startR_local/' - ) + # user-defined + temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/aho/startR_local/' + + res <- Compute(wf, + chunks = list(latitude = 2, + longitude = 2), + threads_load = 2, + threads_compute = 4, + cluster = list(queue_host = 'nord4.bsc.es', + queue_type = 'slurm', + temp_dir = temp_dir, + r_module = 'R/4.1.2-foss-2019b' + cores_per_job = 4, + job_wallclock = '00:10:00', + max_jobs = 4, + extra_queue_params = list('#SBATCH --constraint=medmem'), + bidirectional = FALSE, + polling_period = 10 + ), + ecflow_suite_dir = ecflow_suite_dir, + wait = TRUE + ) ``` The cluster components and options are explained next: @@ -590,9 +610,10 @@ The cluster components and options are explained next: - `cores_per_job`: number of computing cores to be requested when submitting the job for each chunk to the HPC queue. Each node may be capable of supporting more than one computing thread. - `job_wallclock`: amount of time to reserve the resources when submitting the job for each chunk. Must follow the specific format required by the specified `queue_type`. - `max_jobs`: maximum number of jobs (chunks) to be queued simultaneously onto the HPC queue. Submitting too many jobs could overload the bandwidth between the HPC nodes and the storage system, or could overload the queue system. -- `extra_queue_params`: list of character strings with additional queue headers for the jobs to be submitted to the HPC. Mainly used to specify the amount of memory to book for each job (e.g. '#SBATCH --mem-per-cpu=30000'), to request special queuing (e.g. '#SBATCH --qos=bsc_es'), or to request use of specific software (e.g. '#SBATCH --reservation=test-rhel-7.5'). +- `extra_queue_params`: list of character strings with additional queue headers for the jobs to be submitted to the HPC. Mainly used to specify the amount of memory to book for each job (e.g. '#SBATCH --mem-per-cpu=30000'; __NOTE: this line does not work on Nord3v2__), to request special queuing (e.g. '#SBATCH --qos=bsc_es'), or to request use of specific software (e.g. '#SBATCH --reservation=test-rhel-7.5'). - `bidirectional`: whether the connection between the R workstation and the HPC login node is bidirectional (TRUE) or unidirectional from the workstation to the login node (FALSE). - `polling_period`: when the connection is unidirectional, the workstation will ask the HPC login node for results each `polling_period` seconds. An excessively small value can overload the login node or result in temporary banning. +- `special_setup`: name of the machine if the computation requires an special setup. Only Marenostrum 4 needs this parameter (e.g. special_setup = 'marenostrum4'). 
After the `Compute()` call is executed, an EC-Flow server is automatically started on your workstation, which will orchestrate the work and dispatch jobs onto the HPC. Thanks to the use of EC-Flow, you will also be able to monitor visually the progress of the execution. See the "Collect and the EC-Flow GUI" section. @@ -609,15 +630,15 @@ server is already started At this point, you may want to check the jobs are being dispatched and executed properly onto the HPC. For that, you can either use the EC-Flow GUI (covered in the next section), or you can `ssh` to the login node of the HPC and check the status of the queue with `squeue` or `qstat`, as shown below. ``` -[bsc32473@p9login1 ~]$ squeue +[bsc32734@login4 ~]$ squeue JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1142418 main /STARTR_ bsc32473 R 0:12 1 p9r3n08 - 1142419 main /STARTR_ bsc32473 R 0:12 1 p9r3n08 - 1142420 main /STARTR_ bsc32473 R 0:12 1 p9r3n08 - 1142421 main /STARTR_ bsc32473 R 0:12 1 p9r3n08 + 757026 main /STARTR_ bsc32734 R 0:46 1 s02r2b24 + 757027 main /STARTR_ bsc32734 R 0:46 1 s04r1b61 + 757028 main /STARTR_ bsc32734 R 0:46 1 s04r1b63 + 757029 main /STARTR_ bsc32734 R 0:46 1 s04r1b64 ``` -Here the output of the execution on CTE-Power 9 after waiting for about a minute: +Here the output of the execution after waiting for about a minute: ```r * Remaining time estimate (neglecting queue and merge time) (at * 2019-01-28 01:16:59): 0 mins (46.22883 secs per chunk) @@ -665,55 +686,33 @@ Usually, in use cases with larger data inputs, it will be preferrable to add the As mentioned above in the definition of the `cluster` parameters, it is strongly recommended to check the section on "How to choose the number of chunks, jobs and cores". -#### Compute() on the fat nodes and other HPCs +You can find the `cluster` configuration for other HPCs at the end of this guide [Compute() cluster templates](#compute-cluster-templates) -The `Compute()` call with the parameters to run the example in this section on the BSC ES fat nodes is provided below (you will need to adjust some of the parameters before using it). As you can see, the only thing that needs to be changed to execute startR on a different HPC is the definition of the `cluster` parameters. -The `cluster` configuration for the fat nodes, CTE-Power 9, Marenostrum 4, Nord III, Minotauro and ECMWF cca/ccb are all provided at the very end of this guide. - -```r -res <- Compute(wf, - chunks = list(latitude = 2, - longitude = 2), - threads_load = 2, - threads_compute = 4, - cluster = list(queue_host = 'bsceslogin01.bsc.es', - queue_type = 'slurm', - temp_dir = '/home/Earth/nmanuben/startR_hpc/', - cores_per_job = 2, - job_wallclock = '00:10:00', - max_jobs = 4, - bidirectional = TRUE - ), - ecflow_suite_dir = '/home/Earth/nmanuben/startR_local/') -``` - -### Collect() and the EC-Flow GUI +### 4-4. Collect() and the EC-Flow GUI Usually, in use cases where large data inputs are involved, it is convenient to add the parameter `wait = FALSE` to your `Compute()` call. With this parameter, `Compute()` will immediately return an object with information about your startR execution. You will be able to store this object onto disk. After doing that, you will not need to worry in case your workstation turns off in the middle of the computation. You will be able to close your R session, and collect the results later on with the `Collect()` function. 
```r -res <- Compute(wf, - chunks = list(latitude = 2, - longitude = 2), - threads_load = 2, - threads_compute = 4, - cluster = list(queue_host = 'p9login1.bsc.es', - queue_type = 'slurm', - temp_dir = '/gpfs/scratch/bsc32/bsc32473/startR_hpc/', - r_module = 'R/3.5.0-foss-2018b', - cores_per_job = 4, - job_wallclock = '00:10:00', - max_jobs = 4, - extra_queue_params = list('#SBATCH --mem-per-cpu=3000'), - bidirectional = FALSE, - polling_period = 10 - ), - ecflow_suite_dir = '/home/Earth/nmanuben/startR_local/', - wait = FALSE - ) - -saveRDS(res, file = 'test_collect.Rds') + res <- Compute(wf, + chunks = list(latitude = 2, + longitude = 2), + threads_load = 2, + threads_compute = 4, + cluster = list(queue_host = 'nord4.bsc.es', + queue_type = 'slurm', + temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/', + cores_per_job = 4, + job_wallclock = '00:10:00', + max_jobs = 4, + extra_queue_params = list('#SBATCH --constraint=medmem'), + bidirectional = FALSE, + polling_period = 10 + ), + ecflow_suite_dir = '/home/Earth/aho/startR_local/', + wait = FALSE + ) + saveRDS(res, file = 'test_collect.Rds') ``` At this point, after storing the descriptor of the execution and before calling `Collect()`, you may want to visually check the status of the execution. You can do that with the EC-Flow graphical user interface. You need to open a new terminal, load the EC-Flow module if needed, and start the GUI: @@ -778,9 +777,10 @@ You can also run `Collect()` with `wait = FALSE`. This will crash with an error `Collect()` also has a parameter called `remove`, which by default is set to `TRUE` and triggers removal of all data results received from the HPC (and stored under `ecflow_suite_dir`). If you would like to preserve the data, you can set `remove = FALSE` and `Collect()` it as many times as desired. Alternatively, you can `Collect()` with `remove = TRUE` and store the merged array right after with `saveRDS()`. -## Additional information +## 5. Additional information +You can find more FAQ in [faq.md](inst/doc/faq.md). -### How to choose the number of chunks, jobs and cores +### 5-1. How to choose the number of chunks, jobs and cores #### Number of chunks and memory per job @@ -808,7 +808,7 @@ The number of cores per job should be as large as possible, with two limitations - If the HPC nodes have each "M" total amount of memory with "m" amount of memory per memory module, and have each "N" amount of cores, the requested amount of cores "n" should be such that "n" / "N" does not exceed "m" / "M". - Depending on the shape of the chunks, startR has a limited capability to exploit multiple cores. It is recommended to make small tests increasing the number of cores to work out a reasonable number of cores to be requested. -### How to clean a failed execution +### 5-2. How to clean a failed execution - Work out the startR execution ID, either by inspecting the execution description by `Compute()` when called with the parameter `wait = FALSE`, or by checking the `ecflow_suite_dir` with `ls -ltr`. - ssh to the HPC login node and cancel all jobs of your startR execution. @@ -818,7 +818,7 @@ The number of cores per job should be as large as possible, with two limitations - Optionally remove the data under `data_dir` on the HPC login node if the file system is not shared between the workstation and the HPC and you do not want to keep the data in the `data_dir`, used as caché for future startR executions. - Open the EC-Flow GUI and remove the workflow entry (a.k.a. 
suite) named with your startR execution ID with right click > "Remove". -### Visualizing the profiling of the execution +### 5-3. Visualizing the profiling of the execution As seen in previous sections, profiling measurements of the execution are provided together with the data output. These measurements can be visualized with the `PlotProfiling()` function made available in the source code of the startR package. @@ -836,16 +836,17 @@ A chart displays the timings for the different stages of the computation, as sho You can click on the image to expand it. -### What to do if your function has too many target dimensions -### Pending features -- Adding feature for `Copute()` to run on multiple HPCs or workstations. +### 5-4. Pending features +- Adding feature for `Compute()` to run on multiple HPCs or workstations. - Adding plug-in to read CSV files. - Supporting multiple steps in a workflow defined by `AddStep()`. - Adding feature in `Start()` to read sparse grid points. - Allow for chunking along "essential" (a.k.a. "target") dimensions. -## Other examples + +## 6. Other examples +You can find more use cases in [usecase.md](inst/doc/usecase.md). ### Using experimental and (date-corresponding) observational data @@ -909,10 +910,6 @@ res <- Compute(step, list(system4, erai), wait = FALSE) ``` -### Example of computation of weekly means - -### Example with data on an irregular grid with selection of a region - ### Example on MareNostrum 4 ```r @@ -1015,7 +1012,38 @@ r <- Compute(wf, ) ``` -## Compute() cluster templates +## 7. Compute() cluster templates + +### Nord3-v2 + +```r +cluster = list(queue_host = 'nord4.bsc.es', + queue_type = 'slurm', + temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/', + cores_per_job = 2, + job_wallclock = '01:00:00', + max_jobs = 4, + bidirectional = FALSE, + polling_period = 10 + ) +``` + +### Nord3 (deprecated) + +```r +cluster = list(queue_host = 'nord1.bsc.es', + queue_type = 'lsf', + data_dir = '/gpfs/projects/bsc32/share/startR_data_repos/', + temp_dir = '/gpfs/scratch/bsc32/bsc32473/startR_hpc/', + init_commands = list('module load intel/16.0.1'), + cores_per_job = 2, + job_wallclock = '00:10', + max_jobs = 4, + extra_queue_params = list('#BSUB -q bsc_es'), + bidirectional = FALSE, + polling_period = 10 + ) +``` ### CTE-Power9 @@ -1023,7 +1051,6 @@ r <- Compute(wf, cluster = list(queue_host = 'p9login1.bsc.es', queue_type = 'slurm', temp_dir = '/gpfs/scratch/bsc32/bsc32473/startR_hpc/', - r_module = 'R/3.5.0-foss-2018b', cores_per_job = 4, job_wallclock = '00:10:00', max_jobs = 4, @@ -1032,7 +1059,7 @@ cluster = list(queue_host = 'p9login1.bsc.es', ) ``` -### BSC ES fat nodes +### BSC ES fat nodes (deprecated) ```r cluster = list(queue_host = 'bsceslogin01.bsc.es', @@ -1062,25 +1089,6 @@ cluster = list(queue_host = 'mn2.bsc.es', ) ``` -### Nord III - -```r -cluster = list(queue_host = 'nord1.bsc.es', - queue_type = 'lsf', - data_dir = '/gpfs/projects/bsc32/share/startR_data_repos/', - temp_dir = '/gpfs/scratch/bsc32/bsc32473/startR_hpc/', - init_commands = list('module load intel/16.0.1'), - r_module = 'R/3.3.0', - cores_per_job = 2, - job_wallclock = '00:10', - max_jobs = 4, - extra_queue_params = list('#BSUB -q bsc_es'), - bidirectional = FALSE, - polling_period = 10, - special_setup = 'marenostrum4' - ) -``` - ### MinoTauro ```r diff --git a/inst/doc/tutorial/PATC2022/Figures/.gitkeep b/inst/doc/tutorial/PATC2022/Figures/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/inst/doc/tutorial/PATC2022/Figures/handson_4_fig_rmsss.png b/inst/doc/tutorial/PATC2022/Figures/handson_4_fig_rmsss.png new file mode 100644 index 0000000000000000000000000000000000000000..1633a1c01aee042a5fb82e3277250338d81e4893 Binary files /dev/null and b/inst/doc/tutorial/PATC2022/Figures/handson_4_fig_rmsss.png differ diff --git a/inst/doc/tutorial/PATC2022/Figures/handson_4_fig_rpss.png b/inst/doc/tutorial/PATC2022/Figures/handson_4_fig_rpss.png new file mode 100644 index 0000000000000000000000000000000000000000..6277483b3b0ece5a2d54db69ecd897b90e1e45aa Binary files /dev/null and b/inst/doc/tutorial/PATC2022/Figures/handson_4_fig_rpss.png differ diff --git a/inst/doc/tutorial/PATC2022/Figures/handson_5_fig1.png b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig1.png new file mode 100644 index 0000000000000000000000000000000000000000..30be4766de563f11be0a786400997907abe5c97b Binary files /dev/null and b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig1.png differ diff --git a/inst/doc/tutorial/PATC2022/Figures/handson_5_fig2.png b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig2.png new file mode 100644 index 0000000000000000000000000000000000000000..1db8c76efb51d65fe0e1002a41bbbf792e1f4860 Binary files /dev/null and b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig2.png differ diff --git a/inst/doc/tutorial/PATC2022/Figures/handson_5_fig3.png b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig3.png new file mode 100644 index 0000000000000000000000000000000000000000..1d1aba9fd14f6f52b9a73c674360506d1d930079 Binary files /dev/null and b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig3.png differ diff --git a/inst/doc/tutorial/PATC2022/Figures/handson_5_fig4.png b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig4.png new file mode 100644 index 0000000000000000000000000000000000000000..00e1730deaa35a0e137dfe0ce91dcd9882c47e6e Binary files /dev/null and b/inst/doc/tutorial/PATC2022/Figures/handson_5_fig4.png differ diff --git a/inst/doc/tutorial/PATC2022/handson_3-rainfarm.md b/inst/doc/tutorial/PATC2022/handson_3-rainfarm.md new file mode 100644 index 0000000000000000000000000000000000000000..a7098b5e291be30dab46329ca4be50b2f9e01e92 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/handson_3-rainfarm.md @@ -0,0 +1,131 @@ +# Hands-on 3: Load data by startR + +## Goal +Use startR to load the data used in CSTools [RainFARM vignette](https://earth.bsc.es/gitlab/external/cstools/-/blob/master/vignettes/RainFARM_vignette.Rmd). Learn how to adjust data while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) + +library(startR) +library(CSTools) +``` + +## 1. Load data from data repository (esarchive/) + +**Data description**: +This sample data set contains a small cutout of gridded seasonal precipitation +forecast data from the Copernicus Climate Change ECMWF-System 5 forecast system. +Specifically, for the 'prlr' (precipitation) variable, for the first 6 forecast +ensemble members, daily values, for all 31 days in March following the forecast +starting dates in November of years 2010 to 2012, for a small 4x4 pixel cutout in +a region in the North-Western Italian Alps (44N-47N, 6E-9E). The data resolution is 1 degree. + +Use the above information to define the variable, start dates, longitude and latitude. 
+ +```r + # Use this one if on workstation or nord3 (have access to /esarchive) + repos <- "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_s0-24h/$var$_$sdate$.nc" + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5c3s/daily_mean/', + '$var$_s0-24h/$var$_$sdate$.nc') + + var <- ______ + sdate <- paste0(______, '1101') + lon.min <- ______ + lon.max <- ______ + lat.min <- ______ + lat.max <- ______ +``` + +Calculate the forecast time step by package "lubridate". Desired days: the whole March (leap year ignored). +Check the usage of the following functions by type `?` or check official website https://lubridate.tidyverse.org/reference/. + +```r + library(lubridate) + ## Check which year is leap year + leap_year(2011:2013) + ## Find the first time step we want + ftime_s <- as.integer(ymd("2011-03-01", tz = "UTC") - ymd("2010-11-01", tz = "UTC")) + 1 + ftime <- ftime_s:(ftime_s + 30) + print(ftime) +``` + +Use Start() to load the data. + +```r + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(______), + time = ftime, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + + +## 2. Compare with CSTools lonlat_prec + +Load CSTools data `lonlat_prec`. + +```r + stored_data <- CSTools::lonlat_prec +``` + +### Dimensions + +```r + dim(data) + dim(stored_data$data) +``` + +### Dates + +```r + data_time_attr <- attr(data, 'Variables')$common$time + dim(data_time_attr) + dim(stored_data$Dates$start) + str(stored_data$Dates$start) +``` + +### Latitude and Longitude + +```r + data_lat_attr <- attr(data, 'Variables')$common$latitude + as.vector(data_lat_attr) + as.vector(stored_data$lat) + + data_lon_attr <- attr(data, 'Variables')$common$longitude + as.vector(data_lon_attr) + as.vector(stored_data$lon) +``` + +## 3. Adjust Start() call to get more consistent result with lonlat_prec + +You should already see that the output of Start() `data` does have the same values +as `lonlat_prec`, but there are some differences. For example, the "member" and "time" +dimensions have different names; latitude vector order is different. + +1. We can use parameter `synonims` to change the dimension name freely. +In Start(), change `ensemble` to `member` and `time` to `ftime`, and define their synonims. + +2. Sort() in startR is a wrapper function of base function sort(). You can find it in Start() +call above, `latitude_reorder = Sort()`. Type `?sort` in R console to find how to change +latitude to descending order. + +3. To switch `member` and `sdate` dimension order, simply switch the order in Start() call. + +```r +# Write a new Start() call based on the previous script + data <- Start(...) 
+``` diff --git a/inst/doc/tutorial/PATC2022/handson_3-rainfarm_ans.md b/inst/doc/tutorial/PATC2022/handson_3-rainfarm_ans.md new file mode 100644 index 0000000000000000000000000000000000000000..23c3ea804a550e49b35d2e1a87f14faebc0f4462 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/handson_3-rainfarm_ans.md @@ -0,0 +1,176 @@ +# Hands-on 3: Load data by startR + +## Goal +Use startR to load the data used in CSTools [RainFARM vignette](https://earth.bsc.es/gitlab/external/cstools/-/blob/master/vignettes/RainFARM_vignette.Rmd). Learn how to adjust data while loading data. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) + +library(startR) +library(CSTools) +``` + +## 1. Load data from data repository (esarchive/) + +**Data description**: +This sample data set contains a small cutout of gridded seasonal precipitation +forecast data from the Copernicus Climate Change ECMWF-System 5 forecast system. +Specifically, for the 'prlr' (precipitation) variable, for the first 6 forecast +ensemble members, daily values, for all 31 days in March following the forecast +starting dates in November of years 2010 to 2012, for a small 4x4 pixel cutout in +a region in the North-Western Italian Alps (44N-47N, 6E-9E). The data resolution is 1 degree. + +Use the above information to define the variable, start dates, longitude and latitude. + +```r + # Use this one if on workstation or nord3 (have access to /esarchive) + repos <- "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_s0-24h/$var$_$sdate$.nc" + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5c3s/daily_mean/', + '$var$_s0-24h/$var$_$sdate$.nc') + + var <- 'prlr' + sdate <- paste0(2010:2012, '1101') + lon.min <- 6 + lon.max <- 9 + lat.min <- 44 + lat.max <- 47 +``` + +Calculate the forecast time step by package "lubridate". Desired days: the whole March (leap year ignored). +Check the usage of the following functions by type `?` or check official website https://lubridate.tidyverse.org/reference/. + +```r + library(lubridate) + ## Check which year is leap year + leap_year(2011:2013) + ## Find the first time step we want + ftime_s <- as.integer(ymd("2011-03-01", tz = "UTC") - ymd("2010-11-01", tz = "UTC")) + 1 + ftime <- ftime_s:(ftime_s + 30) + print(ftime) +# [1] 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 +#[20] 140 141 142 143 144 145 146 147 148 149 150 151 + +``` + +Use Start() to load the data. + +```r + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1:6), + time = ftime, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, latitude = NULL), + retrieve = TRUE) +``` + + +## 2. Compare with CSTools lonlat_prec + +Load CSTools data `lonlat_prec`. 
+ +```r + stored_data <- CSTools::lonlat_prec +``` + +### Dimensions + +```r + dim(data) +# dat var sdate ensemble time latitude longitude +# 1 1 3 6 31 4 4 + + dim(stored_data$data) +#dataset member sdate ftime lat lon +# 1 6 3 31 4 4 +``` + +### Dates + +```r + data_time_attr <- attr(data, 'Variables')$common$time + dim(data_time_attr) +#sdate time +# 3 31 + + dim(stored_data$Dates$start) +#NULL + + str(stored_data$Dates$start) +# POSIXct[1:93], format: "2011-03-01" "2011-03-02" "2011-03-03" "2011-03-04" "2011-03-05" ... + +``` + +### Latitude and Longitude + +```r + data_lat_attr <- attr(data, 'Variables')$common$latitude + as.vector(data_lat_attr) +#[1] 44 45 46 47 + + as.vector(stored_data$lat) +#[1] 47 46 45 44 + + data_lon_attr <- attr(data, 'Variables')$common$longitude + as.vector(data_lon_attr) +#[1] 6 7 8 9 + + as.vector(stored_data$lon) +#[1] 6 7 8 9 + +``` + +## 3. Adjust Start() call to get more consistent result with lonlat_prec + +You should already see that the output of Start() `data` does have the same values +as `lonlat_prec`, but there are some differences. For example, the "member" and "time" +dimensions have different names; the dimension orders are different; latitude vector order is different. + +1. We can use parameter `synonims` to change the dimension name freely. +In Start(), change `ensemble` to `member` and `time` to `ftime`, and define their synonims. + +2. Sort() in startR is a wrapper function of base function sort(). You can find it in Start() +call above, `latitude_reorder = Sort()`. Type `?sort` in R console to find how to change +latitude to descending order. + +3. To switch `member` and `sdate` dimension order, simply switch the order in Start() call. + +```r + data <- Start(dat = repos, + var = var, + member = indices(1:6), + sdate = sdate, + ftime = ftime, + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + member = c('member', 'ensemble'), + ftime = c('ftime', 'time')), + return_vars = list(ftime = 'sdate', + longitude = NULL, latitude = NULL), + retrieve = TRUE) + + dim(data) +# dat var member sdate ftime latitude longitude +# 1 1 6 3 31 4 4 + + data_lat_attr <- attr(data, 'Variables')$common$latitude + as.vector(data_lat_attr) +#[1] 47 46 45 44 + +``` diff --git a/inst/doc/tutorial/PATC2022/handson_4-skill_workflow.md b/inst/doc/tutorial/PATC2022/handson_4-skill_workflow.md new file mode 100644 index 0000000000000000000000000000000000000000..80a6c54507a5afe58d824184e84b32d74dff5147 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/handson_4-skill_workflow.md @@ -0,0 +1,192 @@ +# Hands-on 4: Seasonal forecast verification + +## Goal +Learn how to use the startR workflow to finish a piece of analysis, including defining and pre-processing the desired data, +defining the operation, building the workflow, and executing the operation. + +In this use case, the ranked probability skill score (RPSS) and the root mean square error skill score (RMSSS) are computed to verify the forecast. +To make the process faster, the required data size is small here so we can run the execution on workstation. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) + +library(startR) +library(s2dv) +``` + +## 1. Define data +The first step of the analysis is use Start() to identify the data needed. 
+ +Hints: +(1) The data paths use two '$' as the wildcard, and it can be given any names you like. +(2) The variable we want to use is 'tas'. +(3) The required time (sdate) period is November 1991 to 2011. +(4) Read global data (i.e., full spatial grids.) +(5) Take all the ensemble members. +(6) Because we are going to use the whole startR workflow, we only need to create a pointer to the data repository rather than retrieve it to the workstation. + +```r +# exp data + # Use this one if on workstation or nord3 (have access to /esarchive) + repos_exp <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos_exp <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5c3s/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + + sdates_exp <- sapply(1991:2011, function(x) paste0(x, '1101')) + + exp <- Start(dat = repos_exp, + var = ______, + sdate = sdates_exp, + time = indices(1), + ensemble = ______, + latitude = ______, + latitude_reorder = Sort(), + longitude = ______, + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdate'), + retrieve = FALSE) + +# obs data + # Use this one if on workstation or nord3 (have access to /esarchive) + repos_obs <- paste0('/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r360x180/', + '$var$_$sdate$.nc') + # Use this one if on Marenostrum4 and log in with PATC2022 account + repos_obs <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'recon/ecmwf/era5/monthly_mean/', + '$var$_f1h-r360x180/$var$_$sdate$.nc') + + sdates_obs <- sapply(1991:2011, function(x) paste0(x, '11')) + + obs <- Start(dat = repos_obs, + var = ______, + sdate = sdates_obs, + time = indices(1), + latitude = ______, + latitude_reorder = Sort(), + longitude = ______, + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdate'), + retrieve = FALSE) +``` + +Question: + +1. What are the dimensions of these two data? + +2. What is the size of these two data? + +3. Check the latitude, longitude, and time values of the two data. Are they consistent? + + +## 2. Define operation and workflow +It is recommended to define the function and write Step() together, because the latter one helps you clarify the input and output dimensions of the function. + +In the self-defined function, we want to use the two functions, 's2dv::RPSS' and 's2dv::RMSSS', to calculate the skill scores. +You can check the functions by typing `?s2dv::RPSS` and `?s2dv::RMSSS`, or on [s2dv GitLab](https://earth.bsc.es/gitlab/es/s2dv/-/blob/master/R/). + +Hint: +(1) The self-defined function is for exp and obs data. Therefore, target_dims and output_dims in Step() should be a list with two vector elements. +(2) The essential dimensions are "ensemble" and "sdate" for `exp`, and "sdate" for `obs`. These dimensions should be the 'target_dims' in Step(). +(3) To return the two skill scores as well as the significance test results, put them in a list of 4 elements. +(4) What are the dimensions of the outputs? These dimensions should be 'output_dims' in Step(). +(5) The first input of AddStep() should also be a list containing exp and obs. 
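+
+If you want a reminder of how Step() and AddStep() fit together before filling in the blanks below, here is a generic sketch. It is not the answer to this hands-on; the toy function, its single input, and the dimension choices are only illustrative.
+
+```r
+ # Toy example of the Step()/AddStep() pattern with a single input
+ toy_func <- function(x) {
+   # x: [ensemble, sdate]
+   apply(x, 2, mean)   # ensemble mean; returns a vector along 'sdate'
+ }
+ toy_step <- Step(toy_func,
+                  target_dims = c('ensemble', 'sdate'),
+                  output_dims = 'sdate')
+ toy_wf <- AddStep(exp, toy_step)
+```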
+ +```r + # self-defined function + skill_func <- function(exp, obs) { + # exp: [ensemble, sdate] + # obs: [sdate] + + # Calculate RPSS + rpss <- s2dv::RPSS(exp, obs, memb_dim = ______) + + # Calculate RMSSS + ## ensemble mean + exp_ens <- s2dv::MeanDims(exp, ______, na.rm = T) + rmsss <- s2dv::RMSSS(exp_ens, obs, time_dim = ______, dat_dim = NULL) + + return(list(rpss = rpss$rpss, rpss_sign = rpss$sign, + rmsss = rmsss$rmsss, rmsss_pval = rmsss$p.val)) + } + step <- Step(skill_func, target_dims = list(exp = ______, obs = ______), + output_dims = list(rpss = NULL, rpss_sign = NULL, rmsss = NULL, rmsss_pval = NULL)) + wf <- AddStep(list(exp, obs), step) +``` + +Question: +1. Which dimensions are used in operation? Which dimensions are free? + +2. Can we use more dimensions as target_dims? What are the disadvantages? + + +## 3. Execute locally +To avoid potential technical problems in the connection and configuration, +we choose to run the execution locally. +Noted that it is recommended to submit jobs to HPCs if the data size is big or +the operation is heavy. + +Hint: +(1) Use the free dimensions (i.e., those are not 'target_dims' in Step()) to chunk. +(2) It is safe to divide the data into pieces of which the size each is 1/2-1/3 of RAM. In this case, +the data size is only around 270Mb, so you can play with it without worrying about crashing the terminal. + +```r + res <- Compute(wf$rpss, + chunks = list(______), + threads_load = 2, + threads_compute = 4) +``` + +## 4. Check the results + +1. What is the Compute summary printed on the screen? Do you see the total time, time for load, time for compute? + +2. Check the list structure using function `str()`. What are the items in the list? + +3. What are the dimensions of each item? + +4. Plot the maps + +Use `s2dv::PlotEquiMap` or other visualization tools to plot the maps. Check the usage of PlotEquiMap() on s2dv GitLab or type `?s2dv::PlotEquiMap`. + +```r + # Get latitude and longitude from the attributes + lat <- as.vector(attr(exp, 'Variables')$common$latitude) + lon <- as.vector(attr(exp, 'Variables')$common$longitude) + + # Set the color bars: check data range to decide the appropriate color bar + print(range(res$rpss, na.rm = TRUE)) + brks_rpss <- seq(______, ______, by = ______) + print(range(res$rmsss, na.rm = TRUE)) + brks_rmsss <- seq(______, ______, by = ______) + + # Plot RPSS + ##NOTE: Remove 'fileout' to view the figure in pop-up window + PlotEquiMap(______, lon = lon, lat = lat, + dots = ______, dot_size = 1.5, dot_symbol = 3, + brks = brks_rpss, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF system5 monthly mean tas RPSS 1991-2011', title_scale = 0.6, + fileout = '~/handson_4_fig_rpss.png') + + # Plot RMSSS + PlotEquiMap(______, lon = lon, lat = lat, + color_fun = clim.palette('yellowred'), + brks = brks_rmsss, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF system5 monthly mean tas RMSSS 1991-2011', title_scale = 0.6, + fileout = '~/handson_4_fig_rmsss.png') +``` + + +**BONUS**: Try to use only one thread for loading and computing (i.e., `threads_load = 1`, `threads_compute = 1`.) Is it slower than using more threads? 
diff --git a/inst/doc/tutorial/PATC2022/handson_4-skill_workflow_ans.md b/inst/doc/tutorial/PATC2022/handson_4-skill_workflow_ans.md new file mode 100644 index 0000000000000000000000000000000000000000..0a19c23c64872f9577d031af3b4fad5d6cda8414 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/handson_4-skill_workflow_ans.md @@ -0,0 +1,314 @@ +# Hands-on 4: Seasonal forecast verification + +## Goal +Learn how to use the startR workflow to finish a piece of analysis, including defining and pre-processing the desired data, +defining the operation, building the workflow, and executing the operation. + +In this use case, the ranked probability skill score (RPSS) and the root mean square error skill score (RMSSS) are computed to verify the forecast. +To make the process faster, the required data size is small here so we can run the execution on workstation. + +## 0. Load required packages + +```r +# Clean the session +rm(list = ls()) + +library(startR) +library(s2dv) +``` + +## 1. Define data +The first step of the analysis is use Start() to identify the data needed. + +Hints: +(1) The data paths use two '$' as the wildcard, and it can be given any names you like. +(2) The variable we want to use is 'tas'. +(3) The required time (sdate) period is November 1991 to 2011. +(4) Read global data (i.e., full spatial grids.) +(5) Take all the ensemble members. +(6) Because we are going to use the whole startR workflow, we only need to create a pointer to the data repository rather than retrieve it to the workstation. + +```r +# exp data + # Use this one if on workstation or nord3 (have access to /esarchive) + repos_exp <- paste0('/esarchive/exp/ecmwf/system5c3s/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos_exp <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5c3s/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + + sdates_exp <- sapply(1991:2011, function(x) paste0(x, '1101')) + + exp <- Start(dat = repos_exp, + var = 'tas', + sdate = sdates_exp, + time = indices(1), + ensemble = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdate'), + retrieve = FALSE) + +# obs data + # Use this one if on workstation or nord3 (have access to /esarchive) + repos_obs <- paste0('/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r360x180/', + '$var$_$sdate$.nc') + # Use this one if on Marenostrum4 and log in with PATC2022 account + repos_obs <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'recon/ecmwf/era5/monthly_mean/', + '$var$_f1h-r360x180/$var$_$sdate$.nc') + + sdates_obs <- sapply(1991:2011, function(x) paste0(x, '11')) + + obs <- Start(dat = repos_obs, + var = 'tas', + sdate = sdates_obs, + time = indices(1), + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = CircularSort(0, 360), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdate'), + retrieve = FALSE) +``` + +Question: + +1. What are the dimensions of these two data? +```r +attr(exp, 'Dimensions') + dat var sdate time ensemble latitude longitude + 1 1 21 1 25 181 360 + +attr(obs, 'Dimensions') + dat var sdate time latitude longitude + 1 1 21 1 181 360 +``` + +2. 
What is the size of these two data? +exp: 261Mb; obs: 10.4Mb as shown on screen from Start() call. + +3. Check the latitude, longitude, and time values of the two data. Are they consistent? + +```r +# latitude +as.vector(attr(exp, 'Variables')$common$lat) +as.vector(attr(obs, 'Variables')$common$lat) +# longitude +as.vector(attr(exp, 'Variables')$common$lon) +as.vector(attr(obs, 'Variables')$common$lon) +# time +attr(exp, 'Variables')$common$time +attr(obs, 'Variables')$common$time +``` + +## 2. Define operation and workflow +It is recommended to define the function and write Step() together, because the latter one helps you clarify the input and output dimensions of the function. + +In the self-defined function, we want to use the two functions, 's2dv::RPSS' and 's2dv::RMSSS', to calculate the skill scores. +You can check the functions by typing `?s2dv::RPSS` and `?s2dv::RMSSS`, or on [s2dv GitLab](https://earth.bsc.es/gitlab/es/s2dv/-/blob/master/R/). + +Hint: +(1) The self-defined function is for exp and obs data. Therefore, target_dims and output_dims in Step() should be a list with two vector elements. +(2) The essential dimensions are "ensemble" and "sdate" for `exp`, and "sdate" for `obs`. These dimensions should be the 'target_dims' in Step(). +(3) To return the two skill scores as well as the significance test results, put them in a list of 4 elements. +(4) What are the dimensions of the outputs? These dimensions should be 'output_dims' in Step(). +(5) The first input of AddStep() should also be a list containing exp and obs. + +```r + # self-defined function + skill_func <- function(exp, obs) { + # exp: [ensemble, sdate] + # obs: [sdate] + + # Calculate RPSS + rpss <- s2dv::RPSS(exp, obs, memb_dim = 'ensemble') + + # Calculate RMSSS + ## ensemble mean + exp_ens <- s2dv::MeanDims(exp, 'ensemble', na.rm = T) + rmsss <- s2dv::RMSSS(exp_ens, obs, time_dim = 'sdate', dat_dim = NULL) + + return(list(rpss = rpss$rpss, rpss_sign = rpss$sign, + rmsss = rmsss$rmsss, rmsss_pval = rmsss$p.val)) + } + step <- Step(skill_func, target_dims = list(exp = c('ensemble', 'sdate'), obs = c('sdate')), + output_dims = list(rpss = NULL, rpss_sign = NULL, rmsss = NULL, rmsss_pval = NULL)) + wf <- AddStep(list(exp, obs), step) +``` + +Question: +1. Which dimensions are used in operation? Which dimensions are free? +'ensemble' and 'sdate' are used in the self-defined function. The other dimensions (dat, var, time, latitude, and longitude) +are not necessary for the function. These free dimensions will be looped by multiApply during the execution. + +2. Can we use more dimensions as target_dims? What are the disadvantages? +It still works if putting more dimensions as target_dims, but we will lose the choices for chunking in the next step. Also, the lighter the function is, the quicker the operation runs. + + +## 3. Execute locally +To avoid potential technical problems in the connection and configuration, +we choose to run the execution locally. +Noted that it is recommended to submit jobs to HPCs if the data size is big or +the operation is heavy. + +Hint: +(1) Use the free dimensions (i.e., those are not 'target_dims' in Step()) to chunk. +(2) It is safe to divide the data into pieces of which the size each is 1/2-1/3 of RAM. In this case, +the data size is only around 270Mb, so you can play with it without worrying about crashing the terminal. + +```r + res <- Compute(wf$rpss, + chunks = list(latitude = 2, longitude = 2), + threads_load = 2, + threads_compute = 4) +``` + +## 4. Check the results + +1. 
What is the Compute summary printed on the screen? Do you see the total time, time for load, time for compute? + +```r +* Computation ended successfully. +* Number of chunks: 4 +* Max. number of concurrent chunks (jobs): 1 +* Requested cores per job: NA +* Load threads per chunk: 2 +* Compute threads per chunk: 4 +* Total time (s): 331.454513549805 +* Chunking setup: 0.00129866600036621 +* Data upload to cluster: 0 +* All chunks: 328.215093135834 +* Transfer results from cluster: 0 +* Merge: 3.23812174797058 +* Each chunk: +* queue: +* mean: 0 +* min: 0 +* max: 0 +* job setup: +* mean: 0 +* min: 0 +* max: 0 +* load: +* mean: 6.95802348852158 +* min: 6.30401253700256 +* max: 8.70350694656372 +* compute: +* mean: 75.0939234495163 +* min: 74.6092278957367 +* max: 75.3746023178101 +``` + +2. Check the list structure using function `str()`. What are the items in the list? +```r +str(res) +List of 4 + $ rpss : num [1, 1, 1, 1:181, 1:360] 0.1014 -0.0406 0.1024 0.2097 0.1854 ... + $ rpss_sign : logi [1, 1, 1, 1:181, 1:360] FALSE FALSE FALSE FALSE FALSE FALSE ... + $ rmsss : num [1, 1, 1, 1:181, 1:360] 0.994 0.993 0.993 0.993 0.993 ... + $ rmsss_pval: num [1, 1, 1, 1:181, 1:360] 0 0 0 0 0 0 0 0 0 0 ... + .. +``` + +3. What are the dimensions of each item? +```r +dim(res$rpss) + dat var time latitude longitude + 1 1 1 181 360 +dim(res$rpss_sign) + dat var time latitude longitude + 1 1 1 181 360 +dim(res$rmsss) + dat var time latitude longitude + 1 1 1 181 360 +dim(res$rmsss_pval) + dat var time latitude longitude + 1 1 1 181 360 +``` + +4. Plot the maps + +Use `s2dv::PlotEquiMap` or other visualization tools to plot the maps. Check the usage of PlotEquiMap() on s2dv GitLab or type `?s2dv::PlotEquiMap`. + +```r + # Get latitude and longitude from the attributes + lat <- as.vector(attr(exp, 'Variables')$common$latitude) + lon <- as.vector(attr(exp, 'Variables')$common$longitude) + + # Set the color bars: check data range to decide the appropriate color bar + print(range(res$rpss, na.rm = TRUE)) +#[1] -1.0729143 0.9982857 + brks_rpss <- seq(-1, 0.9, by = 0.1) + print(range(res$rmsss, na.rm = TRUE)) +#[1] 0.9513331 0.9997396 + brks_rmsss <- seq(0.95, 0.99, by = 0.01) + + # Plot RPSS + ##NOTE: Remove 'fileout' to view the figure in pop-up window + PlotEquiMap(res$rpss[1, 1, 1, , ], lon = lon, lat = lat, + dots = res$rpss_sign[1, 1, 1, , ], dot_size = 1.5, dot_symbol = 3, + brks = brks_rpss, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF system5 monthly mean tas RPSS 1991-2011', title_scale = 0.6, + fileout = '~/handson_4_fig_rpss.png') + + # Plot RMSSS + PlotEquiMap(res$rmsss[1, 1, 1, , ], lon = lon, lat = lat, + color_fun = clim.palette('yellowred'), + brks = brks_rmsss, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF system5 monthly mean tas RMSSS 1991-2011', title_scale = 0.6, + fileout = '~/handson_4_fig_rmsss.png') +``` + +![RPSS map](./Figures/handson_4_fig_rpss.png) + +![RMSSS map](./Figures/handson_4_fig_rmsss.png) + + + +**BONUS**: Try to use only one thread for loading and computing (i.e., `threads_load = 1`, `threads_compute = 1`.) Is it slower than using more threads? +It is slower with only one thread. For example, the total running time is 1214 seconds with one thread (see below) +but 331 seconds with multiple threads (see above 4.1) The loading and computing time are both more with one thread. +But the efficiency also depends on the situation of the machine. 
If the machine is very loaded, it can be slow for multiple threads as well. + +```r +* Computation ended successfully. +* Number of chunks: 4 +* Max. number of concurrent chunks (jobs): 1 +* Requested cores per job: NA +* Load threads per chunk: 1 +* Compute threads per chunk: 1 +* Total time (s): 1214.47402715683 +* Chunking setup: 0.000868797302246094 +* Data upload to cluster: 0 +* All chunks: 1211.30233049393 +* Transfer results from cluster: 0 +* Merge: 3.17082786560059 +* Each chunk: +* queue: +* mean: 0 +* min: 0 +* max: 0 +* job setup: +* mean: 0 +* min: 0 +* max: 0 +* load: +* mean: 13.2360381484032 +* min: 12.5131008625031 +* max: 14.2744646072388 +* compute: +* mean: 289.587656855583 +* min: 286.497741699219 +* max: 294.805396080017 +``` + diff --git a/inst/doc/tutorial/PATC2022/handson_5-interpolation.md b/inst/doc/tutorial/PATC2022/handson_5-interpolation.md new file mode 100644 index 0000000000000000000000000000000000000000..63b9ea6a7455fd40d8709e875a71a257c830c837 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/handson_5-interpolation.md @@ -0,0 +1,134 @@ +# Hands-on 5: Spatial grid interpolation + +## Goal +To learn how to use Start() to load the data and do the interpolation. +In this use case, we only use Start() and load the data in the local memory. +The default transformation function is startR::CDORemapper, a wrapper function of s2dv::CDORemap that uses CDO inside. + +In detail, we are going to: +(1) Learn different ways to assign longitude and latitude in Start(). +(2) Use parameter '_reorder' to specify the sorted order. +(3) Use transform-related parameters. + +## 0. Load required modules and packages + +We need to have CDO module loaded for interpolation. If you did not load it, quit the +R console and load it. As for R packages, we need startR for data loading and s2dv for plo +tting. + +```r +# Clean the session +rm(list = ls()) + +library(startR) +library(s2dv) +``` + +## 1. Define longitude and latitude +```r + # Use this one if on workstation or nord3 + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + # Use this one if on Marenostrum4 and log in with PATC2022 account + repos <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + var <- 'tas' + sdate <- c('20170101') + lon.min <- 0 + lon.max <- 359.9 + lat.min <- -90 + lat.max <- 90 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + retrieve = TRUE + ) +``` + +1. Run the above script. Check the dimensions, the warning messages, and the values of +longitude and latitude. What is the range of longitude and latitude? + +2. Why 'lon.max' is 359.9 but not 360? What will happen if it is 360? + +3. Now, change + - `latitude_reorder = Sort()` to `latitude_reorder = Sort(decreasing = TRUE)` + - `longitude_reorder = CircularSort(0, 360)` to `longitude_reorder = CircularSort(-180, 180)` + - Set `lon.min <- -180` and `lon.max <- 179.9` + +Check the values of longitude and latitude again. Is it different from the original script? + + +## 2. Interpolation + +Now, let us add the transformation parameters. 
To understand the usage of the `transform` parameters, check FAQ on GitLab https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#5-do-interpolation-in-start-using-parameter-transform + +```r + data_1deg <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + ## transformation + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative'), + transform_vars = c('latitude', 'longitude'), + ## + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, + latitude = NULL), + retrieve = TRUE) +``` + +1. Run the above script. Check the dimensions and the values of longitude and latitude. + +2. Plot maps +Plot the two data and see the difference. + +```r +# original data +lon_ori <- attr(data, 'Variables')$dat1$longitude +lat_ori <- attr(data, 'Variables')$dat1$latitude +PlotEquiMap(drop(data)[1, , ], lon = lon_ori, lat = lat_ori, + filled.continent = FALSE, + toptitle = "Original data: 640x1296, method: con", + fileout = "~/handson_5_fig1.png") + +# regridded data +lon_1deg <- attr(data_1deg, 'Variables')$dat1$longitude +lat_1deg <- attr(data_1deg, 'Variables')$dat1$latitude +PlotEquiMap(drop(data_1deg)[1, , ], lon = lon_1deg, lat = lat_1deg, + filled.continent = FALSE, + toptitle = "Regridded data: 181x360, method: con", + fileout = "~/handson_5_fig2.png") +``` + +3. Try different grids and interpolation methods + +- Change `method` to "bil", "bic", or other available options (check s2dv::CDORemap for details.) + +- Change `grid` to "r100x50", "t106grid", or other available options. + +- Plot the maps to see the difference. + + + diff --git a/inst/doc/tutorial/PATC2022/handson_5-interpolation_ans.md b/inst/doc/tutorial/PATC2022/handson_5-interpolation_ans.md new file mode 100644 index 0000000000000000000000000000000000000000..522c513bbeeaa3def64e9bc23b4a9195813ef0c2 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/handson_5-interpolation_ans.md @@ -0,0 +1,203 @@ +# Hands-on 5: Spatial grid interpolation + +## Goal +To learn how to use Start() to load the data and do the interpolation. +In this use case, we only use Start() and load the data in the local memory. +The default transformation function is startR::CDORemapper, a wrapper function of s2dv::CDORemap that uses CDO inside. + +In detail, we are going to: +(1) Learn different ways to assign longitude and latitude in Start(). +(2) Use parameter '_reorder' to specify the sorted order. +(3) Use transform-related parameters. + +## 0. Load required modules and packages + +We need to have CDO module loaded for interpolation. If you did not load it, quit the +R console and load it. As for R packages, we need startR for data loading and s2dv for plotting. + +```r +# Clean the session +rm(list = ls()) + +library(startR) +library(s2dv) +``` + +## 1. 
Define longitude and latitude +```r + # Use this one if on workstation or nord3 + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + # Use this one if on Marenostrum4 and log in with PATC2022 account + repos <- paste0('/gpfs/scratch/nct01/nct01127/d3_R_handson/esarchive/', + 'exp/ecmwf/system5_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + + var <- 'tas' + sdate <- c('20170101') + lon.min <- 0 + lon.max <- 359.9 + lat.min <- -90 + lat.max <- 90 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + retrieve = TRUE + ) +``` + +1. Run the above script. Check the dimensions, the warning messages, and the values of +longitude and latitude. What is the range of longitude and latitude? +```r +dim(data) + dat var sdate ensemble time latitude longitude + 1 1 1 1 7 640 1296 + +longitude <- attr(data, 'Variables')$dat1$longitude +range(longitude) +[1] 0.0000 359.7222 + +latitude <- attr(data, 'Variables')$dat1$latitude +latitude[1] +[1] -89.78488 +latitude[640] +[1] 89.78488 +``` + +2. Why 'lon.max' is 359.9 but not 360? What will happen if it is 360? +If it is 360, you only get one point of longitude. Because of `longitude_reorder = CircularSort(0, 360)`, Start() regards 0 and 360 as the same point. Therefore, we need to set a number slightly smaller than 360 but bigger than the maximum value in the original data, so we can get the whole range. + +3. Now, change + - `latitude_reorder = Sort()` to `latitude_reorder = Sort(decreasing = TRUE)` + - `longitude_reorder = CircularSort(0, 360)` to `longitude_reorder = CircularSort(-180, 180)` + - Set `lon.min <- -180` and `lon.max <- 179.9` + +Check the values of longitude and latitude again. Is it different from the original script? +```r + lon.min <- -180 + lon.max <- 179.9 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + retrieve = TRUE) + +dim(data) + dat var sdate ensemble time latitude longitude + 1 1 1 1 7 640 1296 + +longitude <- attr(data, 'Variables')$dat1$longitude +range(longitude) +[1] -180.0000 179.7222 + +latitude <- attr(data, 'Variables')$dat1$latitude +latitude[1] +[1] 89.78488 +latitude[640] +[1] -89.78488 + +``` +The dimensions are the same. The longitude range changes to [-180, 180], and the latitude sorts from 90 to -90. + + +## 2. Interpolation +Now, let us add the transformation parameters. 
To understand the usage of the `transform` parameters, check FAQ on GitLab https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#5-do-interpolation-in-start-using-parameter-transform + +```r + data_1deg <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + ## transformation + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative'), + transform_vars = c('latitude', 'longitude'), + ## + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + retrieve = TRUE) +``` + +1. Run the above script. Check the dimensions and the values of longitude and latitude. +```r +dim(data_1deg) + dat var sdate ensemble time latitude longitude + 1 1 1 1 7 181 360 + +longitude <- attr(data_1deg, 'Variables')$dat1$longitude +range(longitude) +[1] 0 359 + +latitude <- attr(data_1deg, 'Variables')$dat1$latitude +range(latitude) +[1] -90 90 +``` + +2. Plot maps +Plot the two data and see the difference. + +```r +# original data +lon_ori <- attr(data, 'Variables')$dat1$longitude +lat_ori <- attr(data, 'Variables')$dat1$latitude +PlotEquiMap(drop(data)[1, , ], lon = lon_ori, lat = lat_ori, + filled.continent = FALSE, + toptitle = "Original data: 640x1296, method: con", + fileout = "~/handson_5_fig1.png") + +# regridded data +lon_1deg <- attr(data_1deg, 'Variables')$dat1$longitude +lat_1deg <- attr(data_1deg, 'Variables')$dat1$latitude +PlotEquiMap(drop(data_1deg)[1, , ], lon = lon_1deg, lat = lat_1deg, + filled.continent = FALSE, + toptitle = "Regridded data: 181x360, method: con", + fileout = "~/handson_5_fig2.png") +``` +![original grid, method = 'con'](./Figures/handson_5_fig1.png) + +![regrid to 1 degree, method = 'con'](./Figures/handson_5_fig2.png) + + +3. Try different grids and interpolation methods + +- Change `method` to "bil", "bic", or other available options (check s2dv::CDORemap for details.) + +- Change `grid` to "r100x50", "t106grid", or other available options. + +- Plot the map to see the difference. 
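+
+For instance, a minimal sketch for the first combination (based on the Start() call above; only `transform_params` changes, and the object name, map title and output file name are illustrative):
+
+```r
+ data_r100x50 <- Start(dat = repos,
+                       var = var,
+                       sdate = sdate,
+                       ensemble = indices(1),
+                       time = 'all',
+                       latitude = values(list(lat.min, lat.max)),
+                       latitude_reorder = Sort(),
+                       longitude = values(list(lon.min, lon.max)),
+                       longitude_reorder = CircularSort(0, 360),
+                       ## transformation: coarser grid and bilinear method
+                       transform = CDORemapper,
+                       transform_extra_cells = 2,
+                       transform_params = list(grid = 'r100x50',
+                                               method = 'bil'),
+                       transform_vars = c('latitude', 'longitude'),
+                       ##
+                       synonims = list(latitude = c('lat', 'latitude'),
+                                       longitude = c('lon', 'longitude')),
+                       return_vars = list(time = 'sdate',
+                                          longitude = 'dat',
+                                          latitude = 'dat'),
+                       retrieve = TRUE)
+
+ lon_r100x50 <- attr(data_r100x50, 'Variables')$dat1$longitude
+ lat_r100x50 <- attr(data_r100x50, 'Variables')$dat1$latitude
+ PlotEquiMap(drop(data_r100x50)[1, , ], lon = lon_r100x50, lat = lat_r100x50,
+             filled.continents = FALSE,
+             toptitle = "Regridded data: 50x100, method: bil",
+             fileout = "~/handson_5_fig3.png")
+```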
+ +![regrid to r100x50, method = 'bil'](./Figures/handson_5_fig3.png) + +![regrid to t106grid, method = 'bic'](./Figures/handson_5_fig4.png) diff --git a/inst/doc/tutorial/PATC2022/nord3_demo.R b/inst/doc/tutorial/PATC2022/nord3_demo.R new file mode 100644 index 0000000000000000000000000000000000000000..ffa4dec1f236696a27fce3ebf715e72c8e2d6af2 --- /dev/null +++ b/inst/doc/tutorial/PATC2022/nord3_demo.R @@ -0,0 +1,76 @@ +library(startR) + + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + var <- 'tas' + sdate <- c('20170101', '20170201') + lon.min <- 0 + lon.max <- 359.9 + lat.min <- -90 + lat.max <- 90 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = 'all', + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, latitude = NULL), + retrieve = FALSE + ) + + func <- function(x, conf, pval) { + # x: [ensemble, time] + # ensemble mean + ens_mean <- apply(x, 2, mean) + # temporal trend + trend <- s2dv::Trend(ens_mean, conf = conf, pval = pval)$trend + + return(trend) + } + + step <- Step(func, target_dims = c('ensemble', 'time'), + output_dims = list(trend = 'stats'), + use_libraries = c('s2dv')) + + wf <- AddStep(data, step, conf = FALSE, pval = FALSE) + +#-------------------user-defined--------------------- + queue_host <- 'nord4' # short name in .ssh/config + temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/aho/startR_local/' +#---------------------------------------------------- + + # Nord3-v2 + res <- Compute(wf, + chunks = list(latitude = 2, + longitude = 2), + threads_load = 2, + threads_compute = 4, + cluster = list( + queue_host = 'nord4', + queue_type = 'slurm', + temp_dir = temp_dir, + cores_per_job = 16, + job_wallclock = '01:00:00', + max_jobs = 4, +# extra_queue_params = list('#SBATCH --constraint=medmem'), + bidirectional = FALSE, + polling_period = 10 + ), + ecflow_suite_dir = ecflow_suite_dir, + wait = TRUE + ) + +# Save the result if needed +saveRDS(res, file = '~/nord3_demo_result.Rds') + +# Check the result +str(res) +dim(res$trend) + diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 0ffacc7f618336a7253c121a56a0c0f36dfdc303..c7748f6f2a8b57d82f2b723f18aeebfedc5b6ff0 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -68,6 +68,12 @@ this use case is for two file dimensions (i.e., the usage of *_depends). 15. [Load irregular grid data](inst/doc/usecase/ex1_15_irregular_grid_CDORemap.R) This script shows how to use Start() to load irregular grid data , then regrid it by s2dv::CDORemap. + 16. [Merge files with different time dimension length](inst/doc/usecase/ex1_16_files_different_time_dim_length.R) + This script shows how to use Start() to load files with different time dimension length +and reshape the array without undesired NAs. + + + 2. **Execute computation (use `Compute()`)** 1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) 2. 
[Function using attributes of the data](inst/doc/usecase/ex2_2_attr.R) diff --git a/inst/doc/usecase/ex1_16_files_different_time_dim_length.R b/inst/doc/usecase/ex1_16_files_different_time_dim_length.R new file mode 100644 index 0000000000000000000000000000000000000000..8b54bbce1b78afe962a76a02efd8be6d91872872 --- /dev/null +++ b/inst/doc/usecase/ex1_16_files_different_time_dim_length.R @@ -0,0 +1,52 @@ +#---------------------------------------------------------------------------- +# Author: An-Chi Ho +# Date: 11th November 2022 +# +# This script shows how to use Start() to load files with different time dimension length +# and reshape the array without undesired NAs. To know more about the usage of parameter +# "merge_across_dims_narm" and "largest_dims_length", check faq.md How-to-26 (https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#26-use-merge-across-dims-narm-to-remove-nas) +# +# The used data is daily data, so the time dimension length is the days in each month. +# "largest_dims_length = T" needs to be used to tell Start() the different time dimension lengths +# in each file, and "merge_across_dims_narm = T" can remove the NAs that are added by Start() +# during the reshaping process. +#---------------------------------------------------------------------------- + +library(startR) + +# exp +repos <- paste0('/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc') +lons.min <- -10 +lons.max <- 30 +lats.min <- 30 +lats.max <- 60 +sdates <- clock::date_format(clock::date_build(2019, 1:12, 1), format = "%Y%m") + +data <- Start(dat = repos, + var = 'tas', + sdate = sdates, + time = 'all', + time_across = 'sdate', + merge_across_dims = TRUE, # merge 'time' dim along 'sdate' + merge_across_dims_narm = TRUE, # remove additional NAs + largest_dims_length = TRUE, # to tell Start() that each file has different dim length + lat = values(list(lats.min, lats.max)), + lat_reorder = Sort(), + lon = values(list(lons.min, lons.max)), + lon_reorder = CircularSort(-180, 180), + synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), + return_vars = list(lon = NULL, lat = NULL, time = 'sdate'), + num_procs = 16, + retrieve = TRUE) + +dim(data) +# dat var time lat lon +# 1 1 365 107 142 + +dim(attr(data, 'Variables')$common$time) +#time +# 365 + +#NOTE: If merge_across_dims_narm = FALSE or largest_dims_length = FALSE, time dimension will +# be 372 (= 31 days * 12 months) and NAs will be at the end of each small month, i.e., +# 60:62 (29-31 Feb), 124 (31 Apr), 186 (31 Jun), 279 (31 Sep), 341 (31 Nov). 
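+
+# A quick illustrative check (assuming the source files themselves contain no
+# missing values): after the padding NAs are removed, the merged array should
+# contain no NA at all.
+anyNA(data)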
diff --git a/inst/doc/usecase/ex1_1_tranform.R b/inst/doc/usecase/ex1_1_tranform.R index a544f84c96005aea28ae6f99f3e61fc5b2fc9c88..e735280c0c9b08e5d0d96c674d58e7973177fb39 100644 --- a/inst/doc/usecase/ex1_1_tranform.R +++ b/inst/doc/usecase/ex1_1_tranform.R @@ -39,8 +39,7 @@ obs <- Start(dat = obs_path, transform = CDORemapper, transform_extra_cells = 2, transform_params = list(grid = 'r360x181', - method = 'conservative', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'conservative'), transform_vars = c('latitude', 'longitude'), return_vars = list(time = NULL, latitude = 'dat', diff --git a/inst/doc/usecase/ex1_5_latlon_reorder.R b/inst/doc/usecase/ex1_5_latlon_reorder.R index c54314f5f7a2da70c2d7f1672eb7cc1c3578ae6d..75469bf059f46e91c1c48830ada0c55bb9b2a061 100644 --- a/inst/doc/usecase/ex1_5_latlon_reorder.R +++ b/inst/doc/usecase/ex1_5_latlon_reorder.R @@ -98,9 +98,7 @@ res <- Start(dat = path_exp, longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, transform_params = list(grid ='r360x181', - method = 'con', - crop = c(lons.min, lons.max, - lats.min, lats.max)), + method = 'con'), transform_vars = c('longitude', 'latitude'), synonims = list(latitude=c('lat', 'latitude'), longitude=c('lon', 'longitude')), @@ -151,9 +149,7 @@ res <- Start(dat = path_exp, longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid ='r360x181', - method = 'con', - crop = c(lons.min, lons.max, - lats.min, lats.max)), + method = 'con'), transform_vars = c('longitude', 'latitude'), synonims = list(latitude=c('lat', 'latitude'), longitude=c('lon', 'longitude')), diff --git a/inst/doc/usecase/ex2_10_existing_mask.R b/inst/doc/usecase/ex2_10_existing_mask.R index 20d2db7c51919109ad75b49893d3b6eaeaa6f102..93c1abab2dec03e99a552dc2c7aba741c015e49c 100644 --- a/inst/doc/usecase/ex2_10_existing_mask.R +++ b/inst/doc/usecase/ex2_10_existing_mask.R @@ -67,9 +67,9 @@ res <- Compute(workflow = wf_mask, chunks = list(lat = 2, lon = 2)) -# Submit to Nord3 +# Submit to Nord3v2 #-------------------user-defined--------------------- - queue_host <- 'nord1' + queue_host <- 'nord4' temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' ecflow_suite_dir <- '/home/Earth/aho/startR_local/' #---------------------------------------------------- @@ -79,12 +79,11 @@ res <- Compute(workflow = wf_mask, threads_load = 2, threads_compute = 4, cluster = list(queue_host = queue_host, - queue_type = 'lsf', + queue_type = 'slurm', temp_dir = temp_dir, cores_per_job = 2, job_wallclock = '05:00', max_jobs = 4, - extra_queue_params = list('#BSUB -q bsc_es'), bidirectional = FALSE, polling_period = 10 ), @@ -201,9 +200,9 @@ res <- Compute(workflow = wf_mask, chunks = list(lat = 2, lon = 2)) -# Submit to Nord3 +# Submit to Nord3v2 #-------------------user-defined--------------------- - queue_host <- 'nord1' + queue_host <- 'nord4' temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' ecflow_suite_dir <- '/home/Earth/aho/startR_local/' #---------------------------------------------------- @@ -213,12 +212,11 @@ res <- Compute(workflow = wf_mask, threads_load = 2, threads_compute = 4, cluster = list(queue_host = queue_host, - queue_type = 'lsf', + queue_type = 'slurm', temp_dir = temp_dir, cores_per_job = 2, job_wallclock = '05:00', max_jobs = 4, - extra_queue_params = list('#BSUB -q bsc_es'), bidirectional = FALSE, polling_period = 10 ), diff --git a/inst/doc/usecase/ex2_12_transform_and_chunk.R b/inst/doc/usecase/ex2_12_transform_and_chunk.R index 
8b2eb831878c0844b5262a3c501c7f8ccdbb5882..d89448c96aba28bbec50b5a8fb5079d2a17cabf0 100644 --- a/inst/doc/usecase/ex2_12_transform_and_chunk.R +++ b/inst/doc/usecase/ex2_12_transform_and_chunk.R @@ -43,8 +43,7 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('latitude', 'lat'), @@ -93,8 +92,7 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), + method = 'con'), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('latitude', 'lat'), @@ -136,8 +134,7 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), + method = 'con'), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('latitude', 'lat'), diff --git a/inst/doc/usecase/ex2_13_irregular_regrid.R b/inst/doc/usecase/ex2_13_irregular_regrid.R index e09a691333ad40754315d98e548fda55ba7aa9f0..3fe3fc1d8cbb090eeb724daa294142759d29a3d1 100644 --- a/inst/doc/usecase/ex2_13_irregular_regrid.R +++ b/inst/doc/usecase/ex2_13_irregular_regrid.R @@ -10,8 +10,8 @@ library(startR) library(s2dv) -path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/', - 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/', +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', + 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/', '$var$_*_s$sdate$-$member$_gn_$aux$.nc') data <- Start(dataset = path, @@ -19,38 +19,35 @@ data <- Start(dataset = path, sdate = c('1960', '1961'), aux = 'all', aux_depends = 'sdate', - j = indices(2:361), # remove two indices to avoid white strips - i = indices(2:291), # remove two indices to avoid white strips + x = indices(2:361), # remove two indices to avoid white strips + y = indices(2:291), # remove two indices to avoid white strips time = indices(1:12), member = 'r1i1p1f1', - return_vars = list(j = NULL, i = NULL, - latitude = NULL, longitude = NULL), + return_vars = list(nav_lat = NULL, nav_lon = NULL), retrieve = F) attr(data, 'Dimensions') -#dataset var sdate aux j i time member +#dataset var sdate aux x y time member # 1 1 2 1 360 290 12 1 -dim(attr(data, 'Variables')$common$longitude) -# j i -#360 290 -dim(attr(data, 'Variables')$common$latitude) -# j i -#360 290 +dim(attr(data, 'Variables')$common$nav_lon) +# x y +#362 292 +dim(attr(data, 'Variables')$common$nav_lat) +# x y +#362 292 func_regrid <- function(data) { - lons <- attr(data, 'Variables')$common$longitude - lats <- attr(data, 'Variables')$common$latitude - data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180', - method = 'bil', crop = FALSE) + lons <- attr(data, 'Variables')$common$nav_lon + lats <- attr(data, 'Variables')$common$nav_lat + data <- s2dv::CDORemap(data, lons[2:361, 2:291], lats[2:361, 2:291], + grid = 'r360x180', method = 'bil', crop = FALSE) lons_reg <- data[['lons']] lats_reg <- 
data[['lats']] return(list(data = data[[1]], lats = lats_reg, lons = lons_reg)) } -#NOTE: The data transposes if target_dims are only 'j' and 'i'. -# If only 'j' and 'i', output_dims will be 'lat', 'lon'. step <- Step(fun = func_regrid, - target_dims = list(data = c('j', 'i')), + target_dims = list(data = c('x', 'y')), output_dims = list(data = c('lon', 'lat'), lats = 'lat', lons = 'lon'), use_attributes = list(data = "Variables")) diff --git a/inst/doc/usecase/ex2_1_timedim.R b/inst/doc/usecase/ex2_1_timedim.R index f8253c85162db28d4471e56c47d1da855de42861..15ef37da6dd8c955dcd433a73c5aa470e3b799d4 100644 --- a/inst/doc/usecase/ex2_1_timedim.R +++ b/inst/doc/usecase/ex2_1_timedim.R @@ -68,7 +68,7 @@ library(startR) ## on Nord3 #-----------modify according to your personal info--------- - queue_host = 'nord3' #your own host name for power9 + queue_host = 'nord4' temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ @@ -78,8 +78,7 @@ library(startR) threads_load = 2, threads_compute = 4, cluster = list(queue_host = queue_host, - queue_type = 'lsf', - extra_queue_params = list('#BSUB -q bsc_es'), + queue_type = 'slurm', cores_per_job = 2, temp_dir = temp_dir, polling_period = 10, diff --git a/inst/doc/usecase/ex2_2_attr.R b/inst/doc/usecase/ex2_2_attr.R index f06ec46c76128aa5303468eccb220dfcf228fcea..fbd2eac9e24207f5d51498ca9c8974daf23e72bb 100644 --- a/inst/doc/usecase/ex2_2_attr.R +++ b/inst/doc/usecase/ex2_2_attr.R @@ -71,9 +71,9 @@ library(multiApply) ecflow_suite_dir = ecflow_suite_dir, #your own local directory wait = TRUE) -## on Nord3 +## on Nord3v2 #-----------modify according to your personal info--------- - queue_host = 'nord3' #your own host name for power9 + queue_host = 'nord4' #your own host name for power9 temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ @@ -83,10 +83,9 @@ library(multiApply) threads_load = 2, threads_compute = 4, cluster = list(queue_host = queue_host, #your own host name for power9 - queue_type = 'lsf', + queue_type = 'slurm', cores_per_job = 2, temp_dir = temp_dir, - extra_queue_params = list('#BSUB -q bsc_es'), polling_period = 10, job_wallclock = '01:00', max_jobs = 40, diff --git a/inst/doc/usecase/ex2_3_cdo.R b/inst/doc/usecase/ex2_3_cdo.R index 8c398f7ca5b33b2a079cd92561a167b57ef0b0a8..9dca0450a20bced2b71136aa5a2427766fa60228 100644 --- a/inst/doc/usecase/ex2_3_cdo.R +++ b/inst/doc/usecase/ex2_3_cdo.R @@ -75,9 +75,9 @@ library(startR) wait = TRUE) -## on Nord 3 +## on Nord3v2 #-----------modify according to your personal info--------- - queue_host = 'nord3' #your own host name for power9 + queue_host = 'nord4.bsc.es' temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ @@ -87,11 +87,10 @@ library(startR) threads_load = 2, threads_compute = 4, cluster = list(queue_host = queue_host, #your own host name for power9 - queue_type = 'lsf', + queue_type = 'slurm', cores_per_job = 1, temp_dir = temp_dir, CDO_module = 'CDO', - extra_queue_params = list('#BSUB -q bsc_es'), polling_period = 100, job_wallclock = '01:00', max_jobs = 4, diff --git a/inst/doc/usecase/ex2_4_two_func.R b/inst/doc/usecase/ex2_4_two_func.R index 
2e638bd60ea6d3f90b7ff2cca385e9f89d7d587e..c76eda994a17a7efbca2fbf1a07e8e96bf152702 100644 --- a/inst/doc/usecase/ex2_4_two_func.R +++ b/inst/doc/usecase/ex2_4_two_func.R @@ -72,9 +72,9 @@ summary(res4$output1) ecflow_suite_dir = ecflow_suite_dir, #your own local directory wait = TRUE) -## on Nord 3 +## on Nord3v2 #-----------modify according to your personal info----------- - queue_host = 'nord3' #your own host name for power9 + queue_host = 'nord4.bsc.es' temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/' ecflow_suite_dir = '/home/Earth/nperez/startR_local/' #your own local directory #------------------------------------------------------------ @@ -84,11 +84,10 @@ summary(res4$output1) threads_load = 2, threads_compute = 4, cluster = list(queue_host = queue_host, #your own host name for power9 - queue_type = 'lsf', + queue_type = 'slurm', cores_per_job = 1, temp_dir = temp_dir, CDO_module = 'CDO', - extra_queue_params = list('#SBATCH -q bsc_es'), polling_period = 10, job_wallclock = '01:00', max_jobs = 6, diff --git a/inst/doc/usecase/ex2_6_ext_param_func.R b/inst/doc/usecase/ex2_6_ext_param_func.R index 8bd128fa6e1465cbb247bb093ad89b82ca2e9363..760e30d2c1e8422d560c9ff6d10da5206138bf71 100644 --- a/inst/doc/usecase/ex2_6_ext_param_func.R +++ b/inst/doc/usecase/ex2_6_ext_param_func.R @@ -106,7 +106,7 @@ res$strat[1:5, 1:2, 1] # ------------------------------------------------------------- -# ---- To be tried for comparison of results: +#Power9 res <- Compute(workflow$strat, chunks = list(latitude = 2, longitude = 2), threads_load = 2, @@ -123,20 +123,24 @@ res$strat[1:5, 1:2, 1] ecflow_suite_dir = '/esarchive/scratch/nperez/ecFlow', #user-specific wait = TRUE) -# ---- On Nord 3: +# Nord3-v2 +#-------------------user-defined--------------------- + queue_host <- 'nord4' + temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/aho/startR_local/' +#---------------------------------------------------- res <- Compute(workflow$strat, chunks = list(latitude = 2, longitude = 2), threads_load = 2, threads_compute = 2, - cluster = list(queue_host = 'nord3', #user-specific - queue_type = 'lsf', - temp_dir = '/gpfs/scratch/bsc32/bsc32339/', #user-specific + cluster = list(queue_host = queue_host, #user-specific + queue_type = 'slurm', + temp_dir = temp_dir, #user-specific cores_per_job = 2, - extra_queue_params = list('#BSUB -q bsc_es'), job_wallclock = '10:00', max_jobs = 4, bidirectional = FALSE, polling_period = 10), - ecflow_suite_dir = '/esarchive/scratch/nperez/ecFlow', #user-specific + ecflow_suite_dir = ecflow_suite_dir, #user-specific wait = TRUE) diff --git a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R index b28e6a6d88b0a65ff4890e78a0018791f010ebca..b752383453fb1db660cf692c599a0a08fb179f6d 100644 --- a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R +++ b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R @@ -77,22 +77,21 @@ wait = TRUE ) -# Compute() on Nord 3 +# Compute() on Nord3v2 res <- Compute(wf, chunks = list(latitude = 2, longitude = 2), threads_load = 2, threads_compute = 4, - cluster = list(queue_host = 'nord3', - queue_type = 'lsf', - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/', + cluster = list(queue_host = 'nord4', # your alias + queue_type = 'slurm', + temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/', # your own path job_wallclock = '00:30', - extra_queue_params = list('#BSUB -q bsc_es'), cores_per_job = 4, max_jobs = 4, bidirectional = FALSE, 
polling_period = 50), - ecflow_suite_dir = '/home/Earth/nperez/startR_local/', + ecflow_suite_dir = '/home/Earth/nperez/startR_local/', # your own path wait = TRUE) # Results dim(res$output1) diff --git a/inst/doc/usecase/ex2_8_calibration.R b/inst/doc/usecase/ex2_8_calibration.R index 23ab3b01d89d0f7f8e651bbc753e4e1056af4a19..a36f349f59f6c3840363ba2f3f25298bb5dfd7d0 100644 --- a/inst/doc/usecase/ex2_8_calibration.R +++ b/inst/doc/usecase/ex2_8_calibration.R @@ -66,31 +66,28 @@ res <- Compute(wf, chunks = list(latitude = 2, longitude = 2), # Declaration of HPC and execution ## ECFlow is required -# On Fatnodes -res_fat2 <- Compute(wf, - chunks = list(latitude = 2, longitude = 2), - threads_load = 2, - threads_compute = 4, - cluster = list(queue_host = "bsceslogin01.bsc.es", - cores_per_job = 2, - max_jobs = 4, job_wallclock = '00:10:00'), - ecflow_suite_dir = "/esarchive/scratch/nperez/ecflow") # your path! -# On Nord3 +# On Nord3v2 +#-------------------user-defined--------------------- + queue_host <- 'nord4.bsc.es' + temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/aho/startR_local/' +#---------------------------------------------------- res_nord3 <- Compute(wf, chunks = list(latitude = 2, longitude = 2), threads_load = 2, threads_compute = 4, - cluster = list(queue_host = "nord3", - queue_type = 'lsf', - extra_queue_params = list('#BSUB -q bsc_es'), + cluster = list(queue_host = queue_host, + queue_type = 'slurm', + extra_queue_params = list("#SBATCH --constraint=medmem"), cores_per_job = 2, - temp_dir = '/gpfs/scratch/bsc32/bsc32339/startR_hpc/', + temp_dir = temp_dir, polling_period = 10, job_wallclock = '01:00', max_jobs = 4, bidirectional = FALSE), - ecflow_suite_dir = "/esarchive/scratch/nperez/ecflow") # your path! + ecflow_suite_dir = ecflow_suite_dir, + wait = T) # Results dim(res$output1) diff --git a/man/Compute.Rd b/man/Compute.Rd index 7d6db4da4d82cf462cb8cb83d1dddfa5ab516a9f..5b03abd18895ba0332f7245be2046384486745ae 100644 --- a/man/Compute.Rd +++ b/man/Compute.Rd @@ -36,7 +36,7 @@ to use for the computation. The default value is 1.} \item{cluster}{A list of components that define the configuration of the machine to be run on. The comoponents vary from the different machines. -Check \href{https://earth.bsc.es/gitlab/es/startR/}{startR GitLab} for more +Check \href{https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/practical_guide.md}{Practical guide on GitLab} for more details and examples. Only needed when the computation is not run locally. 
The default value is NULL.} diff --git a/tests/testthat/test-Compute-chunk_depend_dim.R b/tests/testthat/test-Compute-chunk_depend_dim.R index a08b1e53926dba2e470c756283ebd43d288b2bc3..ce92b94e39f5a0f2bcdde360518592adf07a7605 100644 --- a/tests/testthat/test-Compute-chunk_depend_dim.R +++ b/tests/testthat/test-Compute-chunk_depend_dim.R @@ -8,8 +8,8 @@ context("Chunk over dimensions that have dependency relationship") -path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', - 'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/', +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/', + 'HadGEM3-GC31-MM/dcppA-hindcast/', 'r1i1p1f2/Omon/tos/gn/v20200417/', '$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc') sdates <- c('2016', '2017', '2018') diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R index 928fa493be57d7b9f975c57bbf229f986dab6d94..00a5c1d7f1b0afc5b39dc9addd970202866a8ecb 100644 --- a/tests/testthat/test-Compute-irregular_regrid.R +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -4,8 +4,8 @@ context("Irregular regriding in the workflow") test_that("1. ex2_13", { -path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/', - 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/', +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/', + 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20200101/', '$var$_*_s$sdate$-$member$_gn_$aux$.nc') suppressWarnings( data <- Start(dataset = path, @@ -13,28 +13,27 @@ data <- Start(dataset = path, sdate = '1960', aux = 'all', aux_depends = 'sdate', - j = indices(2:361), # remove two indices to avoid white strips - i = indices(2:291), # remove two indices to avoid white strips + x = indices(2:361), # remove two indices to avoid white strips + y = indices(2:291), # remove two indices to avoid white strips time = indices(1), member = 'r1i1p1f1', - return_vars = list(j = NULL, i = NULL, - latitude = NULL, longitude = NULL), +# synonims = list(j = c(j, x), i = c(i, y)), + return_vars = list(#x = NULL, y = NULL, + nav_lat = NULL, nav_lon = NULL), retrieve = F) ) func_regrid <- function(data) { - lons <- attr(data, 'Variables')$common$longitude - lats <- attr(data, 'Variables')$common$latitude - data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180', + lons <- attr(data, 'Variables')$common$nav_lon + lats <- attr(data, 'Variables')$common$nav_lat + data <- s2dv::CDORemap(data, lons[2:361, 2:291], lats[2:361, 2:291], grid = 'r360x180', method = 'bil', crop = FALSE) lons_reg <- data[['lons']] lats_reg <- data[['lats']] return(list(data = data[[1]], lats = lats_reg, lons = lons_reg)) } -#NOTE: The data transposes if target_dims are only 'j' and 'i'. -# If only 'j' and 'i', output_dims will be 'lat', 'lon'. 
step <- Step(fun = func_regrid, - target_dims = list(data = c('j', 'i')), + target_dims = list(data = c('x', 'y')), output_dims = list(data = c('lon', 'lat'), lats = 'lat', lons = 'lon'), use_attributes = list(data = "Variables")) @@ -55,11 +54,11 @@ c(lon = 360, dataset = 1, var = 1, sdate = 1, aux = 1, time = 1, member = 1) ) expect_equal( attr(data, 'Dimensions'), -c(dataset = 1, var = 1, sdate = 1, aux = 1, j = 360, i = 290, time = 1, member = 1) +c(dataset = 1, var = 1, sdate = 1, aux = 1, x = 360, y = 290, time = 1, member = 1) ) expect_equal( mean(res$data, na.rm = T), -13.20951, +8.782398, tolerance = 0.0001 ) expect_equal( @@ -67,5 +66,10 @@ drop(res$data)[120,105:110], c(28.32521, 28.07044, 27.59033, 27.02514, 26.55184, 26.67090), tolerance = 0.0001 ) +expect_equal( +range(res$data), +c(-1.799982, 31.130471), +tolerance = 0.0001 +) }) diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index 2a676e44671e2d5fdb7e0b02be512d3ca344bedb..e6363f424629366284f1836f66dee47dde1d0b24 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -15,33 +15,36 @@ data <- Start(dat = path, member = indices(1:2), transform = CDORemapper, transform_extra_cells = 2, - transform_params = list(grid = 'r100x50', method = 'conservative', crop = FALSE), - transform_vars = c('lat','lon'), + transform_params = list(grid = 'r100x50', method = 'conservative'), + transform_vars = c('lat', 'lon'), synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), retrieve = FALSE) ) - func <- function(x) { - a <- mean(x, na.rm = TRUE) - return(a) - } - step <- Step(func, target_dims = c('time'), - output_dims = NULL) - wf <- AddStep(data, step) +func <- function(x) { + return(x) +} + +step <- Step(func, target_dims = c('time'), + output_dims = 'time') +wf <- AddStep(data, step) + +#--- suppressWarnings( - res <- Compute(wf, - chunks = list(member = 2)) +resT <- eval(data) +) +suppressWarnings( +res1 <- Compute(wf, chunks = list(member = 2)) ) expect_equal( -dim(res$output1), -c(dat = 1, var = 1, sdate = 1, lat = 50, lon = 100, fyear = 1, member = 2) +dim(res1$output1), +c(time = 2, dat = 1, var = 1, sdate = 1, lat = 50, lon = 100, fyear = 1, member = 2) ) expect_equal( -res$output1[1, 1, 1, 10:12, 20, 1, 1], -c(274.2808, 275.8509, 277.7623), -tolerance = 0.0001 +as.vector(resT), +as.vector(res1$output1) ) }) @@ -49,8 +52,7 @@ tolerance = 0.0001 test_that("2. chunk along lon", { #skip_on_cran() -#!!!!!!!!!!!!!!!!!!!NOTE: the results are not identical when exp has extra cells = 2!!!!!!!!!!!!!!!!!! -# But exp2 (retrieve = T) has the same results with extra_cells = 2 and 8. 
+#NOTE: the results are not identical when exp has extra cells = 2 suppressWarnings( exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', @@ -64,8 +66,8 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), + method = 'con'), +# crop = FALSE), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('lat', 'latitude'), @@ -81,46 +83,35 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ } step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) + +#--- suppressWarnings( - res <- Compute(wf, chunks = list(longitude = 2)) +resT <- eval(exp) ) suppressWarnings( - res2 <- Compute(wf, chunks = list(ensemble = 1)) +res1 <- Compute(wf, chunks = list(longitude = 2))$output1 ) - -expect_equal( -res$output1, -res2$output1 +#suppressWarnings( +# res2 <- Compute(wf, chunks = list(ensemble = 1)) +#) +suppressWarnings( +res3 <- Compute(wf, chunks = list(longitude = 2, latitude = 2))$output1 ) - -# Check with retrieve = TRUE suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 2, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = 'dat',#NULL, - longitude = 'dat',#NULL, - time = 'sdate'), - retrieve = T) +res4 <- Compute(wf, chunks = list(longitude = 3))$output1 ) expect_equal( -as.vector(res$output1), -as.vector(exp2) +as.vector(resT), +as.vector(res1) +) +expect_equal( +res1, +res3 +) +expect_equal( +res1, +res4 ) }) diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index 3d7bb7d33174ebdf0af9e2f962e6d43bbea0a091..34ddf4854c8b7823eccb254098f398fd89273add 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -40,8 +40,8 @@ exp <- Start(dat = path, lon_reorder = CircularSort(0, 360), transform = CDORemapper, transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = c(0, 360, -90, 90)), + transform_params = list(grid = 'r100x50', method = 'conservative'), +# crop = c(0, 360, -90, 90)), transform_vars = c('lat','lon'), synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), @@ -53,20 +53,24 @@ func <- function(x) { step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) +#--- suppressWarnings( -res1 <- Compute(wf, chunks = list(lon = 2)) +resT <- eval(exp) ) suppressWarnings( -res2 <- Compute(wf, chunks = list(ensemble = 1)) +res1 <- Compute(wf, chunks = list(lon = 2)) ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1)) +#) suppressWarnings( res3 <- Compute(wf, chunks = list(lon = 3)) ) expect_equal( -res1$output1, -res2$output1 +as.vector(res1$output1), +as.vector(resT) ) expect_equal( 
res1$output1, @@ -88,8 +92,8 @@ exp <- Start(dat = path, lon_reorder = CircularSort(0, 360), transform = CDORemapper, transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = c(0, 360, -90, 90)), + transform_params = list(grid = 'r100x50', method = 'conservative'), +# crop = c(0, 360, -90, 90)), transform_vars = c('lat','lon'), synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), @@ -101,6 +105,7 @@ func <- function(x) { step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) +#--- suppressWarnings( res1_list <- Compute(wf, chunks = list(lon = 2)) ) @@ -109,116 +114,6 @@ res1$output1, res1_list$output1 ) -#----------------------------------- -# crop = FALSE -suppressWarnings( -exp <- Start(dat = path, - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - lat = indices(1:640), - lon = indices(1:1296), - lat_reorder = Sort(), - lon_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = FALSE), - transform_vars = c('lat','lon'), - synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), - retrieve = F) -) -func <- function(x) { - return(x) -} -step <- Step(func, target_dims = 'time', output_dims = 'time') -wf <- AddStep(exp, step) - -suppressWarnings( -res_crop_F_1 <- Compute(wf, chunks = list(lon = 2)) -) -suppressWarnings( -res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1)) -) -suppressWarnings( -res_crop_F_3 <- Compute(wf, chunks = list(lon = 3)) -) - -expect_equal( -res1$output1, -res_crop_F_1$output1 -) -expect_equal( -res_crop_F_1$output1, -res_crop_F_2$output1 -) -expect_equal( -res_crop_F_1$output1, -res_crop_F_3$output1 -) - -#--------------------------------------------- -#!!!!!!!!!!!!!!!!!!!!Problem when global + crop = T + chunk along lon!!!!!!!!!!!!!!!! -# crop = TRUE -suppressWarnings( -exp <- Start(dat = path, - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - lat = indices(1:640), - lon = indices(1:1296), - lat_reorder = Sort(), - lon_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = TRUE), - transform_vars = c('lat','lon'), - synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), - retrieve = F) -) -func <- function(x) { - return(x) -} -step <- Step(func, target_dims = 'time', output_dims = 'time') -wf <- AddStep(exp, step) - -#WRONG?????? -suppressWarnings( -res_crop_T_1 <- Compute(wf, chunks = list(lon = 2)) -) -suppressWarnings( -res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1)) -) -#WRONG????? 
-suppressWarnings( -res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) -) -suppressWarnings( -res_crop_T_4 <- Compute(wf, chunks = list(lat = 2)) -) - -expect_equal( -res1$output1, -res_crop_T_1$output1 -) -expect_equal( -res1$output1, -res_crop_T_2$output1 -) -expect_equal( -res1$output1, -res_crop_T_3$output1 -) -expect_equal( -res1$output1, -res_crop_T_4$output1 -) - }) @@ -281,149 +176,45 @@ func <- function(x) { step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) +#--- suppressWarnings( -res <- Compute(wf, chunks = list(lon = 2)) +resT <- eval(exp) ) suppressWarnings( -res2 <- Compute(wf, chunks = list(ensemble = 1)) +res1 <- Compute(wf, chunks = list(lon = 2)) ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1)) +#) suppressWarnings( res3 <- Compute(wf, chunks = list(lon = 3)) ) - expect_equal( -res$output1, -res2$output1 +as.vector(res1$output1), +as.vector(resT) ) expect_equal( -res$output1, +res1$output1, res3$output1 ) expect_equal( -drop(res$output1)[, 1], +drop(res1$output1)[, 1], c(241.4042, 242.5804, 246.8507, 245.8008, 246.4318, 267.0983), tolerance = 0.001 ) expect_equal( -drop(res$output1)[, 2], +drop(res1$output1)[, 2], c(241.2223, 242.2564, 245.9863, 244.5377, 244.8937, 266.5749), tolerance = 0.001 ) expect_equal( -drop(res$output1)[, 3], +drop(res1$output1)[, 3], c(241.0895, 242.1896, 245.3183, 243.1169, 243.9446, 266.4386), tolerance = 0.001 ) -#------------------------------------------------------ -# crop = FALSE -suppressWarnings( -exp <- Start(dat = path, - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - lat = indices(1:80), # 1:80 = -89.78488:-67.58778 - lon = indices(19:65), # 19:65 = 5.00000:17.7777778 - lat_reorder = Sort(), - lon_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', - method = 'conservative', - crop = FALSE), - transform_vars = c('lat','lon'), - synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), - retrieve = F) -) - -func <- function(x) { - return(x) -} -step <- Step(func, target_dims = 'time', output_dims = 'time') -wf <- AddStep(exp, step) - -suppressWarnings( -res_crop_F_1 <- Compute(wf, chunks = list(lon = 2)) -) -suppressWarnings( -res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1)) -) -suppressWarnings( -res_crop_F_3 <- Compute(wf, chunks = list(lon = 3)) -) - -expect_equal( -res$output1, -res_crop_F_1$output1 -) -expect_equal( -res$output1, -res_crop_F_2$output1 -) -expect_equal( -res$output1, -res_crop_F_3$output1 -) - - - -#------------------------------------------------------ -# crop = TRUE -suppressWarnings( -exp <- Start(dat = path, - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - lat = indices(1:80), # 1:80 = -89.78488:-67.58778 - lon = indices(19:65), # 19:65 = 5.00000:17.7777778 - lat_reorder = Sort(), - lon_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', - method = 'conservative', - crop = T), - transform_vars = c('lat','lon'), - synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), - retrieve = F) -) - -func <- function(x) { - return(x) -} -step <- Step(func, target_dims = 'time', output_dims = 'time') -wf <- AddStep(exp, step) - -suppressWarnings( -res_crop_T_1 <- 
Compute(wf, chunks = list(lat = 2, lon = 2)) -) -suppressWarnings( -res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1)) -) -suppressWarnings( -res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) -) - - -expect_equal( -res$output1, -res_crop_T_1$output1 -) -expect_equal( -res$output1, -res_crop_T_2$output1 -) -expect_equal( -res$output1, -res_crop_T_3$output1 -) - }) @@ -485,18 +276,21 @@ step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) suppressWarnings( -res1 <- Compute(wf, chunks = list(lon = 2)) +resT <- eval(exp) ) suppressWarnings( -res2 <- Compute(wf, chunks = list(ensemble = 1)) +res1 <- Compute(wf, chunks = list(lon = 2)) ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1)) +#) suppressWarnings( res3 <- Compute(wf, chunks = list(lon = 3)) ) expect_equal( -res1$output1, -res2$output1 +as.vector(res1$output1), +as.vector(resT) ) expect_equal( res1$output1, @@ -556,13 +350,20 @@ func <- function(x) { step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) +#--- +suppressWarnings( +resT <- eval(exp) +) suppressWarnings( res4 <- Compute(wf, chunks = list(lon = 2)) ) suppressWarnings( res5 <- Compute(wf, chunks = list(lat = 2)) ) - +expect_equal( +as.vector(res4$output1), +as.vector(resT) +) expect_equal( res4$output1, res5$output1 @@ -572,107 +373,4 @@ as.vector(drop(res1$output1)[6:1, ]), as.vector(drop(res4$output1)) ) -#------------------------------------------------------ -#NOTE_19/01/2022: crop is deprecated -## crop = FALSE -#suppressWarnings( -#exp <- Start(dat = path, -# var = 'tas', -# sdate = '20000101', -# ensemble = indices(1), -# time = indices(1), -# lat = indices(1:80), # 1:80 = -89.78488:-67.58778 -# lon = indices(1:65),# 1:65 = 0.00000:17.7777778 -# lat_reorder = Sort(), -# lon_reorder = CircularSort(0, 360), -# transform = CDORemapper, -# transform_extra_cells = 8, -# transform_params = list(grid = 'r100x50', method = 'conservative', -# crop = F), -# transform_vars = c('lat','lon'), -# synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), -# return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), -# retrieve = F) -#) -# -#func <- function(x) { -# return(x) -#} -#step <- Step(func, target_dims = 'time', output_dims = 'time') -#wf <- AddStep(exp, step) -# -#suppressWarnings( -#res_crop_F_1 <- Compute(wf, chunks = list(lon = 2)) -#) -#suppressWarnings( -#res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1)) -#) -#suppressWarnings( -#res_crop_F_3 <- Compute(wf, chunks = list(lon = 3)) -#) -# -#expect_equal( -#as.vector(res1$output1), -#as.vector(drop(res_crop_F_1$output1)[1:6, ]) -#) -#expect_equal( -#res_crop_F_1$output1, -#res_crop_F_2$output1 -#) -#expect_equal( -#res_crop_F_1$output1, -#res_crop_F_3$output1 -#) -# -##---------------------------------------------- -## crop = TRUE -#suppressWarnings( -#exp <- Start(dat = path, -# var = 'tas', -# sdate = '20000101', -# ensemble = indices(1), -# time = indices(1), -# lat = indices(1:80), # 1:80 = -89.78488:-67.58778 -# lon = indices(1:65),# 1:65 = 0.00000:17.7777778 -# lat_reorder = Sort(), -# lon_reorder = CircularSort(0, 360), -# transform = CDORemapper, -# transform_extra_cells = 8, -# transform_params = list(grid = 'r100x50', method = 'conservative', -# crop = T), -# transform_vars = c('lat','lon'), -# synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), -# return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), -# retrieve = F) -#) -# -#func <- function(x) { -# 
return(x) -#} -#step <- Step(func, target_dims = 'time', output_dims = 'time') -#wf <- AddStep(exp, step) -# -#suppressWarnings( -#res_crop_T_1 <- Compute(wf, chunks = list(lon = 2)) -#) -#suppressWarnings( -#res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1)) -#) -#suppressWarnings( -#res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) -#) -# -#expect_equal( -#res_crop_F_1$output1, -#res_crop_T_1$output1 -#) -#expect_equal( -#res_crop_T_1$output1, -#res_crop_T_2$output1 -#) -#expect_equal( -#res_crop_T_1$output1, -#res_crop_T_3$output1 -#) -# }) diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index 1029b60fc1b504557d30fb8653d6fe8d639db05a..e6b6c26c08711a6c73970df5330e33fce1aba80f 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -29,8 +29,8 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('lat', 'latitude'), @@ -46,12 +46,18 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( -res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +resT <- eval(exp) ) + suppressWarnings( -res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +#) suppressWarnings( res3 <- Compute(wf, chunks = list(longitude = 3))$output1 ) @@ -61,8 +67,8 @@ dim(res1), c(sdate = 1, dat = 1, var = 1, ensemble = 1, time = 1, latitude = 50, longitude = 100) ) expect_equal( -res1, -res2 +as.vector(res1), +as.vector(resT) ) expect_equal( res1, @@ -112,8 +118,8 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('lat', 'latitude'), @@ -129,6 +135,8 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( res1_vector <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) @@ -156,8 +164,8 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('lat', 'latitude'), @@ -173,12 +181,17 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( -res1_180 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +resT_180 
<- eval(exp) ) suppressWarnings( -res2_180 <- Compute(wf, chunks = list(ensemble = 1))$output1 +res1_180 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) +#suppressWarnings( +#res2_180 <- Compute(wf, chunks = list(ensemble = 1))$output1 +#) suppressWarnings( res3_180 <- Compute(wf, chunks = list(longitude = 3))$output1 ) @@ -189,139 +202,14 @@ as.vector(res1_180), tolerance = 0.0001 ) expect_equal( -res1_180, -res2_180 +as.vector(res1_180), +as.vector(resT_180) ) expect_equal( res1_180, res3_180 ) -#============================================================ - -# crop = FALSE -lons.min <- 0 -lons.max <- 359.9 -lats.min <- -90 -lats.max <- 90 - -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_F_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_F_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_F_1 -) -expect_equal( -res_crop_F_1, -res_crop_F_2 -) -expect_equal( -res_crop_F_1, -res_crop_F_3 -) - -#------------------------------------------------------- -#!!!!!!!!!!!!!!!!!!!!Problem when global + crop = T + chunk along lon!!!!!!!!!!!!!!!! - -# crop = TRUE -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = TRUE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -#WRONG? -suppressWarnings( -res_crop_T_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -#WRONG? 
-suppressWarnings( -res_crop_T_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -) -suppressWarnings( -res_crop_T_4 <- Compute(wf, chunks = list(latitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_T_1 -) -expect_equal( -res1, -res_crop_T_2 -) -expect_equal( -res1, -res_crop_T_3 -) -expect_equal( -res1, -res_crop_T_4 -) - }) ############################################################################ @@ -366,10 +254,10 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, + transform_extra_cells = 16, #8, synonims = list(latitude = c('lat', 'latitude'), longitude = c('longitude', 'lon')), return_vars = list(latitude = NULL, #'dat', @@ -383,12 +271,17 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( -res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +resT <- eval(exp) ) suppressWarnings( -res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +#) suppressWarnings( res3 <- Compute(wf, chunks = list(longitude = 3))$output1 ) @@ -398,8 +291,8 @@ dim(res1), c(sdate = 1, dat = 1, var = 1, ensemble = 1, time = 1, latitude = 5, longitude = 3) ) expect_equal( -res1, -res2 +as.vector(res1), +as.vector(resT) ) expect_equal( res1, @@ -424,114 +317,6 @@ tolerance = 0.0001 #------------------------------------------------------- -# crop = FALSE -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_F_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_F_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_F_1 -) -expect_equal( -res_crop_F_1, -res_crop_F_2 -) -expect_equal( -res_crop_F_1, -res_crop_F_3 -) - -#------------------------------------------------------- - -# crop = TRUE -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = 
values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = TRUE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_T_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_T_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_T_1 -) -expect_equal( -res_crop_T_1, -res_crop_T_2 -) -expect_equal( -res_crop_T_1, -res_crop_T_3 -) - # The region borders do not exist in the original grid value. For example, # the original grid is [longitude = 1296], so 11 and 21 do not exist there # (but 10 and 20 do, in the cases above) @@ -553,8 +338,8 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), transform_extra_cells = 8, synonims = list(latitude = c('lat', 'latitude'), @@ -570,12 +355,17 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( -res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +resT <- eval(exp) ) suppressWarnings( -res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +##) suppressWarnings( res3 <- Compute(wf, chunks = list(latitude = 3))$output1 ) @@ -585,8 +375,8 @@ dim(res1), c(sdate = 1, dat = 1, var = 1, ensemble = 1, time = 1, latitude = 5, longitude = 2) ) expect_equal( -res1, -res2 +as.vector(res1), +as.vector(resT) ) expect_equal( res1, @@ -635,6 +425,7 @@ lons.max <- 20 lats.min <- 20 lats.max <- 40 +#NOTE: transform_extra_cells = 8 the results are not equal # crop = region suppressWarnings( exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', @@ -648,10 +439,10 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, + transform_extra_cells = 16, #8, synonims = list(latitude = c('lat', 'latitude'), longitude = c('longitude', 'lon')), return_vars = list(latitude = NULL, @@ -665,12 +456,17 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( -res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +resT <- eval(exp) ) suppressWarnings( -res2 <- 
Compute(wf, chunks = list(ensemble = 1))$output1 +res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) +#suppressWarnings( +#res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +#) suppressWarnings( res3 <- Compute(wf, chunks = list(longitude = 3))$output1 ) @@ -680,8 +476,8 @@ dim(res1), c(sdate = 1, dat = 1, var = 1, ensemble = 1, time = 1, latitude = 5, longitude = 6) ) expect_equal( -res1, -res2 +as.vector(res1), +as.vector(resT) ) expect_equal( res1, @@ -723,6 +519,7 @@ tolerance = 0.001 #-------------------------------------------------------------- # crop = region, CircularSort(-180, 180) +#NOTE: transform_extra_cells = 8 the results are not equal suppressWarnings( exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', var = 'tas', @@ -735,10 +532,10 @@ exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$ longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(lons.min, lons.max, lats.min, lats.max)), + method = 'con'), +# crop = c(lons.min, lons.max, lats.min, lats.max)), transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, + transform_extra_cells = 16, #8, synonims = list(latitude = c('lat', 'latitude'), longitude = c('longitude', 'lon')), return_vars = list(latitude = NULL, @@ -752,12 +549,17 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) + +#--- suppressWarnings( -res1_180 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +resT_180 <- eval(exp) ) suppressWarnings( -res2_180 <- Compute(wf, chunks = list(ensemble = 1))$output1 +res1_180 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 ) +#suppressWarnings( +#res2_180 <- Compute(wf, chunks = list(ensemble = 1))$output1 +#) suppressWarnings( res3_180 <- Compute(wf, chunks = list(longitude = 3))$output1 ) @@ -767,8 +569,8 @@ res1, res1_180 ) expect_equal( -res1_180, -res2_180 +as.vector(res1_180), +as.vector(resT_180) ) expect_equal( res1_180, @@ -777,220 +579,4 @@ res3_180 #================================================================ -# crop = FALSE -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_F_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_F_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_F_1 -) -expect_equal( -res_crop_F_1, -res_crop_F_2 -) -expect_equal( -res_crop_F_1, -res_crop_F_3 -) - 
-#------------------------------------------------------- - -# crop = FALSE, CircularSort(-180, 180) -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(-180, 180), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = FALSE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve = F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_F_1_180 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_F_2_180 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_F_3_180 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_F_1_180 -) -expect_equal( -res_crop_F_1, -res_crop_F_2_180 -) -expect_equal( -res_crop_F_1, -res_crop_F_3_180 -) - -#=========================================================== - -# crop = TRUE -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = TRUE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_T_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_T_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_T_1 -) -expect_equal( -res_crop_T_1, -res_crop_T_2 -) -expect_equal( -res_crop_T_1, -res_crop_T_3 -) -#-------------------------------------------------- -# crop = TRUE, CircularSort(-180, 180) -suppressWarnings( -exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = CircularSort(-180, 180), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = TRUE), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude 
= NULL, - time = 'sdate'), - retrieve = F) -) -func <- function(exp) { - return(exp) -} -step <- Step(func, - target_dims = 'sdate', output_dims = 'sdate') -wf <- AddStep(exp, step) -suppressWarnings( -res_crop_T_1_180 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -) -suppressWarnings( -res_crop_T_2_180 <- Compute(wf, chunks = list(ensemble = 1))$output1 -) -suppressWarnings( -res_crop_T_3_180 <- Compute(wf, chunks = list(longitude = 3))$output1 -) - -expect_equal( -res1, -res_crop_T_1_180 -) -expect_equal( -res_crop_T_1, -res_crop_T_2_180 -) -expect_equal( -res_crop_T_1, -res_crop_T_3_180 -) - }) diff --git a/tests/testthat/test-Start-DCPP-across-depends.R b/tests/testthat/test-Start-DCPP-across-depends.R index 452a2306f7415d936719f03041c43bcb89769d2a..a3d95866a79d9fda53e2c6a594122eb8d7c6c743 100644 --- a/tests/testthat/test-Start-DCPP-across-depends.R +++ b/tests/testthat/test-Start-DCPP-across-depends.R @@ -1,7 +1,6 @@ context("DCPP successfull retrieved for depends and across parameters.") test_that("Chunks of DCPP files- Local execution", { - - path <- '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' + path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' sdates <- c('2017', '2018') suppressWarnings( @@ -21,7 +20,7 @@ suppressWarnings( # [sdate = 2, chunk = 3] suppressWarnings( -dat_2018_chunk3 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc', +dat_2018_chunk3 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202201-202212.nc', var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) @@ -29,7 +28,7 @@ expect_equal(dat[1,1,2,25:36,,], dat_2018_chunk3[1,1,,,]) # [sdate = 1, chunk = 2] suppressWarnings( -dat_2017_chunk2 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc', +dat_2017_chunk2 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2017-r1i1p1f2_gn_202001-202012.nc', var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) @@ -37,7 +36,7 @@ expect_equal(dat[1,1,1,13:24,,], dat_2017_chunk2[1,1,,,]) # [sdate = 2, chunk = 1] suppressWarnings( -dat_2018_chunk1 <- Start(dat = '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc', +dat_2018_chunk1 <- Start(dat = 
'/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r1i1p1f2_gn_202001-202012.nc', var = 'tos', time = 'all', i = indices(1:10), j = indices(1:10), retrieve = TRUE) ) diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 53122c8368db45c9248beb5e4fc9eb66ed903fe7..fde9c5e49074a03fade98c02a9c9669f1b84317e 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -1,9 +1,8 @@ context("Start() different calendar") test_that("1. 360_day, daily, unit = 'days since 1850-01-01'", { - - path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', - 'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/', + path_hadgem3 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast//HadGEM3-GC31-MM/', + 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/r1i1p1f2/day/$var$/gn/v20200101/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$fyear$.nc') @@ -48,7 +47,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, @@ -79,9 +78,8 @@ expect_equal( test_that("3. standard, daily, unit = 'days since 1850-1-1 00:00:00'", { - - path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/', - 'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/', + path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_$fyear$.nc') @@ -120,8 +118,8 @@ expect_equal( test_that("4. 
standard, monthly, unit = 'days since 1850-1-1 00:00:00'", { - path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/', - 'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/', + path_mpi_esm <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/r1i1p1f1/Amon/$var$/gn/v20200320/', '$var$_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_200011-201012.nc') sdate <- '2000' @@ -213,7 +211,7 @@ suppressWarnings( expect_equal( attr(obs, 'Variables')$common$time[1], - as.POSIXct('2005-05-16 12:00:00', tz = 'UTC') + as.POSIXct('2005-05-01', tz = 'UTC') ) expect_equal( dim(attr(obs, 'Variables')$common$time), diff --git a/tests/testthat/test-Start-depends_values.R b/tests/testthat/test-Start-depends_values.R index 49114e7bd15cee0d0a77d480a5cfc90ed8328c0b..18d1b9f92024074ec756e2a938d18fe472efb5e8 100644 --- a/tests/testthat/test-Start-depends_values.R +++ b/tests/testthat/test-Start-depends_values.R @@ -5,7 +5,7 @@ context("Start() using values() to define dependency relations") -path <- '/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' +path <- '/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Omon/tos/gn/v20200417/$var$_Omon_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-r1i1p1f2_gn_$chunk$.nc' sdates <- c('2016', '2017', '2018') chunks <- array(dim = c(chunk = 3, sdate = 3)) diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 0ad897a08b2e6b1677b3f85718e061a7284b5443..82e5cb1dc1183e3092460c59c6499549d925abd0 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -126,44 +126,18 @@ as.vector(exp2) # .. ..$ latitude : num [1:3(1d)] -81 -84.6 -88.2 -test_that("3. transform, indices reverse", { - -# lat and lon are lists of indices -suppressWarnings( -exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = indices(list(30, 1)), - latitude_reorder = Sort(), - longitude = indices(list(1, 40)), # can't reverse. Different meaning - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(0, 11, -90, -81)), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= T) -) - -# This test is not valid because it doesn't make sense to use longitude = 40:1. With the automatic "crop" values, the result is not correct. -## lat and lon are vectors of indices +#test_that("3. 
transform, indices reverse", { +# +## lat and lon are lists of indices #suppressWarnings( -#exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +#exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', # var = 'tas', # sdate = '20000101', # ensemble = indices(1), # time = indices(1), -# latitude = 30:1, +# latitude = indices(list(30, 1)), # latitude_reorder = Sort(), -# longitude = 40:1, +# longitude = indices(list(1, 40)), # can't reverse. Different meaning # longitude_reorder = CircularSort(0, 360), # transform = CDORemapper, # transform_params = list(grid = 'r100x50', @@ -179,12 +153,38 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var # retrieve= T) #) # -#expect_equal( -#as.vector(drop(exp1)[, 4:1]), -#as.vector(exp2) -#) - -}) +## This test is not valid because it doesn't make sense to use longitude = 40:1. With the automatic "crop" values, the result is not correct. +### lat and lon are vectors of indices +##suppressWarnings( +##exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +## var = 'tas', +## sdate = '20000101', +## ensemble = indices(1), +## time = indices(1), +## latitude = 30:1, +## latitude_reorder = Sort(), +## longitude = 40:1, +## longitude_reorder = CircularSort(0, 360), +## transform = CDORemapper, +## transform_params = list(grid = 'r100x50', +## method = 'con', +## crop = c(0, 11, -90, -81)), +## transform_vars = c('latitude', 'longitude'), +## transform_extra_cells = 8, +## synonims = list(latitude = c('lat', 'latitude'), +## longitude = c('longitude', 'lon')), +## return_vars = list(latitude = NULL, +## longitude = NULL, +## time = 'sdate'), +## retrieve= T) +##) +## +##expect_equal( +##as.vector(drop(exp1)[, 4:1]), +##as.vector(exp2) +##) +# +#}) ################################################################ ################################################################ diff --git a/tests/testthat/test-Start-metadata_dims.R b/tests/testthat/test-Start-metadata_dims.R index 3239e7de37b878030d776ea5019d15ceacf43fe6..5e4d97252c149516d72eb7e0b6380c60d6bc01fb 100644 --- a/tests/testthat/test-Start-metadata_dims.R +++ b/tests/testthat/test-Start-metadata_dims.R @@ -426,13 +426,13 @@ suppressWarnings( test_that("8. 
Two data sets, both have files but the first file is missing", { path_list <- list( MPI = list(name = 'MPI_ESM', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/', - 'cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/', + path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/', + 'DCPP/MPI-M/MPI-ESM1-2-HR/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', '$var$_day_MPI-ESM1-2-HR_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc')), Had = list(name = 'HadGEM3', - path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/', - 'cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/', + path = paste0('/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/', + 'DCPP/MOHC/HadGEM3-GC31-MM/', 'dcppA-hindcast/$member$/day/$var$/gn/v20200101/', '$var$_day_HadGEM3-GC31-MM_dcppA-hindcast_s$sdate$-$member$_gn_$chunk$.nc'))) suppressWarnings( @@ -482,9 +482,9 @@ data <- Start(dataset = path_list, ) expect_equal( attr(data, 'Files'), - array(c("/esarchive/exp/CMIP6/dcppA-hindcast/mpi-esm1-2-hr/cmip6-dcppA-hindcast_i1p1/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", + array(c("/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/day/tasmin/gn/v20200101/tasmin_day_MPI-ESM1-2-HR_dcppA-hindcast_s2018-r1i1p1f1_gn_20181101-20281231.nc", NA, NA, NA, NA, NA, NA, - "/esarchive/exp/CMIP6/dcppA-hindcast/hadgem3-gc31-mm/cmip6-dcppA-hindcast_i1p1/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200101/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20281230.nc"), + "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r2i1p1f2/day/tasmin/gn/v20200101/tasmin_day_HadGEM3-GC31-MM_dcppA-hindcast_s2018-r2i1p1f2_gn_20181101-20281230.nc"), dim = c(dataset = 2, var = 1, member = 2, sdate = 1, chunk = 2)) ) diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R index d97cd18f4f7654e41f741d76fb1411cd58d5bbaa..ac332cdc95e0608fa320706bf26e8ad4bddfcc5d 100644 --- a/tests/testthat/test-Start-metadata_reshaping.R +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -615,6 +615,45 @@ datesF, dates ) +# no return_vars +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, +# return_vars = list(latitude = NULL, +# longitude = NULL, +# time = 'file_date'), + retrieve = TRUE) +) +expect_equal( +names(attributes(data)$Variables$common), +'tas' +) + +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, +# return_vars = list(latitude = NULL, +# longitude = NULL, +# time = 'file_date'), + retrieve = FALSE) +) +expect_equal( +names(attributes(data)$Variables$common), +NULL +) + }) test_that("9. 
split file dim that contains 'time', and 'time' inner dim is implicit", { diff --git a/tests/testthat/test-Start-reorder-lat.R b/tests/testthat/test-Start-reorder-lat.R index 4133cf01e4b357350bb85dfc58212c472afc8060..2fe5de95b5f1c1dded90ad24f868e864c1704c92 100644 --- a/tests/testthat/test-Start-reorder-lat.R +++ b/tests/testthat/test-Start-reorder-lat.R @@ -872,13 +872,13 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## -test_that("1-3. Selector type: indices(list)", { - -}) +#test_that("1-3. Selector type: indices(list)", { +# +#}) ############################################## -test_that("1-4. Selector type: indices(vector)", { - -}) +#test_that("1-4. Selector type: indices(vector)", { +# +#}) ############################################## test_that("4-x-2-12-123-2-1-x", { diff --git a/tests/testthat/test-Start-reorder-latCoarse.R b/tests/testthat/test-Start-reorder-latCoarse.R index 4fc62ad03dbadae5c758a1012eac6e7593f8b8ce..6ca7b15c3c006db1137295003ce69ff8bc001bea 100644 --- a/tests/testthat/test-Start-reorder-latCoarse.R +++ b/tests/testthat/test-Start-reorder-latCoarse.R @@ -876,15 +876,15 @@ res <- Start(dat = list(list(path=path_exp)), }) ############################################## -test_that("1-3. Selector type: indices(list)", { - -}) +#test_that("1-3. Selector type: indices(list)", { +# +#}) ############################################## -test_that("1-4. Selector type: indices(vector)", { - -}) +#test_that("1-4. Selector type: indices(vector)", { +# +#}) ############################################## -test_that("1-4. Selector type: indices(vector)", { - -}) +#test_that("1-4. Selector type: indices(vector)", { +# +#}) diff --git a/tests/testthat/test-Start-transform-border.R b/tests/testthat/test-Start-transform-border.R index a84f1eca315b5929cc57da45ba3f305bbc6cdbb9..dee8b4e0582e007057df58d7deeeafbc6d66cd84 100644 --- a/tests/testthat/test-Start-transform-border.R +++ b/tests/testthat/test-Start-transform-border.R @@ -1,4 +1,6 @@ context("Transform: check with cdo") + +############################################## # This unit test checks different border situations: normal regional that doesn't touch the borders, # global situation that uses all the grids, or one side reaches the border. @@ -24,6 +26,7 @@ context("Transform: check with cdo") # The result of cdo is from CDO version 1.9.8. +############################################## test_that("1. normal regional situation", { lons.min <- 10 @@ -75,6 +78,7 @@ tolerance = 0.0001 ) }) +#--------------------------------------------- # The result is consistent with cdo result, using all the grid points to transform # and crop the region. @@ -86,8 +90,7 @@ tolerance = 0.0001 #[4,] 284.1387 287.3716 287.7389 #[5,] 285.6547 285.0194 286.1099 -#------------------------------------------------ - +############################################## test_that("2. global situation", { lons.min <- 0 @@ -145,6 +148,7 @@ tolerance = 0.0001 }) +#--------------------------------------------- #> drop(arr2$data_array)[,1:3] # [,1] [,2] [,3] #[1,] 286.4231 288.0916 285.7435 @@ -160,8 +164,7 @@ tolerance = 0.0001 #[4,] 288.6723 285.0679 279.6016 #[5,] 286.8253 282.6013 279.5081 -#----------------------------------------------- - +############################################## test_that("3. 
left side too close to border", { lons.min <- 0 @@ -229,7 +232,7 @@ tolerance = 0.0001 }) - +#--------------------------------------------- #drop(arr2$data_array)[,] # [,1] [,2] [,3] [,4] [,5] [,6] #[1,] 286.4231 288.0916 285.7435 284.9907 285.4820 286.1208 @@ -239,7 +242,7 @@ tolerance = 0.0001 #[5,] 284.3575 284.8728 284.6408 285.6547 285.0194 286.1099 - +############################################## test_that("4. right side too close to border", { lons.min <- 350 @@ -287,6 +290,7 @@ tolerance = 0.0001 }) +#--------------------------------------------- #> drop(arr2$data_array)[,] # [,1] [,2] #[1,] 286.3033 285.9373 @@ -295,8 +299,7 @@ tolerance = 0.0001 #[4,] 285.0679 279.6016 #[5,] 282.6013 279.5081 -#-------------------------------------------------- - +############################################## test_that("5. across meridian", { lons.min <- 170 @@ -359,6 +362,8 @@ tolerance = 0.0001 }) +#--------------------------------------------- + # cdo result is [170:190]; Start() is [-180:-170; 170:180]. # So drop(exp)[, 1:3] == drop(arr2$data_array)[, 3:5]. #drop(arr2$data_array)[,] @@ -370,6 +375,7 @@ tolerance = 0.0001 #[5,] 284.6429 284.6293 284.9630 285.7766 286.0924 +############################################## test_that("6. normal case; [-180, 180]", { # The lon range is too close to border for the original longitude [0, 360], but # is normal case for [-180, 180]. In zzz.R, it is counted as normal case, and the @@ -439,9 +445,8 @@ tolerance = 0.0001 }) -#---------------------------------------------------------- - -test_that("6. left side too close to border; [-180, 180]", { +############################################## +test_that("7. left side too close to border; [-180, 180]", { # The lon range is too close to border for the original longitude [0, 360], but # is normal case for [-180, 180]. In zzz.R, it is counted as normal case, and the # result is the same as 3. @@ -490,6 +495,7 @@ tolerance = 0.0001 }) +#--------------------------------------------- # cdo result is [181:190] #> drop(arr2$data_array)[,] # [,1] [,2] @@ -499,3 +505,208 @@ tolerance = 0.0001 #[4,] 289.5334 289.5931 #[5,] 285.7766 286.0924 + +############################################## +# Tests for global longitude: + +test_that("8. 
Global situation: left side too close to border; [0, 360]", { + +lats.min <- 0 +lats.max <- 10 +lons.min <- 0 +lons.max <- 359 + +suppressWarnings( + exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + transform = CDORemapper, + transform_params = list(grid = 'r360x180', + method = 'bilinear'), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'sdate'), + retrieve = TRUE) +) + +expect_equal( + dim(drop(exp)), + c(latitude = 10, longitude = 360) +) +expect_equal( + drop(exp)[,1], + c(299.8538, 300.0350, 300.2633, 300.4800, 300.6892, 300.2321, 300.1185, + 301.1927, 300.3126, 300.6474), + tolerance = 0.0001 +) +expect_equal( + drop(exp)[,2], + c(299.9356, 300.0557, 300.2042, 300.4571, 300.7495, 300.6388, 299.6622, + 299.2109, 299.4723, 299.5299), + tolerance = 0.0001 +) +expect_equal( + drop(exp)[,3], + c(300.0876, 300.1628, 300.2939, 300.5117, 300.6660, 300.5703, 300.0838, + 300.3170, 299.9515, 299.7573), + tolerance = 0.0001 +) +expect_equal( + drop(exp)[,4], + c(300.1547, 300.2518, 300.3484, 300.4062, 300.5299, 300.5294, 300.2701, + 300.1524, 299.4566, 299.0317), + tolerance = 0.0001 +) +expect_equal( + drop(exp)[,5], + c(300.1340, 300.1652, 300.3981, 300.5044, 300.5583, 300.5028, 300.4284, + 299.6214, 299.0601, 299.1104), + tolerance = 0.0001 +) + +}) + +#--------------------------------------------- +# The result is consistent with cdo result, using all the grid points to transform +# and crop the region: + +# path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +# file <- NcOpen(path) +# arr <- NcToArray(file, +# dim_indices = list(time = 1, ensemble = 1, +# latitude = 1:640, longitude = 1:1296), +# vars_to_read = 'tas') +# lats <- NcToArray(file, +# dim_indices = list(latitude = 1:640), vars_to_read = 'latitude') +# lons <- NcToArray(file, +# dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') +# NcClose(file) + +# arr3 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), +# grid = 'r360x180', method = 'bilinear', crop = c(0,359,0,10)) + +# drop(arr3$data_array)[,1:5] + +# [,1] [,2] [,3] [,4] [,5] +# [1,] 299.8538 299.9356 300.0876 300.1547 300.1340 +# [2,] 300.0350 300.0557 300.1628 300.2518 300.1652 +# [3,] 300.2633 300.2042 300.2939 300.3484 300.3981 +# [4,] 300.4800 300.4571 300.5117 300.4062 300.5044 +# [5,] 300.6892 300.7495 300.6660 300.5299 300.5583 +# [6,] 300.2321 300.6388 300.5703 300.5294 300.5028 +# [7,] 300.1185 299.6622 300.0838 300.2701 300.4284 +# [8,] 301.1927 299.2109 300.3170 300.1524 299.6214 +# [9,] 300.3126 299.4723 299.9515 299.4566 299.0601 +# [10,] 300.6474 299.5299 299.7573 299.0317 299.1104 + +############################################## +test_that("9. 
Global situation: right side too close to border; [0.5, 359.9]", {
+
+lats.min <- 0
+lats.max <- 10
+lons.min <- 0.5
+lons.max <- 359.9
+
+suppressWarnings(
+exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+             var = 'tas',
+             sdate = '20000101',
+             ensemble = indices(1),
+             time = indices(1),
+             latitude = values(list(lats.min, lats.max)),
+             latitude_reorder = Sort(),
+             longitude = values(list(lons.min, lons.max)),
+             longitude_reorder = CircularSort(0, 360),
+             transform = CDORemapper,
+             transform_params = list(grid = 'r360x180',
+                                     method = 'bilinear'),
+             transform_vars = c('latitude', 'longitude'),
+             synonims = list(latitude = c('lat','latitude'),
+                             longitude = c('lon','longitude')),
+             return_vars = list(latitude = NULL,
+                                longitude = NULL,
+                                time = 'sdate'),
+             retrieve = TRUE)
+)
+
+
+expect_equal(
+  dim(drop(exp)),
+  c(latitude = 10, longitude = 359)
+)
+expect_equal(
+  drop(exp)[,1],
+  c(299.9356, 300.0557, 300.2042, 300.4571, 300.7495, 300.6388, 299.6622,
+    299.2109, 299.4723, 299.5299),
+  tolerance = 0.0001
+)
+expect_equal(
+  drop(exp)[,2],
+  c(300.0876, 300.1628, 300.2939, 300.5117, 300.6660, 300.5703, 300.0838,
+    300.3170, 299.9515, 299.7573),
+  tolerance = 0.0001
+)
+expect_equal(
+  drop(exp)[,3],
+  c(300.1547, 300.2518, 300.3484, 300.4062, 300.5299, 300.5294, 300.2701,
+    300.1524, 299.4566, 299.0317),
+  tolerance = 0.0001
+)
+expect_equal(
+  drop(exp)[,4],
+  c(300.1340, 300.1652, 300.3981, 300.5044, 300.5583, 300.5028, 300.4284,
+    299.6214, 299.0601, 299.1104),
+  tolerance = 0.0001
+)
+expect_equal(
+  drop(exp)[,5],
+  c(300.0874, 300.2558, 300.4289, 300.5256, 300.6006, 300.6316, 299.5125,
+    298.8563, 299.5071, 300.0644),
+  tolerance = 0.0001
+)
+
+})
+#---------------------------------------------
+
+# The result is consistent with cdo result, using all the grid points to transform
+# and crop the region:
+
+# library(easyNCDF)
+# path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc"
+# file <- NcOpen(path)
+# arr <- NcToArray(file,
+#                  dim_indices = list(time = 1, ensemble = 1,
+#                                     latitude = 1:640, longitude = 1:1296),
+#                  vars_to_read = 'tas')
+# lats <- NcToArray(file,
+#                   dim_indices = list(latitude = 1:640), vars_to_read = 'latitude')
+# lons <- NcToArray(file,
+#                   dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude')
+# NcClose(file)
+
+# arr3 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats),
+#                        grid = 'r360x180', method = 'bilinear', crop = c(0.5,359.9,0,10))
+
+# drop(arr3$data_array)[,1:5]
+
+#           [,1]     [,2]     [,3]     [,4]     [,5]
+#  [1,] 299.9356 300.0876 300.1547 300.1340 300.0874
+#  [2,] 300.0557 300.1628 300.2518 300.1652 300.2558
+#  [3,] 300.2042 300.2939 300.3484 300.3981 300.4289
+#  [4,] 300.4571 300.5117 300.4062 300.5044 300.5256
+#  [5,] 300.7495 300.6660 300.5299 300.5583 300.6006
+#  [6,] 300.6388 300.5703 300.5294 300.5028 300.6316
+#  [7,] 299.6622 300.0838 300.2701 300.4284 299.5125
+#  [8,] 299.2109 300.3170 300.1524 299.6214 298.8563
+#  [9,] 299.4723 299.9515 299.4566 299.0601 299.5071
+# [10,] 299.5299 299.7573 299.0317 299.1104 300.0644
+##############################################
\ No newline at end of file
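
For reference, the cdo cross-check that the new tests 8 and 9 keep as comments can be assembled into one runnable snippet. The sketch below is only a consolidated version of those commented lines, assuming access to the /esarchive sample file and that the easyNCDF and s2dv packages (and CDO itself) are available; the object names (arr, lats, lons, arr3) follow the test comments, and only the crop argument differs between the two tests.

# Minimal sketch of the verification kept as comments in tests 8 and 9 of
# test-Start-transform-border.R. Assumes the esarchive sample file is reachable
# and that easyNCDF, s2dv and CDO are installed.
library(easyNCDF)

path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc"
file <- NcOpen(path)
# Read the full field and its coordinate variables.
arr <- NcToArray(file,
                 dim_indices = list(time = 1, ensemble = 1,
                                    latitude = 1:640, longitude = 1:1296),
                 vars_to_read = 'tas')
lats <- NcToArray(file, dim_indices = list(latitude = 1:640), vars_to_read = 'latitude')
lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude')
NcClose(file)

# Interpolate the whole field with CDO, then crop the region.
# crop = c(0, 359, 0, 10) reproduces the reference values of test 8;
# crop = c(0.5, 359.9, 0, 10) reproduces those of test 9.
arr3 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats),
                       grid = 'r360x180', method = 'bilinear',
                       crop = c(0, 359, 0, 10))
# The first five longitudes of the cropped field, to compare against drop(exp)
# from the corresponding Start() call in the test.
drop(arr3$data_array)[, 1:5]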