diff --git a/.Rbuildignore b/.Rbuildignore
index 90018c782d3ac1075895bbfa778c1248ae5259dd..b320a05571bd3c450b5b1f2e522261ba923fc601 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -6,7 +6,7 @@
 ^README\.md$
 #\..*\.RData$
 #^vignettes$
-#^tests$
+^tests$
 ^inst/doc$
 #^inst/doc/*$
 #^inst/doc/figures/$
diff --git a/DESCRIPTION b/DESCRIPTION
index f1a9778c8fcd0d67ab57e621d3ba8a52351ed19c..f761503614963190702c9aea93cd264bcfba3368 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: startR
 Title: Automatically Retrieve Multidimensional Distributed Data Sets
-Version: 2.1.0-5
+Version: 2.2.0
 Authors@R: c(
-    person("BSC-CNS", role = c("aut", "cph")),
     person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")),
-    person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("ctb", "cre")),
-    person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("ctb"), comment = c(ORCID = "0000-0001-8568-3071")),
+    person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")),
+    person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut"), comment = c(ORCID = "0000-0001-8568-3071")),
     person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")),
     person("Pierre-Antoine", "Bretonniere", , "pierre-antoine.bretonniere@bsc.es", role = c("ctb")),
-    person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")))
+    person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")),
+    person("BSC-CNS", role = c("aut", "cph")))
 Description: Tool to automatically fetch, transform and arrange subsets of
     multi- dimensional data sets (collections of files) stored in local and/or
     remote file systems or servers, using multicore capabilities where possible.
@@ -19,24 +19,23 @@ Description: Tool to automatically fetch, transform and arrange subsets of
     the tool suitable for any research field where large multidimensional data
     sets are involved.
 Depends:
-    R (>= 3.6.1)
+    R (>= 3.6.0)
 Imports:
     abind,
     bigmemory,
     future,
-    multiApply (>= 2.1.1),
+    multiApply (>= 2.1.0),
     parallel,
     easyNCDF,
-    s2dverification,
+    s2dv,
     ClimProjDiags,
     PCICt
 Suggests:
     stats,
     utils,
     testthat
-License: LGPL-3
+License: Apache License 2.0
 URL: https://earth.bsc.es/gitlab/es/startR/
 BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues
-LazyData: true
 SystemRequirements: cdo ecFlow
 RoxygenNote: 7.0.1
diff --git a/NAMESPACE b/NAMESPACE
index ccf783cbe12b94f8b703a403b9cf16f4be0ff870..1434a0f7a06ce718c3ea494c6e749829a173585c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,7 +24,8 @@ import(future)
 import(multiApply)
 import(parallel)
 importFrom(ClimProjDiags,Subset)
-importFrom(s2dverification,CDORemap)
+importFrom(s2dv,CDORemap)
 importFrom(stats,na.omit)
 importFrom(stats,setNames)
+importFrom(utils,getFromNamespace)
 importFrom(utils,str)
diff --git a/NEWS.md b/NEWS.md
index 8c7a11636c700e54e7c663ffedbc9295be22601b..c542dd1cc14451ce643b361c58583b7744312829 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,21 @@
+# startR v2.2.0 (Release date: 2022-02-11)
+- License changes to Apache License 2.0
+- R version dependency changes to >= 3.6.0
+- The dependency on s2dverification changes to s2dv
+- The transform parameter "crop" for CDORemapper() is deprecated. Start() now assigns it automatically as a vector of four numbers giving the range of the latitude and longitude selectors.
+- Chunking along the transformed dimensions is available.
+- The transform and reorder functions now work with the selectors 'all' and indices().
+- Initialize big.matrix in Start() as NA when the parameter "ObjectBigmemory" is specified or when the job is submitted to a remote machine by Compute().
+- Bugfix for naming the chunks submitted to a remote machine. It prevents job crashes when, for example, chunk_1 and chunk_11 run at the same time.
+- Adjust the time attribute to UTC instead of the local time zone, and correct the time calculation according to the calendar type and units.
+- The default value of the Start() parameter "merge_across_dims_narm" is changed to TRUE.
+- The metadata of the startR object is refined. Different datasets are recorded separately.
+- Force return_vars to have a value when an inner dim has a dependency on a file dim.
+- Correct the wrong names of return_vars. If the names of return_vars are synonyms, change them back to the inner dim names.
+- When merging one inner dimension across files, Start() can detect the different inner dimension lengths of the files and merge them without extra NAs. "largest_dims_length = TRUE" needs to be specified.
+- Bugfixes for several reshaping problems with different combinations of the parameters "merge_across_dims", "merge_across_dims_narm", and "split_multiselected_dims".
+- Modify the dimension consistency check to only check margin dimensions. The target dimensions can have different lengths.
+
 # startR v2.1.0 (Release date: 2020-10-30)
 - Bugfix for metadata retrieving when there are more than one dataset and one of them is missing.
 - Bugfix for the Start() parameter 'metadata_dims' is set to non-dat dimension.
diff --git a/R/CDORemapper.R b/R/CDORemapper.R
index 8aed954be45da45d9824b60a03f4b3503265aa68..67c6b9e93b0b0e1f5c1b73efeeaefca0f21f2038 100644
--- a/R/CDORemapper.R
+++ b/R/CDORemapper.R
@@ -5,7 +5,7 @@
 #''transform' in a Start() call. This function complies with the input/output
 #'interface required by Start() defined in the documentation for the parameter
 #''transform' of function Start().\cr\cr
-#'This function uses the function CDORemap() in the package 's2dverification' to
+#'This function uses the function CDORemap() in the package 's2dv' to
 #'perform the interpolation, hence CDO is required to be installed.
 #'
 #'@param data_array A data array to be transformed. See details in the
@@ -16,6 +16,8 @@
 #'@param file_selectors A character vector indicating the information of the path of
 #' the file the parameter 'data_array' comes from. See details in the documentation of
 #' the parameter 'transform' of the function Start(). The default value is NULL.
+#'@param crop_domain A list of the transformed domain of each transform
+#' variable, automatically provided by Start().
 #'@param \dots A list of additional parameters to adjust the transform process,
 #' as provided in the parameter 'transform_params' in a Start() call. See details
 #' in the documentation of the parameter 'transform' of the function Start().
@@ -24,7 +26,7 @@
 #' potentially with different sizes, and potentially with the attribute
 #' 'variables' with additional auxiliary data. See details in the documentation
 #' of the parameter 'transform' of the function Start().
-#'@seealso \code{\link[s2dverification]{CDORemap}} +#'@seealso \code{\link[s2dv]{CDORemap}} #' #'@examples #'# Used in Start(): @@ -42,20 +44,21 @@ #' longitude_reorder = CircularSort(-180, 180), #' transform = CDORemapper, #' transform_params = list(grid = 'r360x181', -#' method = 'conservative', -#' crop = c(-120, 120, -60, 60)), +#' method = 'conservative'), #' transform_vars = c('latitude', 'longitude'), #' return_vars = list(latitude = 'dat', #' longitude = 'dat', #' time = 'sdate'), #' retrieve = FALSE) #' } -#'@importFrom s2dverification CDORemap +#'@importFrom s2dv CDORemap +#'@importFrom utils getFromNamespace #'@export -CDORemapper <- function(data_array, variables, file_selectors = NULL, ...) { +CDORemapper <- function(data_array, variables, file_selectors = NULL, + crop_domain = NULL, ...) { file_dims <- names(file_selectors) - known_lon_names <- startR:::.KnownLonNames() - known_lat_names <- startR:::.KnownLatNames() + known_lon_names <- getFromNamespace('.KnownLonNames', 'startR')() + known_lat_names <- getFromNamespace('.KnownLatNames', 'startR')() if (!any(known_lon_names %in% names(variables)) || !any(known_lat_names %in% names(variables))) { stop("The longitude and latitude variables must be requested in ", @@ -89,11 +92,31 @@ CDORemapper <- function(data_array, variables, file_selectors = NULL, ...) { } } extra_params <- list(...) - if (!all(c('grid', 'method', 'crop') %in% names(extra_params))) { - stop("Parameters 'grid', 'method' and 'crop' must be specified for the ", + + if (!all(c('grid', 'method') %in% names(extra_params))) { + stop("Parameters 'grid' and 'method' must be specified for the ", "CDORemapper, via the 'transform_params' argument.") } - result <- s2dverification::CDORemap(data_array, lons, lats, ...) + + # Use crop_domain to get 'crop' + if (!is.null(crop_domain)) { + ## lon + lon_name <- names(crop_domain)[which(names(crop_domain) %in% known_lon_names)] + crop_lon <- unlist(crop_domain[[lon_name]]) + ## lat + lat_name <- names(crop_domain)[which(names(crop_domain) %in% known_lat_names)] + crop_lat <- unlist(crop_domain[[lat_name]]) + crop_values <- c(crop_lon, crop_lat) + + if ('crop' %in% names(extra_params)) { + warning("Argument 'crop' in 'transform_params' for CDORemapper() is ", + "deprecated. It is automatically assigned as the selected domain ", + "in Start() call.") + } + extra_params[['crop']] <- crop_values + } + + result <- do.call(s2dv::CDORemap, c(list(data_array, lons, lats), extra_params)) return_variables <- list(result$lons, result$lats) names(return_variables) <- c(lon_name, lat_name) list(data_array = result$data_array, variables = return_variables) diff --git a/R/Start.R b/R/Start.R index b361eb1073c6131c7d9f81b4af34c5922b9aedd1..d11c669daeec82be745586fb321c789ec8e03376 100644 --- a/R/Start.R +++ b/R/Start.R @@ -967,10 +967,10 @@ Start <- function(..., # dim = indices/selectors, # the variable 'dat' is mounted with the information of each # dataset. 
# Take only the datasets for the requested chunk - dats_to_take <- chunk_indices(length(dim_params[[found_pattern_dim]]), - chunks[[found_pattern_dim]]['chunk'], - chunks[[found_pattern_dim]]['n_chunks'], - found_pattern_dim) + dats_to_take <- get_chunk_indices(length(dim_params[[found_pattern_dim]]), + chunks[[found_pattern_dim]]['chunk'], + chunks[[found_pattern_dim]]['n_chunks'], + found_pattern_dim) dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] dat <- dim_params[[found_pattern_dim]] #NOTE: This function creates the object 'dat_names' @@ -1396,7 +1396,7 @@ Start <- function(..., # dim = indices/selectors, # Take chunk if needed (only defined dim; undefined dims will be chunked later in # find_ufd_value(). if (chunks[[file_dim]]['n_chunks'] > 1) { - desired_chunk_indices <- chunk_indices( + desired_chunk_indices <- get_chunk_indices( length(dat_selectors[[file_dim]][[j]]), chunks[[file_dim]]['chunk'], chunks[[file_dim]]['n_chunks'], @@ -1803,6 +1803,7 @@ Start <- function(..., # dim = indices/selectors, picked_vars <- lapply(picked_vars, setNames, names(return_vars)) } picked_vars_ordered <- picked_vars + picked_vars_unorder_indices <- picked_vars for (i in 1:length(dat)) { @@ -1921,22 +1922,34 @@ Start <- function(..., # dim = indices/selectors, transformed_common_vars <- NULL transformed_common_vars_ordered <- NULL transformed_common_vars_unorder_indices <- NULL - + transform_crop_domain <- NULL + for (i in 1:length(dat)) { if (dataset_has_files[i]) { indices <- indices_of_first_files_with_data[[i]] if (!is.null(indices)) { #////////////////////////////////////////////////// # Find data_dims - ## old code. use the 1st valid file to determine the dims - if (!largest_dims_length | is.numeric(largest_dims_length)) { + ## If largest_dims_length is a number & !merge_across_dims, + ## directly assign this dim as the number; + ## If largest_dims_length is a number & this dim is across files, find the dim length of each file + find_largest_dims_length_by_files <- FALSE + if (is.numeric(largest_dims_length)) { + if (names(largest_dims_length) %in% inner_dims_across_files) { + find_largest_dims_length_by_files <- TRUE + } + } else if (largest_dims_length) { + find_largest_dims_length_by_files <- TRUE + } + + if (!find_largest_dims_length_by_files) { # old code file_path <- do.call("[", c(list(array_of_files_to_load), as.list(indices_of_first_files_with_data[[i]]))) # The following 5 lines should go several lines below, but were moved # here for better performance. # If any of the dimensions comes without defining variable, then we read # the data dimensions. data_dims <- NULL - if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) { +# if (length(unlist(var_params[expected_inner_dims[[i]]])) < length(expected_inner_dims[[i]])) { file_to_open <- file_path data_dims <- file_dim_reader(file_to_open, NULL, selectors_of_first_files_with_data[[i]], lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1), @@ -1944,7 +1957,7 @@ Start <- function(..., # dim = indices/selectors, # file_dim_reader returns dimension names as found in the file. 
# Need to translate according to synonims:
                   names(data_dims) <- replace_with_synonmins(data_dims, synonims)
-                  }
+#                  }
 
                   if (is.numeric(largest_dims_length)) { # largest_dims_length is a named vector
                     # Check if the names fit the inner dimension names
@@ -1957,12 +1970,16 @@
                     }
                   }
-                  ## largest_dims_length = TRUE
                 } else {
-                  data_dims <- find_largest_dims_length(
-                    selectors_total_list[[i]], array_of_files_to_load,
-                    selector_indices_save[[i]], dat[[i]], expected_inner_dims[[i]],
-                    synonims, file_dim_reader)
+                  ## largest_dims_length = TRUE, or is a number & merge_across_dims is across this dim
+                  tmp <- find_largest_dims_length(
+                    selectors_total_list[[i]], array_of_files_to_load,
+                    selector_indices_save[[i]], dat[[i]], expected_inner_dims[[i]],
+                    synonims, file_dim_reader)
+                  data_dims <- tmp$largest_data_dims
+                  # 'data_dims_each_file' is used when merge_across_dims = TRUE &
+                  # the files have different lengths of the inner dim.
+                  data_dims_each_file <- tmp$data_dims_all_files
 
                   # file_dim_reader returns dimension names as found in the file.
                   # Need to translate according to synonims:
@@ -1971,6 +1988,23 @@
                 } # end if (largest_dims_length == TRUE)
                 #//////////////////////////////////////////////////
 
+                # Some dimension is defined in the Start() call but doesn't exist in the data
+                if (!all(expected_inner_dims[[i]] %in% names(data_dims))) {
+                  tmp <- expected_inner_dims[[i]][which(!expected_inner_dims[[i]] %in% names(data_dims))]
+                  stop("Could not find the dimension '", tmp, "' in the file. Either ",
+                       "change the dimension name in your request, adjust the ",
+                       "parameter 'dim_names_in_files' or fix the dimension name in ",
+                       "the file.\n", file_path)
+                }
+                # Not all the inner dims are defined in the Start() call
+                if (!all(names(data_dims) %in% expected_inner_dims[[i]])) {
+                  tmp <- names(data_dims)[which(!names(data_dims) %in% expected_inner_dims[[i]])]
+                  if (data_dims[tmp] != 1) {
+                    stop("The dimension '", tmp, "' is found in the file ", file_path,
+                         " but not defined in the Start call.")
+                  }
+                }
+
                 #///////////////////////////////////////////////////////////////////
 
                 # Transform the variables if needed and keep them apart.
@@ -1986,10 +2020,68 @@
                 #  picked_common_vars
                 vars_to_transform <- generate_vars_to_transform(vars_to_transform, picked_common_vars,
                                                                 transform_vars, picked_common_vars_ordered)
 
+                # Save the crop domain from the selectors of the transformed vars.
+                # PROB: It doesn't consider aiat. If aiat, the indices are for the
+                # transformed data; we don't know the corresponding values yet.
+                transform_crop_domain <- vector('list')
+                for (transform_var in transform_vars) {
+                  transform_crop_domain[[transform_var]] <- dat[[i]][['selectors']][[transform_var]][[1]]
+                  # Turn indices into values
+                  if (attr(transform_crop_domain[[transform_var]], 'indices')) {
+                    if (transform_var %in% names(common_return_vars)) {
+                      if (transform_var %in% names(dim_reorder_params)) {
+                        transform_crop_domain[[transform_var]] <-
+                          generate_transform_crop_domain_values(
+                            transform_crop_domain[[transform_var]],
+                            picked_vars = picked_common_vars_ordered[[transform_var]])
+                      } else {
+                        transform_crop_domain[[transform_var]] <-
+                          generate_transform_crop_domain_values(
+                            transform_crop_domain[[transform_var]],
+                            picked_vars = picked_common_vars[[transform_var]])
+                      }
+                    } else { # return_vars
+                      if (transform_var %in% names(dim_reorder_params)) {
+                        transform_crop_domain[[transform_var]] <-
+                          generate_transform_crop_domain_values(
+                            transform_crop_domain[[transform_var]],
+                            picked_vars = picked_vars_ordered[[i]][[transform_var]])
+                      } else {
+                        transform_crop_domain[[transform_var]] <-
+                          generate_transform_crop_domain_values(
+                            transform_crop_domain[[transform_var]],
+                            picked_vars = picked_vars[[i]][[transform_var]])
+                      }
+                    }
+                  } else if (is.atomic(transform_crop_domain[[transform_var]])) {
+                    # if the selector is values given as a vector
+                    transform_crop_domain[[transform_var]] <-
+                      c(transform_crop_domain[[transform_var]][1],
+                        tail(transform_crop_domain[[transform_var]], 1))
+                  }
+
+                  # For CDORemapper (not sure if it's also suitable for other transform functions):
+                  # If lon_reorder is not used and the lon selector goes from big to small,
+                  # lonmax and lonmin need to be exchanged. The ideal way would be to exchange
+                  # them in CDORemapper(), but CDORemapper() does not know whether lon_reorder
+                  # is used or not.
+                  # NOTE: lat's order doesn't matter; both big-to-small and small-to-big work.
+                  # Since we shouldn't make assumptions about transform_var in Start() (it can
+                  # be anything transformable by the assigned transform function), we exchange
+                  # whichever parameter comes in decreasing order here anyway.
+                  if (!transform_var %in% names(dim_reorder_params) &
+                      diff(unlist(transform_crop_domain[[transform_var]])) < 0) {
+                    transform_crop_domain[[transform_var]] <- rev(transform_crop_domain[[transform_var]])
+                  }
+
+                }
+
                 # Transform the variables
                 transformed_data <- do.call(transform, c(list(data_array = NULL,
                                                               variables = vars_to_transform,
-                                                              file_selectors = selectors_of_first_files_with_data[[i]]),
+                                                              file_selectors = selectors_of_first_files_with_data[[i]],
+                                                              crop_domain = transform_crop_domain),
                                                          transform_params))
                 # Discard the common transformed variables if already transformed before
                 if (!is.null(transformed_common_vars)) {
@@ -2077,16 +2169,10 @@
                 # replaced for equivalent indices.
                 if ((any(dat[[i]][['selectors']][[inner_dim]][[1]] %in% c('all', 'first', 'last'))) &&
                     (chunks[[inner_dim]]['n_chunks'] != 1)) {
-                  selectors <- dat[[i]][['selectors']][[inner_dim]][[1]]
-                  if (selectors == 'all') {
-                    selectors <- indices(1:(data_dims[[inner_dim]] * chunk_amount))
-                  } else if (selectors == 'first') {
-                    selectors <- indices(1)
-                  } else {
-                    selectors <- indices(data_dims[[inner_dim]] * chunk_amount)
-                  }
-                  dat[[i]][['selectors']][[inner_dim]][[1]] <- selectors
+                  dat[[i]][['selectors']][[inner_dim]][[1]] <-
+                    replace_character_with_indices(selectors = dat[[i]][['selectors']][[inner_dim]][[1]], data_dims = data_dims[[inner_dim]], chunk_amount)
                 }
+
                 # The selectors for the inner dimension are taken.
selector_array <- dat[[i]][['selectors']][[inner_dim]][[1]] if (debug) { @@ -2135,6 +2221,7 @@ Start <- function(..., # dim = indices/selectors, var_ordered <- NULL var_unorder_indices <- NULL with_transform <- FALSE + #//////////////////////////////////////////////////////////////////// # If the selectors come with an associated variable if (!is.null(var_with_selectors_name)) { if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) { @@ -2224,6 +2311,7 @@ Start <- function(..., # dim = indices/selectors, var_unorder_indices <- 1:m } } + if (debug) { if (inner_dim %in% dims_to_check) { print("-> SIZE OF ORIGINAL VARIABLE:") @@ -2236,8 +2324,11 @@ Start <- function(..., # dim = indices/selectors, print(var_unorder_indices) } } - var_dims <- dim(var_with_selectors) + var_dims <- var_full_dims <- dim(var_with_selectors) var_file_dims <- 1 + + # If this inner dim's selector (var_with_selectors) is an array + # that has file dim as dimension (e.g., across or depend relationship) if (any(names(var_dims) %in% found_file_dims[[i]])) { if (with_transform) { stop("Requested transformation for inner dimension '", @@ -2284,13 +2375,6 @@ Start <- function(..., # dim = indices/selectors, ## TODO HERE:: #- indices_of_first_files_with_data may change, because array is now extended var_full_dims <- dim(var_with_selectors) - if (!(inner_dim %in% names(var_full_dims))) { - stop("Could not find the dimension '", inner_dim, "' in ", - "the file. Either change the dimension name in ", - "your request, adjust the parameter ", - "'dim_names_in_files' or fix the dimension name in ", - "the file.\n", file_path) - } } else if (((is.numeric(selector_array) || is.list(selector_array)) && selectors_are_indices) || (is.character(selector_array) && (length(selector_array) == 1) && (selector_array %in% c('all', 'first', 'last')) && @@ -2300,13 +2384,6 @@ Start <- function(..., # dim = indices/selectors, # Lines moved above for better performance. ##data_dims <- file_dim_reader(file_path, NULL, selectors_of_first_files_with_data[[i]], ## lapply(dat[[i]][['selectors']][expected_inner_dims[[i]]], '[[', 1)) - if (!(inner_dim %in% names(data_dims))) { - stop("Could not find the dimension '", inner_dim, "' in ", - "the file. Either change the dimension name in ", - "your request, adjust the parameter ", - "'dim_names_in_files' or fix the dimension name in ", - "the file.\n", file_path) - } } else { stop(paste0("Can not translate the provided selectors for '", inner_dim, "' to numeric indices. Provide numeric indices and a ", @@ -2317,6 +2394,42 @@ Start <- function(..., # dim = indices/selectors, # data_dims has been populated. If a selector variable was provided, # the variables var_dims, var_file_dims and var_full_dims have been # populated instead. + #//////////////////////////////////////////////////////////////////// + + # If the inner dim lengths differ among files, + # need to know each length to create the indices for each file later. + # Record 'inner_dim_lengths' here for later usage. + inner_dim_lengths <- NULL + if (largest_dims_length & !is.null(file_dim)) { + # inner_dim_lengths here includes all the files, but we only want + # the files of fyear for certain "sdate". We will categorize it later. + inner_dim_lengths <- tryCatch({ + sapply(data_dims_each_file, '[[', inner_dim) + }, error = function(x) { + sapply(data_dims_each_file, '[[', + synonims[[inner_dim]][which(synonims[[inner_dim]] != inner_dim)]) + }) + + # Use other file dims as the factors to categorize. 
+                  other_file_dims <- dim(array_of_files_to_load)[which(file_dims != file_dim)]
+                  other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev)
+                  other_file_dims_factor <- expand.grid(other_file_dims)
+                  selector_indices_save_subset <-
+                    lapply(selector_indices_save[[i]], '[', which(file_dims != file_dim))
+
+                  # Put the fyears with the same other file dims (sdate, etc.) together, and
+                  # find the largest length (in theory all of them should be the same).
+                  inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1])
+                  for (i_factor in 1:length(inner_dim_lengths_cat)) {
+
+                    inner_dim_lengths_cat[[i_factor]] <-
+                      inner_dim_lengths[which(sapply(lapply(
+                        selector_indices_save_subset, '==',
+                        other_file_dims_factor[i_factor, ]), all))]
+                  }
+                  # Find the largest length of each time step
+                  inner_dim_lengths <- do.call(pmax, inner_dim_lengths_cat)
+                }
+
                 fri <- first_round_indices <- NULL
                 sri <- second_round_indices <- NULL
                 # This variable will keep the indices needed to crop the transformed
@@ -2334,7 +2447,13 @@
                   sri <- vector('list', length = chunk_amount)
                   dim(sri) <- c(chunk_amount)
                   if (selector_array == 'all') {
-                    fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
+                    if (is.null(inner_dim_lengths) | length(unique(inner_dim_lengths)) <= 1) { #old code
+                      fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim])))
+                    } else { # files have different inner dim length
+                      for (i_chunk in 1:length(fri)) {
+                        fri[[i_chunk]] <- 1:inner_dim_lengths[i_chunk]
+                      }
+                    }
                     taken_chunks <- rep(TRUE, chunk_amount)
                     #sri <- NULL
                   } else if (selector_array == 'first') {
@@ -2502,6 +2621,7 @@
                     selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take
                     sub_array_of_selectors <- Subset(selector_array, names(selector_indices_to_take),
                                                      as.list(selector_indices_to_take), drop = 'selected')
+
                     if (debug) {
                       if (inner_dim %in% dims_to_check) {
                         print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.")
@@ -2517,7 +2637,7 @@
                       #} else if (!is.null(var_ordered)) {
                       #  sub_array_of_values <- var_ordered
                       } else {
-                        if (length(var_file_dims) > 0) {
+                        if (length(names(var_file_dims)) > 0) {
                           var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% names(var_file_dims))]
                           sub_array_of_values <- Subset(var_with_selectors, names(var_indices_to_take),
                                                         as.list(var_indices_to_take), drop = 'selected')
@@ -2534,7 +2654,9 @@
                         print(file_dim)
                       }
                     }
-                    #????????????????
+
+                    # The inner dim selector is an array that has a file dim (e.g., time = [sdate = 2, time = 4]),
+                    # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate')
                     if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) {
                       if (length(sub_array_of_selectors) > 0) {
                         if (debug) {
@@ -2683,8 +2805,9 @@
                           }
                         }
 
-                        # If chunking along this inner dim, this part creates the indices for each chunk.
+                        #////////////////////////////////////////////////////////////
+                        # If chunking along this inner dim, this part creates the indices for each chunk.
                         # For example, if 'sub_array_of_indices' is c(5:10) and chunked into 2,
                         # 'sub_array_of_indices' becomes c(5:7) for chunk = 1, c(8:10) for chunk = 2.
# If 'sub_array_of_indices' is list(55, 62) and chunked into 2, it becomes @@ -2703,21 +2826,23 @@ Start <- function(..., # dim = indices/selectors, } if (!is.list(sub_array_of_indices)) { sub_array_of_indices <- - sub_array_of_indices[chunk_indices(length(sub_array_of_indices), - chunks[[inner_dim]]["chunk"], - chunks[[inner_dim]]["n_chunks"], - inner_dim)] + sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]["chunk"], + chunks[[inner_dim]]["n_chunks"], + inner_dim)] } else { tmp <- - chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]), - chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], - inner_dim) + get_chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]), + chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], + inner_dim) vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] sub_array_of_indices[[1]] <- vect[tmp[1]] sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]] } } # The sub_array_of_indices now contains numeric indices of the values to be taken by each chunk. + #//////////////////////////////////////////////////////////// + #---------------------------------------------------------- # 'sub_sub_array_of_values' is for sri chunking. If this inner dim is chunked, @@ -2766,7 +2891,6 @@ Start <- function(..., # dim = indices/selectors, sub_array_of_fri <- generate_sub_array_of_fri( with_transform, goes_across_prime_meridian, sub_array_of_indices, n, beta, is_circular_dim) - # May be useful for crop = T. 'subset_vars_to_transform' may not need # to include extra cells, but currently it shows mistake if not include. sub_array_of_fri_no_beta <- generate_sub_array_of_fri( @@ -2801,33 +2925,11 @@ Start <- function(..., # dim = indices/selectors, inner_dim, sub_array_of_fri) } } - - # Change the order of longitude crop if no reorder + from big to small. - # cdo -sellonlatbox, the lon is west, east (while lat can be north - # to south or opposite) - - # Before changing crop, first we need to find the name of longitude. - # NOTE: The potential bug here (also the bug for CDORemapper): the lon name - # is limited (only the ones listed in .KnownLonNames() are available. - known_lon_names <- startR:::.KnownLonNames() - lon_name <- names(subset_vars_to_transform)[which(names(subset_vars_to_transform) %in% known_lon_names)[1]] - - # NOTE: The cases not considered: (1) if lon reorder(decreasing = T) - # It doesn't make sense, but if someone uses it, here should - # occur error. (2) crop = TRUE/FALSE - if ('crop' %in% names(transform_params) & var_with_selectors_name == lon_name & is.null(dim_reorder_params[[inner_dim]])) { - if (is.numeric(class(transform_params$crop))) { - if (transform_params$crop[1] > transform_params$crop[2]) { - tmp <- transform_params$crop[1] - transform_params$crop[1] <- transform_params$crop[2] - transform_params$crop[2] <- tmp - } - } - } - + transformed_subset_var <- do.call(transform, c(list(data_array = NULL, variables = subset_vars_to_transform, - file_selectors = selectors_of_first_files_with_data[[i]]), + file_selectors = selectors_of_first_files_with_data[[i]], + crop_domain = transform_crop_domain), transform_params))$variables[[var_with_selectors_name]] # Sorting the transformed variable and working out the indices again after transform. if (!is.null(dim_reorder_params[[inner_dim]])) { @@ -2909,11 +3011,28 @@ Start <- function(..., # dim = indices/selectors, # will miss. 
'previous_sri' is checked and will be included if this
                          # situation happens, but don't know if the transformed result is
                          # correct or not.
+                         # NOTE: The chunking criteria may not be 100% correct. The current way
+                         # is to pick the sri that are larger than the minimal sub_sub_array_of_values
+                         # and smaller than the maximal sub_sub_array_of_values; if it's
+                         # the first chunk, make sure the 1st sri is included; if it's the
+                         # last chunk, make sure the last sri is included.
                          if (chunks[[inner_dim]]["n_chunks"] > 1) {
+                           sub_array_of_sri_complete <- sub_array_of_sri
                            if (is.list(sub_sub_array_of_values)) { # list
                              sub_array_of_sri <- which(transformed_subset_var >= min(unlist(sub_sub_array_of_values)) &
                                                        transformed_subset_var <= max(unlist(sub_sub_array_of_values)))
+                             # if it's the 1st chunk & the first sri is not included, include it.
+                             if (chunks[[inner_dim]]["chunk"] == 1 &
+                                 !(sub_array_of_sri_complete[1] %in% sub_array_of_sri)) {
+                               sub_array_of_sri <- c(sub_array_of_sri_complete[1], sub_array_of_sri)
+                             }
+                             # if it's the last chunk & the last sri is not included, include it.
+                             if (chunks[[inner_dim]]["chunk"] == chunks[[inner_dim]]["n_chunks"] &
+                                 !(tail(sub_array_of_sri_complete, 1) %in% sub_array_of_sri)) {
+                               sub_array_of_sri <- c(sub_array_of_sri, tail(sub_array_of_sri_complete, 1))
+                             }
+
                              # Check if sub_array_of_sri perfectly connects to the previous sri.
                              # If not, include the previous sri.
                              #NOTE 1: don't know if the transform for the previous sri is
@@ -2922,7 +3041,13 @@
                              # Don't know if the cropping will miss some sri or not.
                              if (sub_array_of_sri[1] != 1) {
                                if (!is.null(previous_sub_sub_array_of_values)) {
-                                 previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
+                                 # if decreasing = F
+                                 if (transformed_subset_var[1] < transformed_subset_var[2]) {
+                                   previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values))
+                                 } else {
+                                   # if decreasing = T
+                                   previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values))
+                                 }
                                  if (previous_sri + 1 != sub_array_of_sri[1]) {
                                    sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)]
                                  }
@@ -2932,6 +3057,10 @@
                            } else { # is vector
                              tmp <- which(transformed_subset_var >= min(sub_sub_array_of_values) &
                                           transformed_subset_var <= max(sub_sub_array_of_values))
+                             # Ensure tmp and sub_array_of_sri are both ascending or descending
+                             if (is.unsorted(tmp) != is.unsorted(sub_array_of_sri)) {
+                               tmp <- rev(tmp)
+                             }
                              # Include the first or last sri if tmp doesn't have it. It's only for
                              # ""vectors"" because vectors look for the closest value.
                              #NOTE: The condition here is not correct. The criteria should be
@@ -2952,14 +3081,21 @@
                              # Don't know if the cropping will miss some sri or not.
if (sub_array_of_sri[1] != 1) { if (!is.null(previous_sub_sub_array_of_values)) { - previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values)) - if (previous_sri + 1 != sub_array_of_sri[1]) { + # if decreasing = F + if (transformed_subset_var[1] < transformed_subset_var[2]) { + previous_sri <- max(which(transformed_subset_var <= previous_sub_sub_array_of_values)) + } else { + # if decreasing = T + previous_sri <- max(which(transformed_subset_var >= previous_sub_sub_array_of_values)) + } + if (previous_sri + 1 != which(sub_array_of_sri[1] == sub_array_of_sri_complete)) { sub_array_of_sri <- (previous_sri + 1):sub_array_of_sri[length(sub_array_of_sri)] } } } } } + ordered_sri <- sub_array_of_sri sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri] @@ -3043,19 +3179,33 @@ Start <- function(..., # dim = indices/selectors, taken_chunks <- TRUE } } - } else { #???????????? + } else { + # The inner dim goes across a file dim (e.g., time_across = 'sdate') if (debug) { if (inner_dim %in% dims_to_check) { print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") } } + # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. + if (chunks[[inner_dim]]["n_chunks"] > 1 & inner_dim %in% inner_dims_across_files) { + stop("Chunk over dimension '", inner_dim, "' is not allowed because '", + inner_dim, "' is across '", + names(inner_dims_across_files)[which(inner_dim %in% inner_dims_across_files)], "'.") + } + if (inner_dim %in% names(dim(sub_array_of_selectors))) { if (is.null(var_with_selectors_name)) { + if (!largest_dims_length | (largest_dims_length & length(unique(inner_dim_lengths)) <= 1)) { #old code + maximal_indice <- data_dims[inner_dim] * chunk_amount + } else { # files have different length of inner dim + maximal_indice <- sum(inner_dim_lengths) + } + if (any(na.omit(unlist(sub_array_of_selectors)) < 1) || - any(na.omit(unlist(sub_array_of_selectors)) > data_dims[inner_dim] * chunk_amount)) { - stop("Provided indices out of range for dimension '", inner_dim, "' ", - "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", - data_dims[inner_dim] * chunk_amount, ").") + any(na.omit(unlist(sub_array_of_selectors)) > maximal_indice)) { + stop("Provided indices out of range for dimension '", inner_dim, "' ", + "for dataset '", dat[[i]][['name']], "' (accepted range: 1 to ", + maximal_indice, ").") } } else { if (inner_dim %in% names(dim(sub_array_of_values))) { @@ -3085,7 +3235,7 @@ Start <- function(..., # dim = indices/selectors, if (is.list(sub_array_of_indices)) { sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] } - sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + sub_array_of_indices <- sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices), chunks[[inner_dim]]['chunk'], chunks[[inner_dim]]['n_chunks'], inner_dim)] @@ -3094,13 +3244,33 @@ Start <- function(..., # dim = indices/selectors, sub_array_is_list <- TRUE sub_array_of_indices <- unlist(sub_array_of_indices) } - if (is.null(var_with_selectors_name)) { - indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1 - transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1 - } else { - indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1 - transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1 + + # "indices_chunk" refers to in which file the + # 
sub_array_of_indices is located; "transformed_indices"
+                      # refers to the indices of sub_array_of_indices within each file.
+                      if (!largest_dims_length |
+                          (largest_dims_length & length(unique(inner_dim_lengths)) <= 1)) {
+                        # old code; all the files have the same length of inner_dim
+                        if (is.null(var_with_selectors_name)) {
+                          indices_chunk <- floor((sub_array_of_indices - 1) / data_dims[inner_dim]) + 1
+                          transformed_indices <- ((sub_array_of_indices - 1) %% data_dims[inner_dim]) + 1
+                        } else {
+                          indices_chunk <- floor((sub_array_of_indices - 1) / var_full_dims[inner_dim]) + 1
+                          transformed_indices <- ((sub_array_of_indices - 1) %% var_full_dims[inner_dim]) + 1
+                        }
+                      } else { # files have different inner dim length
+                        indices_chunk <- c()
+                        for (item in 1:length(inner_dim_lengths)) {
+                          tmp <- which(sub_array_of_indices <= cumsum(inner_dim_lengths)[item])
+                          indices_chunk <- c(indices_chunk, rep(item, length(tmp) - length(indices_chunk)))
+                        }
+                        sub_array_of_indices_by_file <- split(sub_array_of_indices, indices_chunk)
+                        for (item in 2:length(sub_array_of_indices_by_file)) {
+                          sub_array_of_indices_by_file[[item]] <- sub_array_of_indices_by_file[[item]] - cumsum(inner_dim_lengths)[item - 1]
+                        }
+                        transformed_indices <- unlist(sub_array_of_indices_by_file, use.names = FALSE)
+                      }
+
                       if (sub_array_is_list) {
                         sub_array_of_indices <- as.list(sub_array_of_indices)
                       }
@@ -3109,14 +3279,15 @@
                           print("-> GOING TO ITERATE ALONG CHUNKS.")
                         }
                       }
+
                       for (chunk in 1:chunk_amount) {
                         if (!is.null(names(selector_store_position))) {
                           selector_store_position[file_dim] <- chunk
                         } else {
                           selector_store_position <- chunk
                         }
-                        chunk_selectors <- transformed_indices[which(indices_chunk == chunk)]
-                        sub_array_of_indices <- chunk_selectors
+                        sub_array_of_indices <- transformed_indices[which(indices_chunk == chunk)]
                         if (with_transform) {
                           # If the provided selectors are expressed in the world
                           # before transformation
@@ -3158,6 +3329,7 @@
                           taken_chunks[chunk] <- TRUE
                         }
                       }
+
                     if (!is.null(var_unorder_indices)) {
                       ordered_fri <- sub_array_of_fri
                       sub_array_of_fri <- var_unorder_indices[sub_array_of_fri]
@@ -3262,7 +3434,7 @@
                   if (type_of_var_to_crop == 'transformed') {
                     if (!aiat) {
                       if (!(length(selector_array) == 1 &
-                            selector_array %in% c('all', 'first', 'last'))) {
+                            all(selector_array %in% c('all', 'first', 'last')))) {
                         vars_to_crop[[var_to_crop]] <-
                           Subset(transformed_subset_var, inner_dim, crop_indices)
                       } else {
                         vars_to_crop[[var_to_crop]] <-
@@ -3283,7 +3455,7 @@
                     if (type_of_var_to_crop == 'transformed' & !aiat) {
                       if (!(length(selector_array) == 1 &
-                            selector_array %in% c('all', 'first', 'last'))) {
+                            all(selector_array %in% c('all', 'first', 'last')))) {
                         common_vars_to_crop[[common_var_to_crop]] <-
                           Subset(transformed_subset_var, inner_dim, crop_indices)
                       } else {
@@ -3508,7 +3680,8 @@
       data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1)
     } else {
       data_array <- bigmemory::big.matrix(nrow = prod(final_dims), ncol = 1,
-                                          backingfile = ObjectBigmemory)
+                                          backingfile = ObjectBigmemory,
+                                          init = NA)
     }
     shared_matrix_pointer <- bigmemory::describe(data_array)
     if (is.null(ObjectBigmemory)) {
@@ -3583,6 +3756,7 @@
                       synonims = synonims,
                       transform = transform,
transform_params = transform_params, + transform_crop_domain = transform_crop_domain, silent = silent, debug = debug) } else { cluster <- parallel::makeCluster(num_procs, outfile = "") @@ -3594,6 +3768,7 @@ Start <- function(..., # dim = indices/selectors, synonims = synonims, transform = transform, transform_params = transform_params, + transform_crop_domain = transform_crop_domain, silent = silent, debug = debug) }) parallel::stopCluster(cluster) @@ -3794,7 +3969,7 @@ Start <- function(..., # dim = indices/selectors, # piece. .LoadDataFile <- function(work_piece, shared_matrix_pointer, file_data_reader, synonims, - transform, transform_params, + transform, transform_params, transform_crop_domain = NULL, silent = FALSE, debug = FALSE) { #warning(attr(shared_matrix_pointer, 'description')$sharedName) # suppressPackageStartupMessages({library(bigmemory)}) @@ -3833,7 +4008,8 @@ Start <- function(..., # dim = indices/selectors, } sub_array <- do.call(transform, c(list(data_array = sub_array, variables = work_piece[['vars_to_transform']], - file_selectors = work_piece[['file_selectors']]), + file_selectors = work_piece[['file_selectors']], + crop_domain = transform_crop_domain), transform_params)) if (debug) { if (all(unlist(store_indices[1:6]) == 1)) { @@ -3848,8 +4024,8 @@ Start <- function(..., # dim = indices/selectors, dims_to_crop <- which(!sapply(second_round_indices, is.null)) if (length(dims_to_crop) > 0) { dimnames_to_crop <- names(second_round_indices)[dims_to_crop] - sub_array <- Subset(sub_array, dimnames_to_crop, - second_round_indices[dimnames_to_crop]) + sub_array <- ClimProjDiags::Subset(sub_array, dimnames_to_crop, + second_round_indices[dimnames_to_crop]) } if (debug) { if (all(unlist(store_indices[1:6]) == 1)) { diff --git a/R/Utils.R b/R/Utils.R index 3a6f6ea59feca0038389a4627aa9850bba170e4e..d0e850e7f8c01180ef043ca5d4b5886ea3abca61 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -34,8 +34,8 @@ } old_indices <- attr(selectors, 'indices') old_values <- attr(selectors, 'values') - selectors <- ClimProjDiags::Subset(selectors, 1:length(chunk), - lapply(1:length(chunk), + selectors <- ClimProjDiags::Subset(selectors, names(chunk), + lapply(names(chunk), function(x) { n_indices <- dim(selectors)[x] chunk_sizes <- rep(floor(n_indices / n_chunks[x]), n_chunks[x]) diff --git a/R/zzz.R b/R/zzz.R index 3eeed1a5c667e6deb88d861f5e100fd6593fa4e4..0130724599137903e4c47d9fea52504dcd36d2fa 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -163,7 +163,7 @@ look_for_chunks <- function(dim_params, dim_names) { # This is a helper function to compute the chunk indices to take once the total # number of indices for a dimension has been discovered. 
-  chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) {
+  get_chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) {
     if (n_chunks > n_indices) {
       stop("Requested to divide dimension '", dim_name, "' of length ",
            n_indices, " in ", n_chunks, " chunks, which is not possible.")
@@ -408,7 +408,7 @@ find_ufd_value <- function(undefined_file_dims, dat, i, replace_values,
                                              var = unique(parsed_values),
                                              return_indices = FALSE)
         # Take chunk if needed
-        dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]),
+        dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][get_chunk_indices(length(dat_selectors[[u_file_dim]][[j]]),
                                                                              chunks[[u_file_dim]]['chunk'],
                                                                              chunks[[u_file_dim]]['n_chunks'],
                                                                              u_file_dim)]
@@ -538,7 +538,8 @@ find_largest_dims_length <- function(selectors_total_list, array_of_files_to_loa
     largest_data_dims[kk] <- max(sapply(data_dims_all_files, '[', kk))
   }
   names(largest_data_dims) <- names(data_dims_all_files[[1]])
-  return(largest_data_dims)
+  return(list(largest_data_dims = largest_data_dims,
+              data_dims_all_files = data_dims_all_files))
 }
 
 # Generate vars_to_transform from picked_vars[[i]] and picked_common_vars
@@ -560,6 +561,22 @@ generate_vars_to_transform <- function(vars_to_transform, picked_vars, transform
   return(vars_to_transform)
 }
 
+# Turn indices into values for transform_crop_domain
+generate_transform_crop_domain_values <- function(transform_crop_domain, picked_vars) {
+  if (transform_crop_domain == 'all') {
+    transform_crop_domain <- c(picked_vars[1], tail(picked_vars, 1))
+  } else { # indices()
+    if (is.list(transform_crop_domain)) {
+      transform_crop_domain <- picked_vars[unlist(transform_crop_domain)]
+    } else { # vector
+      transform_crop_domain <-
+        c(picked_vars[transform_crop_domain[1]],
+          picked_vars[tail(transform_crop_domain, 1)])
+    }
+  }
+  return(transform_crop_domain)
+}
+
 # Out-of-range warning
 show_out_of_range_warning <- function(inner_dim, range, bound) {
   # bound: 'lower' or 'upper'
@@ -582,12 +599,24 @@ generate_sub_sub_array_of_values <- function(input_array_of_values, sub_array_of
     sub_sub_array_of_values <- list(input_array_of_values[sub_array_of_indices[[1]]],
                                     input_array_of_values[sub_array_of_indices[[2]]])
     if (number_of_chunk > 1) {
-      previous_sub_sub_array_of_values <- input_array_of_values[sub_array_of_indices[[1]] - 1]
+      if (diff(unlist(sub_array_of_indices)) > 0) {
+        previous_sub_sub_array_of_values <-
+          input_array_of_values[sub_array_of_indices[[1]] - 1]
+      } else {
+        previous_sub_sub_array_of_values <-
+          input_array_of_values[sub_array_of_indices[[1]] + 1]
+      }
     }
   } else { # is vector
     sub_sub_array_of_values <- input_array_of_values[sub_array_of_indices]
     if (number_of_chunk > 1) {
-      previous_sub_sub_array_of_values <- input_array_of_values[sub_array_of_indices[1] - 1]
+      if (diff(sub_array_of_indices[1:2]) > 0) {
+        previous_sub_sub_array_of_values <-
+          input_array_of_values[sub_array_of_indices[1] - 1]
+      } else {
+        previous_sub_sub_array_of_values <-
+          input_array_of_values[sub_array_of_indices[1] + 1]
+      }
     }
   }
 
@@ -1064,7 +1093,7 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d
 
     if (!all(diff(as.numeric(names(final_order_list))) > 0)) {
       # shape the vector into the array without split_dims
-      split_dims_pos <- match(all_split_dims[[1]], final_dims_fake)
+      split_dims_pos <- match(names(all_split_dims[[1]]), names(final_dims_fake))
       new_dims <- c()
       if (split_dims_pos[1] > 1) {
        new_dims <- c(new_dims,
                        final_dims_fake[1:(split_dims_pos[1] - 1)])
       }
@@ -1080,18 +1109,22 @@
       tmp <- cumsum(unlist(length_inner_across_dim))
       tmp <- c(0, tmp)
       for (i in 1:length(length_inner_across_dim)) {
-        data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim,
-                                           (tmp[i] + 1):tmp[i + 1])
+        data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split,
+                                                          across_inner_dim,
+                                                          (tmp[i] + 1):tmp[i + 1])
       }
 
       # re-build the array: chunk
       which_chunk <- as.numeric(names(final_order_list))
+      sort_which_chunk <- sort(unique(which_chunk))
+      which_chunk <- sapply(lapply(which_chunk, '==', sort_which_chunk), which)
+
       how_many_indices <- unlist(final_order_list)
       array_piece <- list()
       ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate)))
       for (i in 1:length(final_order_list)) {
-        array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]],
-                                   across_inner_dim,
-                                   ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1))
+        array_piece[[i]] <- ClimProjDiags::Subset(
+          data_array_seperate[[which_chunk[i]]], across_inner_dim,
+          ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1))
         ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i]
       }
@@ -1357,3 +1390,13 @@ transform_indices <- function(v, n, m) {
   unique2(round(((v - 1) / (n - 1)) * (m - 1))) + 1 # this rounding may generate 0s. what then?
 }
 
+replace_character_with_indices <- function(selectors, data_dims, chunk_amount) {
+  if (selectors == 'all') {
+    selectors <- indices(1:(data_dims * chunk_amount))
+  } else if (selectors == 'first') {
+    selectors <- indices(1)
+  } else if (selectors == 'last') {
+    selectors <- indices(data_dims * chunk_amount)
+  }
+  return(selectors)
+}
diff --git a/inst/chunking/load_process_save_chunk.R b/inst/chunking/load_process_save_chunk.R
index 55e47f07716f476dcaf86b0b036099a9d9238906..b7b73a9ff027937015d3e8b4400c1080c990c43a 100644
--- a/inst/chunking/load_process_save_chunk.R
+++ b/inst/chunking/load_process_save_chunk.R
@@ -73,8 +73,19 @@ for (input in 1:length(data)) {
   # Creates a name for the temporal file using the chunks numbers:
   nameMemoryObject <- gsub("[^0-9.-]", "_", gsub(out_dir, "", task_path))
   nameMemoryObject <- substr(nameMemoryObject, 2, nchar(nameMemoryObject))
-  removeRS <- function(str) paste(rle(strsplit(str, "")[[1]])$values, collapse = "")
-  nameMemoryObject <- removeRS(nameMemoryObject)
+  # Remove consecutive repeated characters, but keep repeated digits.
+  removeRS <- function(str) {
+    vec <- strsplit(str, "")[[1]]
+    res <- vec[1]
+    for (i in 2:length(vec)) {
+      if (!is.na(suppressWarnings(as.numeric(vec[i])))) {
+        res <- c(res, vec[i])
+      } else if (res[length(res)] != vec[i]) {
+        res <- c(res, vec[i])
+      }
+    }
+    return(paste(res, collapse = ""))
+  }
+  nameMemoryObject <- suppressWarnings(removeRS(nameMemoryObject))
   start_call[['ObjectBigmemory']] <- nameMemoryObject
   data[[input]] <- tryCatch(eval(start_call),
                             # Handler when an error occurs:
diff --git a/inst/doc/data_check.md b/inst/doc/data_check.md
index 156a2531ce091bac515310c240e297c282b7619f..abe07c64580b14422eac040339af823d3923a4d8 100644
--- a/inst/doc/data_check.md
+++ b/inst/doc/data_check.md
@@ -221,7 +221,6 @@ obs2[2, ]
 
 It is a wrapper package of ncdf4. The idea is similar to ncdf4 but with some more user-friendly features. Check [easyNCDF documentation](https://cran.r-project.org/web/packages/easyNCDF/index.html) to learn how to use it.
- ```r # startR # Use the same one as (2). @@ -231,9 +230,15 @@ library(easyNCDF) file <- NcOpen('/esarchive/recon/ecmwf/era5/monthly_mean/sfcWind_f1h/sfcWind_201811.nc') obs2 <- NcToArray(file, vars_to_read = 'sfcWind', - dim_indices = list(longitude = c(1:10), - latitude = c(1:10), + dim_indices = list(lon = c(1:10), + lat = c(1:10), time = c(1))) +lats2 <- NcToArray(file, + dim_indices = list(lat = 1:10), + vars_to_read = 'lat') +lons2 <- NcToArray(file, + dim_indices = list(lon = 1:10), + vars_to_read = 'lon') NcClose(file) # Check @@ -245,6 +250,12 @@ dim(obs2) # var longitude latitude time # 1 10 10 1 +## (b) attributes +all.equal(as.vector(attr(obs1, 'Variables')$dat1$longitude), as.vector(lons2)) +#[1] TRUE +all.equal(as.vector(attr(obs1, 'Variables')$dat1$latitude), as.vector(lats2)) +#[1] TRUE + ## (c) summary summary(obs1) # Min. 1st Qu. Median Mean 3rd Qu. Max. @@ -252,6 +263,8 @@ summary(obs1) summary(as.vector(obs2)) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.934 4.954 5.025 5.035 5.105 5.165 +all.equal(as.vector(obs1), as.vector(aperm(obs2, c(1, 3, 2, 4)))) +#[1] TRUE ## (d) individual values obs1[1, 1, 1, 1, , 2] diff --git a/inst/doc/deployment.md b/inst/doc/deployment.md index 47325db52180433bd49da4920be06a53780cb43f..1e098bba417d816d6cb234e18cdcd532ff74754c 100644 --- a/inst/doc/deployment.md +++ b/inst/doc/deployment.md @@ -17,7 +17,7 @@ devtools::install_git('https://earth.bsc.es/gitlab/es/startR') ``` - Among others, the bigmemory package will be installed. - If loading and processing NetCDF files (only file format supported by now), install the easyNCDF package. - - If planning to interpolate the data with CDO (either by using the `transform` parameter in `startR::Start`, or by using `s2dverification::CDORemap` in the workflow specified to `startR::Compute`), install s2dverification (>= 2.8.4) and CDO (version 1.6.3 tested). CDO is not available for Windows. + - If planning to interpolate the data with CDO (either by using the `transform` parameter in `startR::Start`, or by using `s2dv::CDORemap` in the workflow specified to `startR::Compute`), install s2dv and CDO (version 1.6.3 tested). CDO is not available for Windows. A local or remote file system or THREDDS/OPeNDAP server providing the data to be retrieved must be accessible. @@ -50,7 +50,7 @@ All machines must be UNIX-based, with the "hostname", "date", "touch" and "sed" - netCDF-4 is installed, if loading and processing NetCDF files (only supported format by now) - R (>= 2.14.1) is installed as a Linux Environment Module - the startR package is installed - - if using CDO interpolation, the s2dverification package and CDO 1.6.3 are installed + - if using CDO interpolation, the s2dv package and CDO 1.6.3 are installed - any other R packages required by the `startR::Compute` workflow are installed - any other Environment Modules used by the `startR::Compute` workflow are installed - a shared file system (with a unified access point) or THREDDS/OPeNDAP server is accessible across HPC nodes and HPC login node, where the necessary data can be uploaded from your workstation. A file system shared between your workstation and the HPC is also supported and advantageous. Use of a data transfer service between the workstation and the HPC is also supported under specific configurations. 
diff --git a/inst/doc/faq.md b/inst/doc/faq.md
index fbf2efbc098210a04abd1d5a35cd207638e8521c..689e13a2f468cc1ccf27ffb146af46047794a2c4 100644
--- a/inst/doc/faq.md
+++ b/inst/doc/faq.md
@@ -37,7 +37,9 @@ This document intends to be the first reference for any doubts that you may have
   4. [My jobs work well in workstation and fatnodes but not on Power9 (or vice versa)](#4-my-jobs-work-well-in-workstation-and-fatnodes-but-not-on-power9-or-vice-versa)
   5. [Errors related to wrong file formatting](#5-errors-related-to-wrong-file-formatting)
   6. [Errors using a new cluster (setting Nord3)](#6-errors-using-a-new-cluster-setting-nord3)
-  7. [Start() fails retrieving data](#7-start-fails-retrieving-data)
+  7. [Start() fails retrieving data](#7-start-fails-retrieving-data)
+  8. [Error 'caught segfault' when job submitted to HPCs](#8-error-caught-segfault-when-job-submitted-to-hpcs)
+  9. [White lines on the figure of interpolated irregular data](#9-white-lines-on-the-figure-of-interpolated-irregular-data)
+
 
 ## 1. How to
 
@@ -275,7 +277,7 @@ data <- Start(...,
               retrieve = FALSE)
 
 func <- function(x) {
-  y <- s2dverification::Season(x, posdim = 'time') #specify package name
+  y <- s2dv::Season(x, posdim = 'time') #specify package name
   return(y)
 }
 
@@ -293,7 +295,7 @@ wf <- AddStep(data, step)
                  cluster = list(queue_host = 'p1', #your alias for power9
                                 queue_type = 'slurm',
                                 temp_dir = '/gpfs/scratch/bsc32/bsc32734/startR_hpc/',
-                                lib_dir = '/gpfs/projects/bsc32/share/R_libs/3.5/', #s2dverification is involved here, so the machine can find Season()
+                                lib_dir = '/gpfs/projects/bsc32/share/R_libs/3.5/', #s2dv is involved here, so the machine can find Season()
                                 r_module = 'startR/0.1.2-foss-2018b-R-3.5.0',
                                 job_wallclock = '00:10:00',
                                 cores_per_job = 4,
@@ -311,7 +313,7 @@ wf <- AddStep(data, step)
 
 If you want to do the interpolation within Start(), you can use the following four parameters:
 
-1. **`transform`**: Assign the interpolation function. It is recommended to use `startR::CDORemapper`, the wrapper function of s2dverification::CDORemap().
+1. **`transform`**: Assign the interpolation function. It is recommended to use `startR::CDORemapper`, the wrapper function of s2dv::CDORemap().
 2. **`transform_params`**: A list of the required inputs for `transform`. Take `transform = CDORemapper` as an example, the common items are:
     - `grid`: A character string specifying either a name of a target grid (recognized by CDO, e.g., 'r256x128', 't106grid') or a path to another NetCDF file with the target grid (a single grid must be defined in such file).
     - `method`: A character string specifying an interpolation method (recognized by CDO, e.g., 'con', 'bil', 'bic', 'dis'). The following long names are also supported: 'conservative', 'bilinear', 'bicubic', and 'distance-weighted'.
@@ -321,10 +323,10 @@
    The parameter 'crop' also accepts a numeric vector of custom borders: c(western border, eastern border, southern border, northern border).
 3. **`transform_vars`**: A character vector of the inner dimensions to be transformed. E.g., c('latitude', 'longitude').
-4. **`transform_extra_cells`**: A numeric indicating the number of grid cell to extend from the borders if the interpolating region is a subset of the whole region. 2 as default, which is consistent with the method in s2dverification::Load().
+4. **`transform_extra_cells`**: A numeric indicating the number of grid cells to extend from the borders if the interpolating region is a subset of the whole region. The default is 2, which is consistent with the method in s2dv::Load(). A sketch combining these parameters is shown below.
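+
+As a minimal sketch of how these four parameters fit together (the path, variable, and selector values here are illustrative assumptions, not a real repository):
+
+```r
+data <- Start(dat = '/path/to/data/$var$_$sdate$.nc',  # hypothetical path
+              var = 'tas',
+              sdate = '20000101',
+              time = 'all',
+              latitude = values(list(-60, 60)),
+              longitude = values(list(-120, 120)),
+              transform = CDORemapper,
+              transform_params = list(grid = 'r360x181',
+                                      method = 'conservative'),
+              transform_vars = c('latitude', 'longitude'),
+              transform_extra_cells = 2,
+              retrieve = FALSE)
+```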
You can find an example script here [ex1_1_tranform.R](/inst/doc/usecase/ex1_1_tranform.R)
 
-You can see more information in s2dverification::CDORemap documentation [here](https://earth.bsc.es/gitlab/es/s2dverification/blob/master/man/CDORemap.Rd).
+You can see more information in s2dv::CDORemap documentation [here](https://earth.bsc.es/gitlab/es/s2dv/blob/master/man/CDORemap.Rd).
 
 ### 6. Get data attributes without retrieving data to workstation
 
@@ -459,7 +461,7 @@ data <- Start(dat = repos,
 
 ### 9. Use CDORemap() in function
 
-If you want to interpolate data by s2dverification::CDORemap in function, you need to tell the
+If you want to interpolate data with s2dv::CDORemap in a function, you need to tell the
 machine which CDO module to use. Therefore, `CDO_module = 'CDO/1.9.5-foss-2018b'` should be added
 to the Compute() cluster list. See the example in usecase [ex2_3_cdo.R](inst/doc/usecase/ex2_3_cdo.R).
 
@@ -473,6 +475,8 @@ When trying to load both start dates at once using Start(), the order in which t
 - `sdates = c('19991101', '19990901')`, the member dimension will be of length 51, showing missing values for the members 26 to 51 in the second start date;
 - `sdates = c('19990901', '19991101')`, the member dimension will be of length 25, and no member will be shown as missing.
 
+To ensure that all the members are retrieved, we can use the parameter 'largest_dims_length'. See [FAQ 21](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) for details.
+
 The code to reproduce this behaviour can be found in the Use Cases section, [example 1.4](/inst/doc/usecase/ex1_4_variable_nmember.R).
 
 ### 11. Select the longitude/latitude region
 
@@ -857,6 +861,8 @@ by a named integer vector, for example, `largest_dims_length = c(member = 51)`.
 adopt the provided ones and use the first valid file to decide the rest of dimensions.
 By this means, the efficiency can be similar to `largest_dims_length = FALSE`.
 
+Find an example in [use case ex1_4](/inst/doc/usecase/ex1_4_variable_nmember.R).
+
 ### 22. Define the selector when the indices in the files are not aligned
 
 When the data structure between the requested files is not identical, we need to give different
@@ -1025,7 +1031,7 @@ some problem.
 
 ### 5. Errors related to wrong file formatting
 
-Several errors could be return when the files are not correctly formatted. If you see one of this errors, review the coordinates in your files:
+Several errors could be returned when the files are not correctly formatted. If you see one of these errors, review the coordinates in your files:
 
 ```
 Error in Rsx_nc4_put_vara_double: NetCDF: Numeric conversion not representable
```
 
 ```
 Error in dim(x$x) <- dim_bk :
 ```
 
 ```
-Error in s2dverification::CDORemap(data_array, lons, lats, ...) :
+Error in s2dv::CDORemap(data_array, lons, lats, ...) :
 Found invalid values in 'lons'.
 ```
 
 ```
 ERROR: invalid cell
 Aborting in file clipping.c, line 1295 ...
-Error in s2dverification::CDORemap(data_array, lons, lats, ...) :
+Error in s2dv::CDORemap(data_array, lons, lats, ...) :
 CDO remap failed.
 ```
 
@@ -1106,3 +1112,24 @@ Error in file_var_reader(NULL, file_object, NULL, var_to_read, synonims) :
 ```
 check if your path contains the label $var$. If not, try to add it as part of the path or the file name, where $var$ is the variable to retrieve from the files.
+
+
Error 'caught segfault' when job submitted to HPCs + +This error message implies that there is not enough memory for the +computation. +Check if the partition `/dev/shm/` is empty. If leftover files occupy this partition, +the process you are running may fail. Remove the files and re-run your code. + +If the error persists, test your code with a smaller data sample to rule out a problem in the code itself, since this error message indicates that you are requesting more memory than is available. + + +### 9. White lines on the figure of interpolated irregular data + +To process irregular grid data, we can load the data by Start() and interpolate it to a regular grid by other tools (e.g., using s2dv::CDORemap). +In some instances, when we plot the interpolated data, we see some white lines on the map (see figure below). +To solve this problem, we can try to exclude the first and last indices of the latitude and longitude in the Start() call. +Check [use case ex1_15](inst/doc/usecase/ex1_15_irregular_grid_CDORemap.R) for the example script (Case 2). The solution may differ from case to case, so if you find this solution does not work for your case, please open an issue. + + + diff --git a/inst/doc/figures/faq_2_9_white_stripes.png b/inst/doc/figures/faq_2_9_white_stripes.png new file mode 100644 index 0000000000000000000000000000000000000000..ceccedbf1cefc0f8729425bae8c0b70a55cc4748 Binary files /dev/null and b/inst/doc/figures/faq_2_9_white_stripes.png differ diff --git a/inst/doc/practical_guide.md b/inst/doc/practical_guide.md index d476652420df126e1893af3191ea63de2137bab8..378f6486bd7a5cb60fadb35918a15bd056f55379 100644 --- a/inst/doc/practical_guide.md +++ b/inst/doc/practical_guide.md @@ -297,7 +297,7 @@ If you are interested in actually loading the entire data set in your machine yo - evaluating the object returned by `Start()`: `data_load <- eval(data)` See the section on "How to choose the number of chunks, jobs and cores" for indications on working out the maximum amount of data that can be retrieved with a `Start()` call on a specific machine. -You may realize that this functionality is similar to the `Load()` function in the s2dverification package. In fact, `Start()` is more advanced and flexible, although `Load()` is more mature and consistent for loading typical seasonal to decadal forecasting data. `Load()` will be adapted in the future to use `Start()` internally. +You may realize that this functionality is similar to the `Load()` function in the s2dv package. In fact, `Start()` is more advanced and flexible, although `Load()` is more mature and consistent for loading typical seasonal to decadal forecasting data. `Load()` will be adapted in the future to use `Start()` internally. There are no constrains for the number or names of the outer or inner dimensions used in a `Start()` call. In other words, `Start()` will handle NetCDF files with any number of dimensions with any name, as well as files distributed across folders in complex ways, since you can use customized wildcards in the path pattern.
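To illustrate the customized wildcards, here is a sketch with an invented archive layout (the folder structure and tag names are assumptions for illustration only). Each pair of '$' in the path pattern defines an outer file dimension, whose name only has to match the corresponding argument of `Start()`:

```r
library(startR)

# Invented archive layout: each $...$ tag becomes an outer (file) dimension
path <- paste0('/path/to/archive/$project$/$model$/monthly_mean/',
               '$var$/$var$_$model$_$sdate$.nc')

data <- Start(dat = path,
              project = 'all',   # one folder level, scanned as a wildcard
              model = 'all',     # another folder level
              var = 'tas',
              sdate = '20170101',
              time = 'all',
              lat = 'all',
              lon = 'all',
              synonims = list(lat = c('lat', 'latitude'),
                              lon = c('lon', 'longitude')),
              retrieve = FALSE)
```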
diff --git a/inst/doc/tutorial/PATC2021/Figures/.gitkeep b/inst/doc/tutorial/PATC2021/Figures/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/inst/doc/tutorial/PATC2021/Figures/handson_1_fig_crps.png b/inst/doc/tutorial/PATC2021/Figures/handson_1_fig_crps.png new file mode 100644 index 0000000000000000000000000000000000000000..73c480a848755ce57ac162fbc7d71e51e89c8329 Binary files /dev/null and b/inst/doc/tutorial/PATC2021/Figures/handson_1_fig_crps.png differ diff --git a/inst/doc/tutorial/PATC2021/Figures/handson_1_fig_rmsss.png b/inst/doc/tutorial/PATC2021/Figures/handson_1_fig_rmsss.png new file mode 100644 index 0000000000000000000000000000000000000000..7086350961e47067a018bdef9ad4fd4abdca5703 Binary files /dev/null and b/inst/doc/tutorial/PATC2021/Figures/handson_1_fig_rmsss.png differ diff --git a/inst/doc/tutorial/PATC2021/PATC2021_handson.pdf b/inst/doc/tutorial/PATC2021/PATC2021_handson.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5630f10ab6daa4a3ecbc02a02d62482b579854c5 Binary files /dev/null and b/inst/doc/tutorial/PATC2021/PATC2021_handson.pdf differ diff --git a/inst/doc/tutorial/PATC2021/hands-on_1_workflow.md b/inst/doc/tutorial/PATC2021/hands-on_1_workflow.md new file mode 100644 index 0000000000000000000000000000000000000000..4f91a51f3a4060bf5b676c5e1ee76fd6301fd9d5 --- /dev/null +++ b/inst/doc/tutorial/PATC2021/hands-on_1_workflow.md @@ -0,0 +1,160 @@ +# Hands-on 1: Seasonal forecast verification + +## Goal +Learn how to use the startR workflow to finish a piece of analysis, including defining and pre-processing the desired data, +defining the operation, building the workflow, and executing the operation. + +In this use case, the ensemble-adjusted Continuous Ranked Probability Score (CRPS) and the root mean square error skill score (RMSSS) are computed to verify the forecast. +To make the process faster, the required data size is small here so we can run the execution on the workstation. + +## 1. Define data +The first step of the analysis is to use Start() to identify the data needed. + +Hints: +(1) The data paths use two '$' as the wildcard delimiters, and the wildcards can be given any names you like. +(2) The variable we want to use is 'tas'. +(3) The required time (sdate) period is November 1981 to 2016. +(4) Read global data (i.e., full spatial grids). +(5) Take only the first time step. +(6) To make the data smaller, we only take the first two ensemble members from the experimental data. +(7) Because we are going to use the whole startR workflow, we only need to create a pointer to the data repository rather than retrieve it to the workstation.
+ +```r +library(startR) + +# exp data + # Use this one if on workstation or nord3 + repos_exp <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos_exp <- paste0('/gpfs/scratch/nct00/nct00016/d3_02_R-hands_on/tmp-esarchive/', + 'exp/ecmwf/system4_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + + sdates_exp <- sapply(1981:2016, function(x) paste0(x, '1101')) + + exp <- Start(dat = repos_exp, + var = ______, + sdate = sdates_exp, + time = ______, + ensemble = ______, + latitude = ______, + longitude = ______, + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = NULL), + retrieve = ______) + +# obs data + repos_obs <- paste0('/gpfs/scratch/nct00/nct00016/d3_02_R-hands_on/tmp-esarchive/', + 'recon/ecmwf/erainterim/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + sdates_obs <- sapply(1981:2016, function(x) paste0(x, '11')) + + obs <- Start(dat = repos_obs, + var = ______, + sdate = sdates_obs, + time = ______, + latitude = ______, + longitude = ______, + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = NULL), + retrieve = ______) +``` + +Question: + +1. What are the dimensions of these two data sets? + +2. What is the size of these two data sets? + + +## 2. Define operation and workflow +It is recommended to define the function and write Step() together, because the latter helps you clarify the input and output dimensions of the function. + +In the self-defined function, we want to use the two functions, 'SpecsVerification::EnsCrps' and 's2dv:::.RMSSS', to calculate the skill scores. +You can check the first function by typing `?SpecsVerification::EnsCrps`, and the second function on [s2dv GitLab](https://earth.bsc.es/gitlab/es/s2dv/-/blob/master/R/RMSSS.R). + +Hint: +(1) The self-defined function is for exp and obs data. Therefore, target_dims and output_dims in Step() should be a list with two vector elements. +(2) Check the functions 'EnsCrps' and '.RMSSS'. What are the minimum required dimensions of the inputs of each function? These dimensions should be the 'target_dims' in Step(). +(3) To return the two skill scores ('crps' and 'rmsss') together, put them in a list with two elements. +(4) What are the dimensions of outputs? These dimensions should be the 'output_dims' in Step(). +(5) The first input of AddStep() should also be a list containing exp and obs. + +```r + # self-defined function + func <- function(exp, obs) { + # CRPS + crps <- mean(SpecsVerification::EnsCrps(exp, obs, R.new = Inf), na.rm = TRUE) + + # RMSSS + # obs only has [sdate] now. Add one more dim for it. + obs <- s2dv::InsertDim(obs, posdim = 2, lendim = 1, name = 'ensemble') + rmsss <- mean(s2dv:::.RMSSS(exp, obs, time_dim = 'sdate', dat_dim = 'ensemble', pval = FALSE)$rmsss) + + return(______) + } + step <- Step(func, target_dims = ______, + output_dims = ______) + wf <- AddStep(______, step) +``` + +Question: +1. Which dimensions are used in the operation? Which dimensions are free? + +2. Can we use more dimensions as target_dims? What are the disadvantages? + + +## 3. Execute locally +To avoid potential technical problems in the connection and configuration, +we choose to run the execution locally. +Note that it is recommended to submit jobs to HPCs if the data size is big or +the operation is heavy.
+ +Hint: +(1) Use the free dimensions (i.e., those that are not 'target_dims' in Step()) to chunk. +(2) It is safe to divide the data into pieces, each of which is 1/2-1/3 the size of the RAM. In this case, the data size is only around 100Mb, so you can play with it without +worrying about crashing the workstation. +(3) You will see some error messages like "Error in R_nc4_open: No such file or directory", +which can be ignored. + +```r + res <- Compute(wf$crps, + chunks = list(______)) +``` + +## 4. Check the results + +1. Check the list structure using function `str()`. What are the items in the list? + +2. What do the dimensions of each item look like? + +3. Plot the map +Use `s2dv::PlotEquiMap` or other visualization tools to plot the map. +```r + # Get latitude and longitude from the attributes + lat <- as.vector(attr(exp, 'Variables')$common$latitude) + lon <- as.vector(attr(exp, 'Variables')$common$longitude) + + # Set the color bar + brks_crps <- seq(min(res$crps, na.rm = TRUE), max(res$crps, na.rm = TRUE), length.out = 11) + brks_rmsss <- seq(min(res$rmsss, na.rm = TRUE), max(res$rmsss, na.rm = TRUE), length.out = 11) + + library(s2dv) + # Plot crps + PlotEquiMap(______, lon, lat, + color_fun = clim.palette('yellowred'), brks = brks_crps, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF monthly mean tas CRPS 2012-2016', title_scale = 0.6, + fileout = '~/handson_1_fig_crps.png') + + # Plot rmsss + PlotEquiMap(______, lon, lat, + color_fun = clim.palette('yellowred'), brks = brks_rmsss, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF monthly mean tas RMSSS 2012-2016', title_scale = 0.6, + fileout = '~/handson_1_fig_rmsss.png') + +``` diff --git a/inst/doc/tutorial/PATC2021/hands-on_1_workflow_ans.md b/inst/doc/tutorial/PATC2021/hands-on_1_workflow_ans.md new file mode 100644 index 0000000000000000000000000000000000000000..5ecc054d115afd533c6b58d669895ffee041dc03 --- /dev/null +++ b/inst/doc/tutorial/PATC2021/hands-on_1_workflow_ans.md @@ -0,0 +1,193 @@ +# Hands-on 1: Seasonal forecast verification + +## Goal +Learn how to use the startR workflow to finish a piece of analysis, including defining and pre-processing the desired data, +defining the operation, building the workflow, and executing the operation. + +In this use case, the ensemble-adjusted Continuous Ranked Probability Score (CRPS) and the root mean square error skill score (RMSSS) are computed to verify the forecast. +To make the process faster, the required data size is small here so we can run the execution on the workstation. + +## 1. Define data +The first step of the analysis is to use Start() to identify the data needed. + +Hints: +(1) The data paths use two '$' as the wildcard delimiters, and the wildcards can be given any names you like. +(2) The variable we want to use is 'tas'. +(3) The required time (sdate) period is November 1981 to 2016. +(4) Read global data (i.e., full spatial grids). +(5) Take only the first time step. +(6) To make the data smaller, we only take the first two ensemble members from the experimental data. +(7) Because we are going to use the whole startR workflow, we only need to create a pointer to the data repository rather than retrieve it to the workstation.
+ +```r +library(startR) + +# exp data + # Use this one if on workstation or nord3 + repos_exp <- paste0('/esarchive/exp/ecmwf/system4_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos_exp <- paste0('/gpfs/scratch/nct00/nct00016/d3_02_R-hands_on/tmp-esarchive/', + 'exp/ecmwf/system4_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + + sdates_exp <- sapply(1981:2016, function(x) paste0(x, '1101')) + + exp <- Start(dat = repos_exp, + var = 'tas', + sdate = sdates_exp, + time = indices(1), + ensemble = indices(1:2), + latitude = 'all', + longitude = 'all', + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdate'), + retrieve = FALSE) + +# obs data + repos_obs <- paste0('/gpfs/scratch/nct00/nct00016/d3_02_R-hands_on/tmp-esarchive/', + 'recon/ecmwf/erainterim/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + sdates_obs <- sapply(1981:2016, function(x) paste0(x, '11')) + + obs <- Start(dat = repos_obs, + var = 'tas', + sdate = sdates_obs, + time = indices(1), + latitude = 'all', + longitude = 'all', + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdate'), + retrieve = FALSE) +``` + +Question: + +1. What are the dimensions of these two data sets? +```r +attr(exp, 'Dimensions') + dat var sdate time ensemble latitude longitude + 1 1 36 1 2 256 512 + +attr(obs, 'Dimensions') + dat var sdate time latitude longitude + 1 1 36 1 256 512 +``` + +2. What is the size of these two data sets? +exp: 72Mb; obs: 36Mb + + +## 2. Define operation and workflow +It is recommended to define the function and write Step() together, because the latter helps you clarify the input and output dimensions of the function. + +In the self-defined function, we want to use the two functions, 'SpecsVerification::EnsCrps' and 's2dv:::.RMSSS', to calculate the skill scores. +You can check the first function by typing `?SpecsVerification::EnsCrps`, and the second function on [s2dv GitLab](https://earth.bsc.es/gitlab/es/s2dv/-/blob/master/R/RMSSS.R). + +Hint: +(1) The self-defined function is for exp and obs data. Therefore, target_dims and output_dims in Step() should be a list with two vector elements. +(2) Check the functions 'EnsCrps' and '.RMSSS'. What are the minimum required dimensions of the inputs of each function? These dimensions should be the 'target_dims' in Step(). +(3) To return the two skill scores ('crps' and 'rmsss') together, put them in a list with two elements. +(4) What are the dimensions of outputs? These dimensions should be the 'output_dims' in Step(). +(5) The first input of AddStep() should also be a list containing exp and obs. + +```r + # self-defined function + func <- function(exp, obs) { + # CRPS + crps <- mean(SpecsVerification::EnsCrps(exp, obs, R.new = Inf), na.rm = TRUE) + + # RMSSS + # obs only has [sdate] now. Add one more dim for it. + obs <- s2dv::InsertDim(obs, posdim = 2, lendim = 1, name = 'ensemble') + rmsss <- mean(s2dv:::.RMSSS(exp, obs, time_dim = 'sdate', dat_dim = 'ensemble', pval = FALSE)$rmsss) + + return(list(crps = crps, rmsss = rmsss)) + } + step <- Step(func, target_dims = list(c('sdate', 'ensemble'), c('sdate')), + output_dims = list(crps = NULL, rmsss = NULL)) + wf <- AddStep(list(exp, obs), step) +``` + +Question: +1. Which dimensions are used in the operation? Which dimensions are free?
+'sdate' and 'ensemble' are used in the operation. The other dimensions (dat, var, time, latitude, and longitude) are not necessary for the function. These free dimensions will be subsetted by multiApply during the execution. + +2. Can we use more dimensions as target_dims? What are the disadvantages? +It still works if we put more dimensions as target_dims, but we will have fewer choices for chunking in the next step. Also, the lighter the function is, the quicker the operation runs. + + + +## 3. Execute locally +To avoid potential technical problems in the connection and configuration, +we choose to run the execution locally. +Note that it is recommended to submit jobs to HPCs if the data size is big or +the operation is heavy. + +Hint: +(1) Use the free dimensions (i.e., those that are not 'target_dims' in Step()) to chunk. +(2) It is safe to divide the data into pieces, each of which is 1/2-1/3 the size of the RAM. In this case, the data size is only around 100Mb, so you can play with it without +worrying about crashing the workstation. +(3) You will see some error messages like "Error in R_nc4_open: No such file or directory", +which can be ignored. + +```r + res <- Compute(wf$crps, + chunks = list(latitude = 2, longitude = 2)) +``` + +## 4. Check the results + +1. Check the list structure using function `str()`. What are the items in the list? +```r +str(res) +List of 2 + $ crps : num [1, 1, 1, 1:256, 1:512] 0.879 0.97 1.029 1.04 0.991 ... + $ rmsss: num [1, 1, 1, 1:256, 1:512] 0.991 0.991 0.99 0.99 0.99 ... + ... +``` + +2. What do the dimensions of each item look like? +```r +dim(res$crps) + dat var time latitude longitude + 1 1 1 256 512 +dim(res$rmsss) + dat var time latitude longitude + 1 1 1 256 512 +``` + +3. Plot the map +Use `s2dv::PlotEquiMap` or other visualization tools to plot the map. +```r + # Get latitude and longitude from the attributes + lat <- as.vector(attr(exp, 'Variables')$common$latitude) + lon <- as.vector(attr(exp, 'Variables')$common$longitude) + + # Set the color bar + brks_crps <- seq(0, max(res$crps, na.rm = TRUE), length.out = 21) + brks_rmsss <- seq(min(res$rmsss, na.rm = TRUE), max(res$rmsss, na.rm = TRUE), length.out = 21) + + library(s2dv) + # Plot crps + PlotEquiMap(res$crps[1, 1, 1, , ], lon, lat, + cols = heat.colors(length(brks_crps) - 1)[(length(brks_crps) - 1):1], + brks = brks_crps, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF monthly mean tas CRPS 2012-2016', title_scale = 0.6, + fileout = '~/handson_1_fig_crps.png') + + # Plot rmsss + PlotEquiMap(res$rmsss[1, 1, 1, , ], lon, lat, + color_fun = clim.palette('yellowred'), brks = brks_rmsss, + filled.continents = FALSE, triangle_ends = c(TRUE, TRUE), + toptitle = 'ECMWF monthly mean tas RMSSS 2012-2016', title_scale = 0.6, + fileout = '~/handson_1_fig_rmsss.png') +``` + +![CRPS map](./Figures/handson_1_fig_crps.png) + +![RMSSS map](./Figures/handson_1_fig_rmsss.png) + diff --git a/inst/doc/tutorial/PATC2021/hands-on_2_interpolation.md b/inst/doc/tutorial/PATC2021/hands-on_2_interpolation.md new file mode 100644 index 0000000000000000000000000000000000000000..02ac7264363bb92085f679575e047a3ab4e0605f --- /dev/null +++ b/inst/doc/tutorial/PATC2021/hands-on_2_interpolation.md @@ -0,0 +1,90 @@ +# Hands-on 2: Spatial grid interpolation + +## Goal +To learn how to use Start() to load the data and do the interpolation. +In this use case, we only use Start() and load the data into local memory.
+The default transformation function is startR::CDORemapper, a wrapper function of s2dv::CDORemap that uses CDO internally. + +In detail, we are going to: +(1) Learn different ways to assign longitude and latitude in Start(). +(2) Use the '_reorder' parameters to specify the sorting order. +(3) Use transform-related parameters. + +## 1. Define longitude and latitude +```r + # Use this one if on workstation or nord3 + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos <- paste0('/gpfs/scratch/nct00/nct00016/d3_02_R-hands_on/tmp-esarchive/', + 'exp/ecmwf/system5_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + var <- 'tas' + sdate <- c('20170101') + lon.min <- 0 + lon.max <- 359.9 + lat.min <- -90 + lat.max <- 90 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + retrieve = TRUE + ) +``` + +1. Run the above script. Check the dimensions, the warning messages, and the values of +longitude and latitude. What is the range of longitude and latitude? + +2. Why is 'lon.max' 359.9 and not 360? What will happen if it is 360? + +3. Now, change + - `latitude_reorder = Sort()` to `latitude_reorder = Sort(decreasing = TRUE)` + - `longitude_reorder = CircularSort(0, 360)` to `longitude_reorder = CircularSort(-180, 180)` + - Set `lon.min <- -180` and `lon.max <- 179.9` + +Check the values of longitude and latitude again. Are they different from the original script? + + +## 2. Interpolation +Now, let us add in the transformation parameters. +```r + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + ## transformation + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lon.min, lon.max, + lat.min, lat.max)), + transform_vars = c('latitude', 'longitude'), + apply_indices_after_transform = FALSE, + ## + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, + latitude = NULL), + retrieve = TRUE + ) +``` +4. Run the above script. Check the dimensions and the values of longitude and latitude. + diff --git a/inst/doc/tutorial/PATC2021/hands-on_2_interpolation_ans.md b/inst/doc/tutorial/PATC2021/hands-on_2_interpolation_ans.md new file mode 100644 index 0000000000000000000000000000000000000000..90bac148db8d1c81a4425e5f6cc74beaac907b9d --- /dev/null +++ b/inst/doc/tutorial/PATC2021/hands-on_2_interpolation_ans.md @@ -0,0 +1,137 @@ +# Hands-on 2: Spatial grid interpolation + +## Goal +To learn how to use Start() to load the data and do the interpolation. +In this use case, we only use Start() and load the data into local memory. +The default transformation function is startR::CDORemapper, a wrapper function of s2dv::CDORemap that uses CDO internally. + +In detail, we are going to: +(1) Learn different ways to assign longitude and latitude in Start().
+(2) Use the '_reorder' parameters to specify the sorting order. +(3) Use transform-related parameters. + +## 1. Define longitude and latitude +```r + # Use this one if on workstation or nord3 + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + # Use this one if on Marenostrum4 and log in with PATC2021 account + repos <- paste0('/gpfs/scratch/nct00/nct00016/d3_02_R-hands_on/tmp-esarchive/', + 'exp/ecmwf/system5_m1/monthly_mean/', + '$var$_f6h/$var$_$sdate$.nc') + var <- 'tas' + sdate <- c('20170101') + lon.min <- 0 + lon.max <- 359.9 + lat.min <- -90 + lat.max <- 90 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + retrieve = TRUE + ) +``` + +1. Run the above script. Check the dimensions, the warning messages, and the values of +longitude and latitude. What is the range of longitude and latitude? +```r +dim(data) + dat var sdate ensemble time latitude longitude + 1 1 1 1 7 640 1296 + +longitude <- attr(data, 'Variables')$dat1$longitude +range(longitude) +[1] 0.0000 359.7222 + +latitude <- attr(data, 'Variables')$dat1$latitude +latitude[1] +[1] -89.78488 +latitude[640] +[1] 89.78488 +``` + +2. Why is 'lon.max' 359.9 and not 360? +If it is 360, you only get one point of longitude. Because of `longitude_reorder = CircularSort(0, 360)`, Start() regards 0 and 360 as the same point. Therefore, we need to set a number slightly smaller than 360 but bigger than the maximum value in the original data, so we can get the whole range. + +3. Now, change + - `latitude_reorder = Sort()` to `latitude_reorder = Sort(decreasing = TRUE)` + - `longitude_reorder = CircularSort(0, 360)` to `longitude_reorder = CircularSort(-180, 180)` + - Set `lon.min <- -180` and `lon.max <- 179.9` + +Check the values of longitude and latitude again. Are they different from the original script? +```r +dim(data) + dat var sdate ensemble time latitude longitude + 1 1 1 1 7 640 1296 + +longitude <- attr(data, 'Variables')$dat1$longitude +range(longitude) +[1] -180.0000 179.7222 + +latitude <- attr(data, 'Variables')$dat1$latitude +latitude[1] +[1] 89.78488 +latitude[640] +[1] -89.78488 + +``` +The dimensions are the same. The longitude range changes to [-180, 180], and the latitude is sorted from 90 to -90. + + +## 2. Interpolation +Now, let us add in the transformation parameters. + +```r + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + ## transformation + transform = CDORemapper, + transform_extra_cells = 2, + transform_params = list(grid = 'r360x181', + method = 'conservative', + crop = c(lon.min, lon.max, + lat.min, lat.max)), + transform_vars = c('latitude', 'longitude'), + ## + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, + latitude = NULL), + retrieve = TRUE + ) +``` + +4. Run the above script. Check the dimensions and the values of longitude and latitude.
+```r +dim(data) + dat var sdate ensemble time latitude longitude + 1 1 1 1 7 181 360 + +longitude <- attr(data, 'Variables')$common$longitude +range(longitude) +[1] 0 359 + +latitude <- attr(data, 'Variables')$common$latitude +range(latitude) +[1] -90 90 +``` + diff --git a/inst/doc/tutorial/PATC2021/nord3_demo.R b/inst/doc/tutorial/PATC2021/nord3_demo.R new file mode 100644 index 0000000000000000000000000000000000000000..af203627babe3129ff0437248c494106b4b0ab56 --- /dev/null +++ b/inst/doc/tutorial/PATC2021/nord3_demo.R @@ -0,0 +1,77 @@ +library(startR) + + repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + var <- 'tas' + sdate <- c('20170101', '20170201') + lon.min <- 0 + lon.max <- 359.9 + lat.min <- -90 + lat.max <- 90 + + data <- Start(dat = repos, + var = var, + sdate = sdate, + ensemble = indices(1:50), + time = 'all', + latitude = values(list(lat.min, lat.max)), + latitude_reorder = Sort(), + longitude = values(list(lon.min, lon.max)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = NULL, latitude = NULL), + retrieve = FALSE + ) + + func <- function(x, ens_dim, conf, pval) { + # ensemble mean + ens_mean <- apply(x, which(names(dim(x)) != ens_dim), mean) + # temporal trend + trend <- s2dv:::.Trend(ens_mean, conf = conf, pval = pval)$trend + + return(trend) + } + step <- Step(func, target_dims = c('ensemble', 'time'), + output_dims = list(trend = 'stats'), + use_libraries = c('s2dv')) + wf <- AddStep(data, step, ens_dim = 1, conf = FALSE, pval = FALSE) + +#-------------------user-defined--------------------- + queue_host <- 'nord1' + temp_dir <- '/gpfs/scratch/bsc32/bsc32734/startR_hpc/' + ecflow_suite_dir <- '/home/Earth/aho/startR_local/' +#---------------------------------------------------- + + res <- Compute(wf, + chunks = list(latitude = 2, + longitude = 3), + threads_load = 2, + threads_compute = 4, + cluster = list(queue_host = queue_host, + queue_type = 'lsf', + temp_dir = temp_dir, + cores_per_job = 2, + job_wallclock = '05:00', + max_jobs = 4, + extra_queue_params = list('#BSUB -q bsc_es'), + bidirectional = FALSE, + polling_period = 10 + ), + ecflow_suite_dir = ecflow_suite_dir, + wait = FALSE + ) #$output1 + +# Save the header of res +saveRDS(res, file = '~/nord3_demo_header.Rds') + +# You can leave now. 
When you come back next time, just read the .Rds file +res_collect <- readRDS('~/nord3_demo_header.Rds') +result <- Collect(res_collect, wait = TRUE) +# Save the result if needed +saveRDS(result, file = '~/nord3_demo_result.Rds') + +# Check the result +str(result) +dim(result$trend) + diff --git a/inst/doc/tutorial/crps.png b/inst/doc/tutorial/internal_training_20200902/crps.png similarity index 100% rename from inst/doc/tutorial/crps.png rename to inst/doc/tutorial/internal_training_20200902/crps.png diff --git a/inst/doc/tutorial/hands-on_part1.md b/inst/doc/tutorial/internal_training_20200902/hands-on_part1.md similarity index 100% rename from inst/doc/tutorial/hands-on_part1.md rename to inst/doc/tutorial/internal_training_20200902/hands-on_part1.md diff --git a/inst/doc/tutorial/hands-on_part1_ans.md b/inst/doc/tutorial/internal_training_20200902/hands-on_part1_ans.md similarity index 100% rename from inst/doc/tutorial/hands-on_part1_ans.md rename to inst/doc/tutorial/internal_training_20200902/hands-on_part1_ans.md diff --git a/inst/doc/tutorial/hands-on_part2.md b/inst/doc/tutorial/internal_training_20200902/hands-on_part2.md similarity index 100% rename from inst/doc/tutorial/hands-on_part2.md rename to inst/doc/tutorial/internal_training_20200902/hands-on_part2.md diff --git a/inst/doc/tutorial/hands-on_part2_ans.md b/inst/doc/tutorial/internal_training_20200902/hands-on_part2_ans.md similarity index 100% rename from inst/doc/tutorial/hands-on_part2_ans.md rename to inst/doc/tutorial/internal_training_20200902/hands-on_part2_ans.md diff --git a/inst/doc/tutorial/nord3_demo.R b/inst/doc/tutorial/internal_training_20200902/nord3_demo.R similarity index 100% rename from inst/doc/tutorial/nord3_demo.R rename to inst/doc/tutorial/internal_training_20200902/nord3_demo.R diff --git a/inst/doc/tutorial/rmsss.png b/inst/doc/tutorial/internal_training_20200902/rmsss.png similarity index 100% rename from inst/doc/tutorial/rmsss.png rename to inst/doc/tutorial/internal_training_20200902/rmsss.png diff --git a/inst/doc/tutorial/startR_tutorial_20200902.pdf b/inst/doc/tutorial/internal_training_20200902/startR_tutorial_20200902.pdf similarity index 100% rename from inst/doc/tutorial/startR_tutorial_20200902.pdf rename to inst/doc/tutorial/internal_training_20200902/startR_tutorial_20200902.pdf diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md index 013b47a3c159be2931b3b2c24b47857f2e254525..0ffacc7f618336a7253c121a56a0c0f36dfdc303 100644 --- a/inst/doc/usecase.md +++ b/inst/doc/usecase.md @@ -65,27 +65,40 @@ parameter and Start() can recognize this dependecy relationship. the dependency between one inner dimension and one file dimension (i.e., the usage of *_across), while this use case is for two file dimensions (i.e., the usage of *_depends). + 15. [Load irregular grid data](inst/doc/usecase/ex1_15_irregular_grid_CDORemap.R) + This script shows how to use Start() to load irregular grid data, then regrid it by s2dv::CDORemap. + 2. **Execute computation (use `Compute()`)** 1. [Function working on time dimension](inst/doc/usecase/ex2_1_timedim.R) 2. [Function using attributes of the data](inst/doc/usecase/ex2_2_attr.R) Using attributes is only available in startR_v0.1.3 or above. 3. [Use function CDORemap for interpolation](inst/doc/usecase/ex2_3_cdo.R) - Using parameter `CDO_module` is only available in startR_v0.1.3 or above. Interpolate data by using `s2dverification::CDORemap` in the workflow. + Using parameter `CDO_module` is only available in startR_v0.1.3 or above.
Interpolate data by using `s2dv::CDORemap` in the workflow. 4. [Use two functions in workflow](inst/doc/usecase/ex2_4_two_func.R) 5. 6. [Use external parameters in atomic function](inst/doc/usecase/ex2_6_ext_param_func.R) + 7. [Calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS)](inst/doc/usecase/ex2_7_seasonal_forecast_crps.R) - Use `SpecsVerification::EnsCrps` to calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS) for ECWMF experimental data, and do ensemble mean. Use `s2dverification::PlotEquiMap` to plot the CRPS map. + Use `SpecsVerification::EnsCrps` to calculate the ensemble-adjusted Continuous Ranked Probability Score (CRPS) for ECMWF experimental data, and do ensemble mean. Use `s2dv::PlotEquiMap` to plot the CRPS map. + 8. [Use CSTools Calibration function](inst/doc/usecase/ex2_8_calibration.R) Use `CSTools:::.cal`, the interior function of `CSTools::CST_Calibration`, to do the bias adjustment for ECMWF experimental monthly mean data. + 9. [Use a mask to apply different methods to different gridpoints](inst/doc/usecase/ex2_9_mask.R) If you need to apply your analysis in a few gridpoints, you may want to consider use case 1.6, but if you need to load a lot of grid points, maybe this is a better solution. + 10. [Apply an existing mask on data](inst/doc/usecase/ex2_10_existing_mask.R) This use case shows you how to apply the existing mask file on your data. If you need to create the mask file on your own, go to ex2_9_mask.R. + 11. [Two datasets with different length of target dimensions](inst/doc/usecase/ex2_11_two_dat_inconsistent_target_dim.R) This use case uses experimental and the corresponding observational data to calculate the temporal mean and spatial weighted mean. Notice that the spatial resolutions of the two datasets are different, but it still works because lat and lon are target dimensions. - + 12. [Transform and chunk spatial dimensions](inst/doc/usecase/ex2_12_transform_and_chunk.R) + This use case provides an example of transforming and chunking latitude and longitude dimensions. + + 13. [Load irregular grid data and interpolate it in the workflow](inst/doc/usecase/ex2_13_irregular_regrid.R) + This script shows how to load irregular grid data by Start(), then regrid it +by s2dv::CDORemap in the workflow. It is a solution before Start() can deal with irregular regridding directly. diff --git a/inst/doc/usecase/ex1_15_irregular_grid_CDORemap.R b/inst/doc/usecase/ex1_15_irregular_grid_CDORemap.R new file mode 100644 index 0000000000000000000000000000000000000000..f8634430e13730ea135182d6b3da0206f96ee918 --- /dev/null +++ b/inst/doc/usecase/ex1_15_irregular_grid_CDORemap.R @@ -0,0 +1,83 @@ +#---------------------------------------------------------------------------- +# Author: An-Chi Ho +# Date: 18th Jun 2021 +# +# This script shows how to use Start() to load irregular grid data, then regrid it +# by s2dv::CDORemap.
+#---------------------------------------------------------------------------- + +library(startR) +library(s2dv) + +# Case 1: +path1 <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Omon/$var$/gn/v20190713/$var$_*_s$sdate$-$member$_gn_$aux$.nc' + +data1 <- Start(dataset = path1, + var = 'tos', + sdate = paste0(1960), + aux = indices(1), + aux_depends = 'sdate', + j = 'all', + i = 'all', + time = indices(1), + member = 'r1i1p1f1', + return_vars = list(j = NULL, i = NULL, + latitude = NULL, longitude = NULL), + retrieve = T) +lons1 <- attributes(data1)$Variables$common$longitude +lats1 <- attributes(data1)$Variables$common$latitude +dim(lons1) +# i j +#362 292 +dim(lats1) +# i j +#362 292 +dim(data1) +#dataset var sdate aux j i time member +# 1 1 1 1 292 362 1 1 + +res1 <- CDORemap(drop(data1), lons1, lats1, grid = 'r100x50', method = 'bil', crop = FALSE) +dim(res1$data_array) +#lon lat +#100 50 +PlotEquiMap(t(res1$data_array), lon = res1$lons, lat = res1$lats, fileout = '~/irre_grid_res1.png') + +#--------------------------------------------------------- + +# Case 2: +path2 <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/', + 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/', + '$var$_*_s$sdate$-$member$_gn_$aux$.nc') + +data2 <- Start(dataset = path2, + var = 'tos', + sdate = '1960', + aux = 'all', + aux_depends = 'sdate', + j = indices(2:361), # remove two indices to avoid white strips + i = indices(2:291), # remove two indices to avoid white strips + time = indices(1), + member = 'r1i1p1f1', + return_vars = list(j = NULL, i = NULL, + latitude = NULL, longitude = NULL), + retrieve = T) + +lons2 <- attributes(data2)$Variables$common$longitude +lats2 <- attributes(data2)$Variables$common$latitude +dim(lons2) +# j i +#360 290 +dim(lats2) +# j i +#360 290 +dim(data2) +#dataset var sdate aux j i time member +# 1 1 1 1 360 290 1 1 + +data2 <- ClimProjDiags::Subset(data2, c(1,2,3,7,8), list(1,1,1,1,1), drop = 'selected') +res2 <- CDORemap(drop(data2), lons2, lats2, grid = 'r100x50', method = 'bil', crop = FALSE) +dim(res2$data_array) +#lon lat +#100 50 +PlotEquiMap(res2$data_array, lon = res2$lons, lat = res2$lats, fileout = '~/irre_grid_res2.png') + diff --git a/inst/doc/usecase/ex1_1_tranform.R b/inst/doc/usecase/ex1_1_tranform.R index c805b45d8678ddc20b334c3b7f222498b19a0953..a544f84c96005aea28ae6f99f3e61fc5b2fc9c88 100644 --- a/inst/doc/usecase/ex1_1_tranform.R +++ b/inst/doc/usecase/ex1_1_tranform.R @@ -1,12 +1,12 @@ # ------------------------------------------------------------------ # This script shows you how to do the interpolation in Start(). It also -# uses s2dverification::Load to compare the results, which are identical +# uses s2dv::Load to compare the results, which are identical # (only tiny difference due to round-up). # # The parameters in Start() for interpolation include 'transform', # 'transform_extra_cells', 'transform_params', and 'transform_vars'. # 'transform' is the interpolation function. startR provides 'CDORemapper', -# which is the wrapper function of s2dverification::CDORemap. +# which is the wrapper function of s2dv::CDORemap. # 'transform_extra_cells' defines the extra grid points you want to use for # interpolation. The default value is 2. 'transform_params' is a list which # defines the arguments used in 'cdo'. 
'transform_vars' is a vector indicating @@ -65,9 +65,9 @@ obs[1,1,1,1,1:3,1:2] #------------------------- -# s2dverification::Load() +# s2dv::Load() #------------------------- -library(s2dverification) +library(s2dv) pobs <- paste0('/esarchive/recon/ecmwf/era5/monthly_mean/', '$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc') diff --git a/inst/doc/usecase/ex1_2_exp_obs_attr.R b/inst/doc/usecase/ex1_2_exp_obs_attr.R index fa2323d6aaa5ace57195ea52e127a8ad1fa1b3c7..19874efc730861847f23ceb39db8140e65c3d20b 100644 --- a/inst/doc/usecase/ex1_2_exp_obs_attr.R +++ b/inst/doc/usecase/ex1_2_exp_obs_attr.R @@ -5,13 +5,12 @@ # can have the same dimension structure. # Spatial dimensions: -# The exp and obs data happen to have the same spatial resolution (256x512) and +# The exp and obs data happen to have the same spatial resolution (256x512) and # the grids are not shifted, so we don't need to regrid them. However, their latitude # orders are opposite. exp has ascending order while obs has descending order. -# To make them consistent, we cannot simply use 'all' as the selector of obs because -# for now, the reordering parameter '*_Sort' is only functional when the dimension is -# defined by values(). We can use either `indices(256:1)` or the exp attributes (`values()`) -# to define the latitude of obs. +# To make them consistent, we need to use the "_reorder" parameters. In fact, it is +# always recommended to use "lat_reorder" and "lon_reorder" to ensure you get +# the expected latitude and longitude. # Temporal dimensions: # The exp and obs files have different date/time structure. exp has one file per year and @@ -33,7 +32,9 @@ exp <- Start(dat = repos_exp, sdate = as.character(c(2005:2008)), time = indices(1:3), lat = 'all', + lat_reorder = Sort(), lon = 'all', + lon_reorder = CircularSort(0, 360), synonims = list(lat = c('lat', 'latitude'), lon = c('lon', 'longitude')), return_vars = list(lon = NULL, @@ -41,10 +42,6 @@ exp <- Start(dat = repos_exp, time = 'sdate'), retrieve = FALSE) -attr(exp, 'Dimensions') -# dat var sdate time lat lon -# 1 1 4 3 256 512 - # Retrieve attributes for observational data retrieval. ## Because latitude order in experiment is [-90, 90] but in observation is [90, -90], ## latitude values need to be retrieved and used below. @@ -83,8 +80,10 @@ obs <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates), #dim: [sdate = 4, time = 3] - lat = values(lats), # indices(256:1), - lon = values(lons), # 'all', + lat = 'all', + lat_reorder = Sort(), + lon = 'all', + lon_reorder = CircularSort(0, 360), time_across = 'date', merge_across_dims = TRUE, split_multiselected_dims = TRUE, @@ -95,10 +94,6 @@ obs <- Start(dat = repos_obs, time = 'date'), retrieve = FALSE) -attr(obs, 'Dimensions') -# dat var sdate time lat lon -# 1 1 4 3 256 512 - #==================================================== # Check the attributes. They should be all identical.
#==================================================== @@ -177,8 +172,10 @@ obs2 <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates_adjust), # use the adjust ones - lat = values(lats), - lon = values(lons), + lat = 'all', + lat_reorder = Sort(), + lon = 'all', + lon_reorder = CircularSort(0, 360), time_across = 'date', merge_across_dims = TRUE, split_multiselected_dims = TRUE, @@ -207,8 +204,10 @@ obs3 <- Start(dat = repos_obs, var = 'tas', date = unique(format(dates, '%Y%m')), time = values(dates), - lat = values(lats), - lon = values(lons), + lat = 'all', + lat_reorder = Sort(), + lon = 'all', + lon_reorder = CircularSort(0, 360), time_across = 'date', time_tolerance = as.difftime(15, units = 'days'), merge_across_dims = TRUE, @@ -222,10 +221,9 @@ obs3 <- Start(dat = repos_obs, # We lose many data because there are no data within 15 days from the provided time values. print(attr(obs3, 'Variables')$common$time) -[1] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" -[3] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" +#[1] "2005-02-28 18:00:00 UTC" "2006-02-28 18:00:00 UTC" +#[3] "2007-02-28 18:00:00 UTC" "2008-02-29 18:00:00 UTC" # If 'time_tolerance' is changed to "as.difftime(1, units = 'days')", an error shows: # Selectors do not match any of the possible values for the dimension 'time'. - diff --git a/inst/doc/usecase/ex1_4_variable_nmember.R b/inst/doc/usecase/ex1_4_variable_nmember.R index 495c6f8278685482cd865378c59ed40faca3e38f..2e90fe6ab9d2733d99d096c8a151f58ecdb10ad5 100644 --- a/inst/doc/usecase/ex1_4_variable_nmember.R +++ b/inst/doc/usecase/ex1_4_variable_nmember.R @@ -1,11 +1,13 @@ # This code shows that the number of members could depend on the start date -# and the order of start dates requested -# See FAQ 10 [The members depends on the start date](/inst/doc/faq.md) +# and the order of start dates impacts the length of the member dimension. +# See FAQ 10 [The members depends on the start date](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#10-the-number-of-members-depends-on-the-start-date) and +# FAQ 21 [Retrieve the complete data when the dimension length varies among files](https://earth.bsc.es/gitlab/es/startR/-/blob/master/inst/doc/faq.md#21-retrieve-the-complete-data-when-the-dimension-length-varies-among-files) +# for more explanation. library(startR) path_list <- list(list(name = 'system5', - path = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')) + path = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc')) sdates_exp <- c('19991101', '19990901') data_Nov_Sep <- Start(dat = path_list, var = 'psl', @@ -13,8 +15,9 @@ data_Nov_Sep <- Start(dat = path_list, sdate = sdates_exp, time = indices(1), latitude = values(list(0, 20)), - latitude_reorder=Sort(), + latitude_reorder = Sort(), longitude = values(list(0, 5)), + longitude_reorder = CircularSort(0, 360), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), member = c('ensemble', 'realization')), @@ -23,9 +26,13 @@ data_Nov_Sep <- Start(dat = path_list, dim(data_Nov_Sep) # dat var member sdate time latitude longitude # 1 1 51 2 1 71 19 + apply(data_Nov_Sep, 4, function(x){sum(is.na(x))}) -# 26 missing values for the second start date +#[1] 0 35074 +# --> 26*71*19 = 35074 missing values for the second start date.
+ +#-------- exchange the order of sdate ------------- sdates_exp <- c('19990901', '19991101') data_Sep_Nov <- Start(dat = path_list, var = 'psl', @@ -33,20 +40,44 @@ data_Sep_Nov <- Start(dat = path_list, sdate = sdates_exp, time = indices(1), latitude = values(list(0, 20)), - latitude_reorder=Sort(), + latitude_reorder = Sort(), longitude = values(list(0, 5)), + longitude_reorder = CircularSort(0, 360), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), - member = c('ensemble', 'realization')), + member = c('ensemble', 'realization')), retrieve = TRUE) -# 25 members available +# Only 25 members available dim(data_Sep_Nov) # dat var member sdate time latitude longitude # 1 1 25 2 1 71 19 -# Any missing value: apply(data_Sep_Nov, 4, function(x){sum(is.na(x))}) +#[1] 0 0 +#--> No missing values. +#-----------Use 'largest_dims_length' to get full members ------------- +data_largest_len <- Start(dat = path_list, + var = 'psl', + member = 'all', + largest_dims_length = c(member = 51), #TRUE, + sdate = sdates_exp, + time = indices(1), + latitude = values(list(0, 20)), + latitude_reorder = Sort(), + longitude = values(list(0, 5)), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + member = c('ensemble', 'realization')), + retrieve = TRUE) +dim(data_largest_len) +# dat var member sdate time latitude longitude +# 1 1 51 2 1 71 19 + Switch the sdate order of data_largest_len and compare with the first data set +all.equal(as.vector(data_largest_len[, , , 2:1, , , ]), + as.vector(data_Nov_Sep), check.attributes = FALSE) +#[1] TRUE diff --git a/inst/doc/usecase/ex2_13_irregular_regrid.R b/inst/doc/usecase/ex2_13_irregular_regrid.R new file mode 100644 index 0000000000000000000000000000000000000000..e09a691333ad40754315d98e548fda55ba7aa9f0 --- /dev/null +++ b/inst/doc/usecase/ex2_13_irregular_regrid.R @@ -0,0 +1,82 @@ +#---------------------------------------------------------------------------- +# Author: An-Chi Ho +# Date: 8th Oct 2021 +# +# This script shows how to load irregular grid data by Start(), then regrid it +# by s2dv::CDORemap in the workflow. It is a solution before Start() can deal +# with irregular regridding directly.
+#---------------------------------------------------------------------------- + +library(startR) +library(s2dv) + +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/', + 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/', + '$var$_*_s$sdate$-$member$_gn_$aux$.nc') + +data <- Start(dataset = path, + var = 'tos', + sdate = c('1960', '1961'), + aux = 'all', + aux_depends = 'sdate', + j = indices(2:361), # remove two indices to avoid white strips + i = indices(2:291), # remove two indices to avoid white strips + time = indices(1:12), + member = 'r1i1p1f1', + return_vars = list(j = NULL, i = NULL, + latitude = NULL, longitude = NULL), + retrieve = F) + +attr(data, 'Dimensions') +#dataset var sdate aux j i time member +# 1 1 2 1 360 290 12 1 +dim(attr(data, 'Variables')$common$longitude) +# j i +#360 290 +dim(attr(data, 'Variables')$common$latitude) +# j i +#360 290 + +func_regrid <- function(data) { + lons <- attr(data, 'Variables')$common$longitude + lats <- attr(data, 'Variables')$common$latitude + data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180', + method = 'bil', crop = FALSE) + lons_reg <- data[['lons']] + lats_reg <- data[['lats']] + return(list(data = data[[1]], lats = lats_reg, lons = lons_reg)) +} + +#NOTE: The data transposes if target_dims are only 'j' and 'i'. +# If only 'j' and 'i', output_dims will be 'lat', 'lon'. +step <- Step(fun = func_regrid, + target_dims = list(data = c('j', 'i')), + output_dims = list(data = c('lon', 'lat'), + lats = 'lat', lons = 'lon'), + use_attributes = list(data = "Variables")) +wf <- AddStep(data, step) + +res <- Compute(workflow = wf$data, + chunks = list(sdate = 2, time = 2)) + +names(res) +#[1] "data" "lats" "lons" +dim(res$data) +# lon lat dataset var sdate aux time member +# 360 180 1 1 2 1 12 1 +dim(res$lons) +# lon dataset var sdate aux time member +# 360 1 1 2 1 12 1 +dim(res$lats) +# lat dataset var sdate aux time member +# 180 1 1 2 1 12 1 + +PlotEquiMap(drop(res$data)[ , , 1, 1], + lon = drop(res$lons)[, 1, 1], + lat = drop(res$lats)[, 1, 1]) + +# Plot Layout for sdate = 1 all the time steps +var <- Reorder(drop(res$data)[, , 1, ], c(3, 1, 2)) +PlotLayout(PlotEquiMap, c('lon', 'lat'), var = var, + lon = drop(res$lons)[, 1, 1], lat = drop(res$lats)[, 1, 1]) + diff --git a/inst/doc/usecase/ex2_3_cdo.R b/inst/doc/usecase/ex2_3_cdo.R index 0262de38a50ee259196d275abfe4ddef56d940bb..8c398f7ca5b33b2a079cd92561a167b57ef0b0a8 100644 --- a/inst/doc/usecase/ex2_3_cdo.R +++ b/inst/doc/usecase/ex2_3_cdo.R @@ -1,5 +1,5 @@ # -------------------------------------------------------------- -# Function doing regridding CDO (e.g.: s2dverification::CDORemap) +# Function doing regridding CDO (e.g.: s2dv::CDORemap) #--------------------------------------------------------------- library(startR) repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -18,8 +18,8 @@ library(startR) lons_data = as.vector(attr(x, 'Variables')$dat1$longitude) lats_data = as.vector(attr(x, 'Variables')$dat1$latitude) resgrid = "r360x180" # prlr - r <- s2dverification::CDORemap(x, lons_data, lats_data, resgrid, - 'bil', crop = FALSE, force_remap = TRUE)[[1]] + r <- s2dv::CDORemap(x, lons_data, lats_data, resgrid, + 'bil', crop = FALSE, force_remap = TRUE)[[1]] return(r) } diff --git a/inst/doc/usecase/ex2_4_two_func.R b/inst/doc/usecase/ex2_4_two_func.R index d4d1b5f14a6f1eb2a162f2becd80f50c06b1a559..2e638bd60ea6d3f90b7ff2cca385e9f89d7d587e 100644 --- 
a/inst/doc/usecase/ex2_4_two_func.R +++ b/inst/doc/usecase/ex2_4_two_func.R @@ -1,5 +1,5 @@ # -------------------------------------------------------------- -# Two functions (e.g.: s2dverification::CDORemap and Season) +# Two functions (e.g.: s2dv::CDORemap and Season) #--------------------------------------------------------------- library(startR) repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R index 8b746071ecff79ef898695fffb2c499ac4fa948d..b28e6a6d88b0a65ff4890e78a0018791f010ebca 100644 --- a/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R +++ b/inst/doc/usecase/ex2_7_seasonal_forecast_verification.R @@ -104,7 +104,7 @@ # Plotting -library(s2dverification) +library(s2dv) lon <- seq(from = 0, to = 359, length.out = 512) lat <- seq(from = 90, to = -90, length.out = 256) diff --git a/man/CDORemapper.Rd b/man/CDORemapper.Rd index 763be7708c73e0be54b1edcc7e7d8b9d1aeebda4..024ce32de1194ca691976a6207f98f830aa7233b 100644 --- a/man/CDORemapper.Rd +++ b/man/CDORemapper.Rd @@ -4,7 +4,13 @@ \alias{CDORemapper} \title{CDO Remap Data Transformation for 'startR'} \usage{ -CDORemapper(data_array, variables, file_selectors = NULL, ...) +CDORemapper( + data_array, + variables, + file_selectors = NULL, + crop_domain = NULL, + ... +) } \arguments{ \item{data_array}{A data array to be transformed. See details in the @@ -18,6 +24,9 @@ parameter 'transform' of the function Start().} the file parameter 'data_array' comes from. See details in the documentation of the parameter 'transform' of the function Start(). The default value is NULL.} +\item{crop_domain}{A list of the transformed domain of each transform +variable, automatically provided by Start().} + \item{\dots}{A list of additional parameters to adjust the transform process, as provided in the parameter 'transform_params' in a Start() call. See details in the documentation of the parameter 'transform' of the function Start().} @@ -34,7 +43,7 @@ data subsets onto a specified target grid, intended for use as parameter 'transform' in a Start() call. This function complies with the input/output interface required by Start() defined in the documentation for the parameter 'transform' of function Start().\cr\cr -This function uses the function CDORemap() in the package 's2dverification' to +This function uses the function CDORemap() in the package 's2dv' to perform the interpolation, hence CDO is required to be installed. } \examples{ @@ -53,8 +62,7 @@ perform the interpolation, hence CDO is required to be installed. longitude_reorder = CircularSort(-180, 180), transform = CDORemapper, transform_params = list(grid = 'r360x181', - method = 'conservative', - crop = c(-120, 120, -60, 60)), + method = 'conservative'), transform_vars = c('latitude', 'longitude'), return_vars = list(latitude = 'dat', longitude = 'dat', @@ -63,5 +71,5 @@ perform the interpolation, hence CDO is required to be installed. 
} } \seealso{ -\code{\link[s2dverification]{CDORemap}} +\code{\link[s2dv]{CDORemap}} } diff --git a/startR-manual.pdf b/startR-manual.pdf index 0e205e687143fcabaa6c6c47c17b14e302458c9c..84ba5184bb5acc5bb7e84cf35887158bc6d1235a 100644 Binary files a/startR-manual.pdf and b/startR-manual.pdf differ diff --git a/tests/testthat/test-Compute-CDORemap.R b/tests/testthat/test-Compute-CDORemap.R index 991e7e1e12d308fb75be27863a4b274ce6c7ba40..28df2340a6883b72a9a859ca783791a7740c7ee3 100644 --- a/tests/testthat/test-Compute-CDORemap.R +++ b/tests/testthat/test-Compute-CDORemap.R @@ -19,8 +19,8 @@ suppressWarnings( fun <- function(x) { lons_data <- as.vector(attr(x, 'Variables')$dat1$longitude) lats_data <- as.vector(attr(x, 'Variables')$dat1$latitude) - r <- s2dverification::CDORemap(x, lons_data, lats_data, "r360x181", - 'bil', crop = FALSE, force_remap = TRUE)[[1]] + r <- s2dv::CDORemap(x, lons_data, lats_data, "r360x181", + 'bil', crop = FALSE, force_remap = TRUE)[[1]] return(r) } diff --git a/tests/testthat/test-Compute-chunk_split_dim.R b/tests/testthat/test-Compute-chunk_split_dim.R new file mode 100644 index 0000000000000000000000000000000000000000..09da16096467e861dc5035840f346d3d46ce0275 --- /dev/null +++ b/tests/testthat/test-Compute-chunk_split_dim.R @@ -0,0 +1,222 @@ +# This unit test is to check chunking over the split dim. It involves +# how to arrange the chunks in a correct order even when chunking is happening. + +context("Chunk over split dim") + +test_that("1. The files are not repeated", { + +repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/', + 'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/', + '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc') + +suppressWarnings( +exp <- Start(dat = repos_exp, + var = 'tas', + sdate = as.character(c(2005:2008)), + time = indices(1:3), + lat = indices(1:2), + lon = indices(1:3), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(lon = NULL, + lat = NULL, + time = 'sdate'), + retrieve = FALSE) +) +lats <- attr(exp, 'Variables')$common$lat +lons <- attr(exp, 'Variables')$common$lon +## The 'time' attribute is a two-dim array +dates <- attr(exp, 'Variables')$common$time +#dim(dates) +#sdate time +# 4 3 + +repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc' +suppressWarnings( +obs <- Start(dat = repos_obs, + var = 'tas', + date = unique(format(dates, '%Y%m')), + time = values(dates), #dim: [sdate = 4, time = 3] + lat = values(lats), + lat_reorder = Sort(), + lon = values(lons), + lon_reorder = CircularSort(0, 360), + time_across = 'date', + merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(lon = NULL, + lat = NULL, + time = 'date'), + retrieve = FALSE) +) + +fun <- function(x) { +return(x) +} +step1 <- Step(fun, target_dims = 'time', output_dims = 'time') +wf1 <- AddStep(obs, step1) +suppressWarnings( +res <- Compute(wf1, chunks = list(sdate = 1))$output1 +) +suppressWarnings( +res1 <- Compute(wf1, chunks = list(sdate = 2))$output1 +) + +step2 <- Step(fun, target_dims = 'sdate', output_dims = 'sdate') +wf2 <- AddStep(obs, step2) +suppressWarnings( +res2 <- Compute(wf2, chunks = list(time = 2))$output1 +) + +step3 <- Step(fun, target_dims = 'lon', output_dims = 'lon') +wf3 <- AddStep(obs, step3) +suppressWarnings( +res3 <- Compute(wf3, chunks = list(time = 2, sdate = 2))$output1 +) + 
+expect_equal( +res[1,1,1,,2,2], +c(250.8127, 248.7926, 249.4770, 246.8312), +tolerance = 0.0001 +) +expect_equal( +res[2,1,1,,2,2], +c(237.3297, 235.9947, 235.4366, 235.9104), +tolerance = 0.0001 +) +expect_equal( +res, +res1 +) +expect_equal( +res, +aperm(res2, c(4,2,3,1,5,6)) +) +expect_equal( +res, +aperm(res3, c(5,2,3,4,6,1)) +) +expect_equal( +dim(res1), +c(time = 3, dat = 1, var = 1, sdate = 4, lat = 2, lon = 3) +) +expect_equal( +dim(res2), +c(sdate = 4, dat = 1, var = 1, time = 3, lat = 2, lon = 3) +) +expect_equal( +dim(res3), +c(lon = 3, dat = 1, var = 1, sdate = 4, time = 3, lat = 2) +) + +}) + + +test_that("2. The files are repeated", { + +ecmwf_path_hc <- paste0('/esarchive/exp/ecmwf/s2s-monthly_ensforhc/daily/$var$_f24h/$sdate$/$var$_$syear$.nc') +obs_path <-paste0('/esarchive/recon/ecmwf/era5/daily/$var$-r240x121/$var$_$file_date$.nc') +sdates.seq <- c("20161222","20161229", "20170105","20170112") + +suppressWarnings( +hcst <- Start(dat = ecmwf_path_hc, + var = 'sfcWind', + sdate = sdates.seq, + syear = indices(1:2), #'all', + time = 'all', + latitude = indices(1), + longitude = indices(1), + ensemble = indices(1), + syear_depends = 'sdate', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = c('sdate','syear') + ), + retrieve = F) +) +datess <- attr(hcst, 'Variables')$common$time +file_date <- unique(sapply(datess, format, '%Y%m')) + +suppressWarnings( +obs <- Start(dat = obs_path, + var = 'windagl100', + latitude = indices(1), + longitude = indices(1:2), + file_date= file_date, + time = values(datess), # 'sdate' 'syear' 'time' + time_var = 'time', + time_across = 'file_date', + merge_across_dims= TRUE, + merge_across_dims_narm = TRUE, + split_multiselected_dims = TRUE, + synonims = list(latitude=c('lat','latitude'), + longitude=c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat',# + time = c('file_date')), + retrieve = F) +) + +fun <- function(x) { +return(x) +} +st1 <- Step(fun, target_dims = 'time', output_dims = 'time') +adst1 <- AddStep(obs, st1) +suppressWarnings( +res <- Compute(adst1, chunks = list(sdate = 1))$output1 +) +suppressWarnings( +res1 <- Compute(adst1, chunks = list(sdate = 2))$output1 +) + +st2 <- Step(fun, target_dims = 'sdate', output_dims = 'sdate') +adst2 <- AddStep(obs, st2) +suppressWarnings( +res2 <- Compute(adst2, chunks = list(time = 2))$output1 +) + +st3 <- Step(fun, target_dims = 'longitude', output_dims = 'longitude') +adst3 <- AddStep(obs, st3) +suppressWarnings( +res3 <- Compute(adst3, chunks = list(time = 2, sdate = 2))$output1 +) + +expect_equal( +drop(res)[1,1,,2], +c(8.324215, 10.622266, 14.304168, 12.299168), +tolerance = 0.0001 +) +expect_equal( +drop(res)[2,1,,1], +c(4.357206, 21.909714, 4.808544, 6.861799), +tolerance = 0.0001 +) +expect_equal( +res, +res1 +) +expect_equal( +res, +aperm(res2, c(7, 2, 3, 4, 5, 1, 6)) +) +expect_equal( +res, +aperm(res3, c(7, 2, 3, 4, 1, 5, 6)) +) +expect_equal( +dim(res1), +c(time = 47, dat = 1, var = 1, latitude = 1, longitude = 2, sdate = 4, syear = 2) +) +expect_equal( +dim(res2), +c(sdate = 4, dat = 1, var = 1, latitude = 1, longitude = 2, syear = 2, time = 47) +) +expect_equal( +dim(res3), +c(longitude = 2, dat = 1, var = 1, latitude = 1, sdate = 4, syear = 2, time = 47) +) + + +}) diff --git a/tests/testthat/test-Compute-inconsistent_target_dim.R b/tests/testthat/test-Compute-inconsistent_target_dim.R index fa4992afe565d5e28b501c8b2e3e488bce886261..13d2a44435799415b4091e0a8c6710ff05e78f1d 100644 --- 
a/tests/testthat/test-Compute-inconsistent_target_dim.R +++ b/tests/testthat/test-Compute-inconsistent_target_dim.R @@ -67,52 +67,22 @@ obs <- Start(data = path.obs, retrieve = FALSE) ) -lons.obs <- attr(obs, 'Variables')$common$longitude -lats.obs <- attr(obs, 'Variables')$common$latitude -dates.obs <- attr(obs, 'Variables')$common$time - -lons_exp <- as.vector(lons.exp) -lats_exp <- as.vector(lats.exp) -lons_obs <- as.vector(lons.obs) -lats_obs <- as.vector(lats.obs) - - -fun <- function(exp, obs, path.output, - lons_exp = lons_exp, lats_exp = lats_exp, - lons_obs = lons_obs, lats_obs = lats_obs) { - - e <- s2dv::MeanDims(drop(exp), c('member', 'time')) - sst.e <- ClimProjDiags::WeightedMean(e, lons_exp, lats_exp, - londim = which(names(dim(e)) == 'longitude'), - latdim = which(names(dim(e)) == 'latitude')) - index.exp <- (sst.e - mean(sst.e))/sd(sst.e) - - o <- s2dv::MeanDims(drop(obs), 'time') - sst.o <- ClimProjDiags::WeightedMean(o, lons_obs, lats_obs, - londim = which(names(dim(o)) == 'longitude'), - latdim = which(names(dim(o)) == 'latitude')) - index.obs <- (sst.o - mean(sst.o))/sd(sst.o) - - # give dim name - dim(index.exp) <- c(sdate = length(index.exp)) - dim(index.obs) <- c(sdate = length(index.obs)) - - return(list(ind_exp = index.exp, ind_obs = index.obs)) - +fun <- function(exp, obs) { +# exp: [member = 2, latitude = 11, longitude = 21] +# obs: [latitude = 41, longitude = 81] + return(list(exp = exp, obs = obs)) } step <- Step(fun, - target_dims = list(exp = c('member', 'sdate', 'time', 'latitude', 'longitude'), - obs = c('sdate', 'time', 'latitude', 'longitude')), - output_dims = list(ind_exp = 'sdate', ind_obs = 'sdate')) + target_dims = list(exp = c('member', 'latitude', 'longitude'), + obs = c('latitude', 'longitude')), + output_dims = list(exp = c('member', 'latitude', 'longitude'), + obs = c('latitude', 'longitude'))) -workflow <- AddStep(list(exp = exp, obs = obs), step, - lons_exp = lons_exp, lats_exp = lats_exp, - lons_obs = lons_obs, lats_obs = lats_obs) +workflow <- AddStep(list(exp = exp, obs = obs), step) suppressWarnings( -res <- Compute(workflow$ind_exp, - chunks = list(var = 1)) +res <- Compute(workflow$exp, chunks = list(sdate = 2)) ) expect_equal( @@ -125,17 +95,45 @@ c(data = 1, var = 1, sdate = 3, time = 3, latitude = 41, longitude = 81) ) expect_equal( names(res), -c('ind_exp', 'ind_obs') +c('exp', 'obs') +) +expect_equal( +dim(res$exp), +c(member = 2, latitude = 11, longitude = 21, data = 1, var = 1, sdate = 3, time = 3) +) +expect_equal( +dim(res$obs), +c(latitude = 41, longitude = 81, data = 1, var = 1, sdate = 3, time = 3) +) +expect_equal( +mean(res$exp), +299.5028, +tolerance = 0.0001 +) +expect_equal( +mean(res$obs), +299.8896, +tolerance = 0.0001 +) +expect_equal( +res$exp[1, 1:5, 2, 1, 1, 2, 2], +c(302.2021, 302.3682, 302.4512, 302.4404, 302.4922), +tolerance = 0.0001 +) +expect_equal( +res$exp[2, 1:5, 2, 1, 1, 2, 3], +c(301.4209, 301.4795, 301.4004, 301.3633, 301.5791), +tolerance = 0.0001 ) expect_equal( -mean(res$ind_exp)*10^18, --9.251859, -tolerance = 0.00001 +res$obs[1:5, 2, 1, 1, 2, 2], +c(301.5615, 301.5351, 301.6196, 301.7573, 301.8423), +tolerance = 0.0001 ) expect_equal( -mean(res$ind_obs)*10^15, --9.584944, -tolerance = 0.00001 +res$obs[5, 1:5, 1, 1, 3, 2], +c(299.6043, 299.6480, 299.6803, 299.6851, 299.6761), +tolerance = 0.0001 ) }) diff --git a/tests/testthat/test-Compute-irregular_regrid.R b/tests/testthat/test-Compute-irregular_regrid.R new file mode 100644 index 
0000000000000000000000000000000000000000..928fa493be57d7b9f975c57bbf229f986dab6d94 --- /dev/null +++ b/tests/testthat/test-Compute-irregular_regrid.R @@ -0,0 +1,71 @@ +library(s2dv) + +context("Irregular regridding in the workflow") + +test_that("1. ex2_13", { + +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/cmcc-cm2-sr5/cmip6-dcppA-hindcast_i1p1/', + 'DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/$member$/Omon/$var$/gn/v20210312/', + '$var$_*_s$sdate$-$member$_gn_$aux$.nc') +suppressWarnings( +data <- Start(dataset = path, + var = 'tos', + sdate = '1960', + aux = 'all', + aux_depends = 'sdate', + j = indices(2:361), # remove two indices to avoid white strips + i = indices(2:291), # remove two indices to avoid white strips + time = indices(1), + member = 'r1i1p1f1', + return_vars = list(j = NULL, i = NULL, + latitude = NULL, longitude = NULL), + retrieve = F) +) +func_regrid <- function(data) { + lons <- attr(data, 'Variables')$common$longitude + lats <- attr(data, 'Variables')$common$latitude + data <- s2dv::CDORemap(data, lons, lats, grid = 'r360x180', + method = 'bil', crop = FALSE) + lons_reg <- data[['lons']] + lats_reg <- data[['lats']] + return(list(data = data[[1]], lats = lats_reg, lons = lons_reg)) +} + +#NOTE: The data transposes if target_dims are only 'j' and 'i'. +# If only 'j' and 'i', output_dims will be 'lat', 'lon'. +step <- Step(fun = func_regrid, + target_dims = list(data = c('j', 'i')), + output_dims = list(data = c('lon', 'lat'), + lats = 'lat', lons = 'lon'), + use_attributes = list(data = "Variables")) +suppressWarnings( +wf <- AddStep(data, step) +) +suppressWarnings( +res <- Compute(workflow = wf$data, chunks = list(sdate = 1)) +) + +expect_equal( +dim(res$data), +c(lon = 360, lat = 180, dataset = 1, var = 1, sdate = 1, aux = 1, time = 1, member = 1) +) +expect_equal( +dim(res$lons), +c(lon = 360, dataset = 1, var = 1, sdate = 1, aux = 1, time = 1, member = 1) +) +expect_equal( +attr(data, 'Dimensions'), +c(dataset = 1, var = 1, sdate = 1, aux = 1, j = 360, i = 290, time = 1, member = 1) +) +expect_equal( +mean(res$data, na.rm = T), +13.20951, +tolerance = 0.0001 +) +expect_equal( +drop(res$data)[120,105:110], +c(28.32521, 28.07044, 27.59033, 27.02514, 26.55184, 26.67090), +tolerance = 0.0001 +) + +}) diff --git a/tests/testthat/test-Compute-transform_all.R b/tests/testthat/test-Compute-transform_all.R index 46430d7a2736f1946d6e369d32c4037179494aa8..2a676e44671e2d5fdb7e0b02be512d3ca344bedb 100644 --- a/tests/testthat/test-Compute-transform_all.R +++ b/tests/testthat/test-Compute-transform_all.R @@ -1,7 +1,7 @@ context("Transform with 'all'") test_that("1. Chunk along non-lat/lon dim", { -skip_on_cran() +#skip_on_cran() path <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/Amon/$var$/gr/v20190713/$var$_Amon_*_s$sdate$-$member$_gr_$fyear$.nc' suppressWarnings( @@ -47,7 +47,7 @@ tolerance = 0.0001 }) test_that("2. chunk along lon", { -skip_on_cran() +#skip_on_cran() #!!!!!!!!!!!!!!!!!!!NOTE: the results are not identical when exp has extra cells = 2!!!!!!!!!!!!!!!!!! # But exp2 (retrieve = T) has the same results with extra_cells = 2 and 8. diff --git a/tests/testthat/test-Compute-transform_indices.R b/tests/testthat/test-Compute-transform_indices.R index d9d65cb8ede93b2b55859db60bca3036805c4008..3d7bb7d33174ebdf0af9e2f962e6d43bbea0a091 100644 --- a/tests/testthat/test-Compute-transform_indices.R +++ b/tests/testthat/test-Compute-transform_indices.R @@ -22,7 +22,7 @@ context("Transform with indices") test_that("1. 
global", { -skip_on_cran() +#skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -187,33 +187,33 @@ func <- function(x) { step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) -#WRONG!!!!!!!!!! -#suppressWarnings( -#res_crop_T_1 <- Compute(wf, chunks = list(lon = 2)) -#) +#WRONG?????? +suppressWarnings( +res_crop_T_1 <- Compute(wf, chunks = list(lon = 2)) +) suppressWarnings( res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1)) ) -#WRONG!!!!!!!!!! -#suppressWarnings( -#res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) -#) +#WRONG????? +suppressWarnings( +res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) +) suppressWarnings( res_crop_T_4 <- Compute(wf, chunks = list(lat = 2)) ) -#expect_equal( -#res1$output1, -#res_crop_T_1$output1 -#) +expect_equal( +res1$output1, +res_crop_T_1$output1 +) expect_equal( res1$output1, res_crop_T_2$output1 ) -#expect_equal( -#res1$output1, -#res_crop_T_3$output1 -#) +expect_equal( +res1$output1, +res_crop_T_3$output1 +) expect_equal( res1$output1, res_crop_T_4$output1 @@ -251,7 +251,7 @@ res_crop_T_4$output1 test_that("2. regional, no border", { -skip_on_cran() +#skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -269,8 +269,7 @@ exp <- Start(dat = path, transform = CDORemapper, transform_extra_cells = 8, transform_params = list(grid = 'r100x50', - method = 'conservative', - crop = c(0, 22, -90, -60)), + method = 'conservative'), transform_vars = c('lat','lon'), synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), @@ -304,27 +303,17 @@ res3$output1 expect_equal( drop(res$output1)[, 1], -c(241.5952, 243.0271, 247.6998, 246.7727, 248.7175, 267.7744, 273.2705), +c(241.4042, 242.5804, 246.8507, 245.8008, 246.4318, 267.0983), tolerance = 0.001 ) expect_equal( drop(res$output1)[, 2], -c(241.4042, 242.5804, 246.8507, 245.8008, 246.4318, 267.0983, 272.9651), +c(241.2223, 242.2564, 245.9863, 244.5377, 244.8937, 266.5749), tolerance = 0.001 ) expect_equal( drop(res$output1)[, 3], -c(241.2223, 242.2564, 245.9863, 244.5377, 244.8937, 266.5749, 272.5154), -tolerance = 0.001 -) -expect_equal( -drop(res$output1)[, 4], -c(241.0894, 242.1896, 245.3183, 243.1169, 243.9446, 266.4386, 272.4731), -tolerance = 0.001 -) -expect_equal( -drop(res$output1)[, 5], -c(241.0217, 242.3326, 244.6789, 241.6538, 244.3845, 266.6960, 272.4390), +c(241.0895, 242.1896, 245.3183, 243.1169, 243.9446, 266.4386), tolerance = 0.001 ) @@ -465,7 +454,7 @@ res_crop_T_3$output1 test_that("3. 
regional, at lon border", { -skip_on_cran() +#skip_on_cran() path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' @@ -482,8 +471,7 @@ exp <- Start(dat = path, lon_reorder = CircularSort(0, 360), transform = CDORemapper, transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = c(0, 18, -90, -67)), + transform_params = list(grid = 'r100x50', method = 'conservative'), transform_vars = c('lat','lon'), synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), @@ -539,79 +527,23 @@ drop(res1$output1)[, 5], c(241.0894, 242.1896, 245.3183, 243.1169, 243.9446, 266.4386), tolerance = 0.001 ) -expect_equal( -drop(res1$output1)[, 6], -c(241.0217, 242.3326, 244.6789, 241.6538, 244.3845, 266.6960), -tolerance = 0.001 -) -#------------------------------------------------------ -# crop = FALSE -suppressWarnings( -exp <- Start(dat = path, - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - lat = indices(1:80), # 1:80 = -89.78488:-67.58778 - lon = indices(1:65),# 1:65 = 0.00000:17.7777778 - lat_reorder = Sort(), - lon_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = F), - transform_vars = c('lat','lon'), - synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), - return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), - retrieve = F) -) +#--------------------------------- +# lat indices is reversed -func <- function(x) { - return(x) -} -step <- Step(func, target_dims = 'time', output_dims = 'time') -wf <- AddStep(exp, step) - -suppressWarnings( -res_crop_F_1 <- Compute(wf, chunks = list(lon = 2)) -) -suppressWarnings( -res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1)) -) -suppressWarnings( -res_crop_F_3 <- Compute(wf, chunks = list(lon = 3)) -) - -expect_equal( -as.vector(res1$output1), -as.vector(drop(res_crop_F_1$output1)[1:6, ]) -) -expect_equal( -res_crop_F_1$output1, -res_crop_F_2$output1 -) -expect_equal( -res_crop_F_1$output1, -res_crop_F_3$output1 -) - -#---------------------------------------------- -# crop = TRUE suppressWarnings( exp <- Start(dat = path, var = 'tas', sdate = '20000101', ensemble = indices(1), time = indices(1), - lat = indices(1:80), # 1:80 = -89.78488:-67.58778 + lat = indices(80:1), # 1:80 = -89.78488:-67.58778 lon = indices(1:65),# 1:65 = 0.00000:17.7777778 lat_reorder = Sort(), lon_reorder = CircularSort(0, 360), transform = CDORemapper, transform_extra_cells = 8, - transform_params = list(grid = 'r100x50', method = 'conservative', - crop = T), + transform_params = list(grid = 'r100x50', method = 'conservative'), transform_vars = c('lat','lon'), synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), @@ -625,26 +557,122 @@ step <- Step(func, target_dims = 'time', output_dims = 'time') wf <- AddStep(exp, step) suppressWarnings( -res_crop_T_1 <- Compute(wf, chunks = list(lon = 2)) +res4 <- Compute(wf, chunks = list(lon = 2)) ) suppressWarnings( -res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1)) -) -suppressWarnings( -res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) +res5 <- Compute(wf, chunks = list(lat = 2)) ) expect_equal( -res_crop_F_1$output1, -res_crop_T_1$output1 +res4$output1, +res5$output1 ) expect_equal( -res_crop_T_1$output1, -res_crop_T_2$output1 -) 
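# The recurring pattern in these transform tests, distilled: now that
# chunking over the transformed dimensions is supported, the result must be
# invariant to the chunk layout. A generic sketch of the check for the
# workflow 'wf' built above ('res_a'/'res_b' are illustrative names, not new
# test objects):
res_a <- Compute(wf, chunks = list(lon = 2))$output1   # chunk along a transformed dim
res_b <- Compute(wf, chunks = list(lat = 2))$output1   # chunk along another dim
identical(res_a, res_b)                                # expected TRUE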
-expect_equal( -res_crop_T_1$output1, -res_crop_T_3$output1 +as.vector(drop(res1$output1)[6:1, ]), +as.vector(drop(res4$output1)) ) +#------------------------------------------------------ +#NOTE_19/01/2022: crop is deprecated +## crop = FALSE +#suppressWarnings( +#exp <- Start(dat = path, +# var = 'tas', +# sdate = '20000101', +# ensemble = indices(1), +# time = indices(1), +# lat = indices(1:80), # 1:80 = -89.78488:-67.58778 +# lon = indices(1:65),# 1:65 = 0.00000:17.7777778 +# lat_reorder = Sort(), +# lon_reorder = CircularSort(0, 360), +# transform = CDORemapper, +# transform_extra_cells = 8, +# transform_params = list(grid = 'r100x50', method = 'conservative', +# crop = F), +# transform_vars = c('lat','lon'), +# synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), +# return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), +# retrieve = F) +#) +# +#func <- function(x) { +# return(x) +#} +#step <- Step(func, target_dims = 'time', output_dims = 'time') +#wf <- AddStep(exp, step) +# +#suppressWarnings( +#res_crop_F_1 <- Compute(wf, chunks = list(lon = 2)) +#) +#suppressWarnings( +#res_crop_F_2 <- Compute(wf, chunks = list(ensemble = 1)) +#) +#suppressWarnings( +#res_crop_F_3 <- Compute(wf, chunks = list(lon = 3)) +#) +# +#expect_equal( +#as.vector(res1$output1), +#as.vector(drop(res_crop_F_1$output1)[1:6, ]) +#) +#expect_equal( +#res_crop_F_1$output1, +#res_crop_F_2$output1 +#) +#expect_equal( +#res_crop_F_1$output1, +#res_crop_F_3$output1 +#) +# +##---------------------------------------------- +## crop = TRUE +#suppressWarnings( +#exp <- Start(dat = path, +# var = 'tas', +# sdate = '20000101', +# ensemble = indices(1), +# time = indices(1), +# lat = indices(1:80), # 1:80 = -89.78488:-67.58778 +# lon = indices(1:65),# 1:65 = 0.00000:17.7777778 +# lat_reorder = Sort(), +# lon_reorder = CircularSort(0, 360), +# transform = CDORemapper, +# transform_extra_cells = 8, +# transform_params = list(grid = 'r100x50', method = 'conservative', +# crop = T), +# transform_vars = c('lat','lon'), +# synonims = list(lon = c('lon', 'longitude'), lat = c('lat', 'latitude')), +# return_vars = list(lat = 'dat', lon = 'dat', time = 'sdate'), +# retrieve = F) +#) +# +#func <- function(x) { +# return(x) +#} +#step <- Step(func, target_dims = 'time', output_dims = 'time') +#wf <- AddStep(exp, step) +# +#suppressWarnings( +#res_crop_T_1 <- Compute(wf, chunks = list(lon = 2)) +#) +#suppressWarnings( +#res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1)) +#) +#suppressWarnings( +#res_crop_T_3 <- Compute(wf, chunks = list(lon = 3)) +#) +# +#expect_equal( +#res_crop_F_1$output1, +#res_crop_T_1$output1 +#) +#expect_equal( +#res_crop_T_1$output1, +#res_crop_T_2$output1 +#) +#expect_equal( +#res_crop_T_1$output1, +#res_crop_T_3$output1 +#) +# }) diff --git a/tests/testthat/test-Compute-transform_values.R b/tests/testthat/test-Compute-transform_values.R index 74975f7ca0904b46da0ec1608462894e9268cef2..1029b60fc1b504557d30fb8653d6fe8d639db05a 100644 --- a/tests/testthat/test-Compute-transform_values.R +++ b/tests/testthat/test-Compute-transform_values.R @@ -7,7 +7,7 @@ context("Compute: Transform and chunk values()") ##################################################################### test_that("1. 
Global", { -skip_on_cran() +#skip_on_cran() lons.min <- 0 lons.max <- 359.9 @@ -290,33 +290,33 @@ func <- function(exp) { step <- Step(func, target_dims = 'sdate', output_dims = 'sdate') wf <- AddStep(exp, step) -#WRONG -#suppressWarnings( -#res_crop_T_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 -#) +#WRONG? +suppressWarnings( +res_crop_T_1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +) suppressWarnings( res_crop_T_2 <- Compute(wf, chunks = list(ensemble = 1))$output1 ) -#WRONG -#suppressWarnings( -#res_crop_T_3 <- Compute(wf, chunks = list(longitude = 3))$output1 -#) +#WRONG? +suppressWarnings( +res_crop_T_3 <- Compute(wf, chunks = list(longitude = 3))$output1 +) suppressWarnings( res_crop_T_4 <- Compute(wf, chunks = list(latitude = 3))$output1 ) -#expect_equal( -#res1, -#res_crop_T_1 -#) +expect_equal( +res1, +res_crop_T_1 +) expect_equal( res1, res_crop_T_2 ) -#expect_equal( -#res1, -#res_crop_T_3 -#) +expect_equal( +res1, +res_crop_T_3 +) expect_equal( res1, res_crop_T_4 @@ -346,7 +346,7 @@ res_crop_T_4 test_that("2. Regional, no border", { -skip_on_cran() +#skip_on_cran() lons.min <- 10 lons.max <- 20 @@ -532,6 +532,78 @@ res_crop_T_1, res_crop_T_3 ) +# The region borders do not exist in the original grid value. For example, +# the original grid is [longitude = 1296], so 11 and 21 do not exist there +# (but 10 and 20 do, in the cases above) +lons.min <- 11 +lons.max <- 21 +lats.min <- 21 +lats.max <- 41 + +# crop = region +suppressWarnings( +exp <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', + sdate = '20000101', #paste0(2000:2001, '0101'), + ensemble = indices(1), #'all', + time = indices(1), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(0, 360), + transform = CDORemapper, + transform_params = list(grid = 'r100x50', + method = 'con', + crop = c(lons.min, lons.max, lats.min, lats.max)), + transform_vars = c('latitude', 'longitude'), + transform_extra_cells = 8, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = NULL, #'dat', + longitude = NULL, #'dat', + time = 'sdate'), + retrieve= F) +) +func <- function(exp) { + return(exp) +} +step <- Step(func, + target_dims = 'sdate', output_dims = 'sdate') +wf <- AddStep(exp, step) +suppressWarnings( +res1 <- Compute(wf, chunks = list(latitude = 2, longitude = 2))$output1 +) +suppressWarnings( +res2 <- Compute(wf, chunks = list(ensemble = 1))$output1 +) +suppressWarnings( +res3 <- Compute(wf, chunks = list(latitude = 3))$output1 +) + +expect_equal( +dim(res1), +c(sdate = 1, dat = 1, var = 1, ensemble = 1, time = 1, latitude = 5, longitude = 2) +) +expect_equal( +res1, +res2 +) +expect_equal( +res1, +res3 +) + +expect_equal( +drop(res1)[, 1], +c(285.4820, 282.9362, 282.6088, 287.3716, 285.0194), +tolerance = 0.0001 +) +expect_equal( +drop(res1)[, 2], +c(286.1208, 284.3523, 285.9198, 287.7389, 286.1099), +tolerance = 0.0001 +) + }) ############################################################################ @@ -556,7 +628,7 @@ res_crop_T_3 test_that("3. 
Regional, at lon border", { -skip_on_cran() +#skip_on_cran() lons.min <- 0 lons.max <- 20 diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 8ac0760502fe1924b109f00d14616145606bf2b0..4f58a8744cfd69504eceed8883c5b7f3b6a1758e 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -48,7 +48,7 @@ expect_equal( }) test_that("2. 365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200408/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20191219/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, diff --git a/tests/testthat/test-Start-indices_list_vector.R b/tests/testthat/test-Start-indices_list_vector.R index 39ecb24f15edde38460a02d06095e59f612dae19..0ad897a08b2e6b1677b3f85718e061a7284b5443 100644 --- a/tests/testthat/test-Start-indices_list_vector.R +++ b/tests/testthat/test-Start-indices_list_vector.R @@ -153,35 +153,36 @@ exp1 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var retrieve= T) ) -# lat and lon are vectors of indices -suppressWarnings( -exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', - sdate = '20000101', - ensemble = indices(1), - time = indices(1), - latitude = 30:1, - latitude_reorder = Sort(), - longitude = 40:1, - longitude_reorder = CircularSort(0, 360), - transform = CDORemapper, - transform_params = list(grid = 'r100x50', - method = 'con', - crop = c(0, 11, -90, -81)), - transform_vars = c('latitude', 'longitude'), - transform_extra_cells = 8, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('longitude', 'lon')), - return_vars = list(latitude = NULL, - longitude = NULL, - time = 'sdate'), - retrieve= T) -) - -expect_equal( -as.vector(drop(exp1)[, 4:1]), -as.vector(exp2) -) +# This test is not valid because it doesn't make sense to use longitude = 40:1. With the automatic "crop" values, the result is not correct. 
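# Since "crop" is now assigned automatically by Start() as
# c(lons.min, lons.max, lats.min, lats.max) from the selector range, a
# working variant of the call above keeps ascending index selectors and
# simply drops 'crop' from transform_params. A minimal sketch (parameters
# mirror the commented exp2 below; 'exp_sketch' is an illustrative name and
# /esarchive access is assumed, as in the rest of this suite):
exp_sketch <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
                    var = 'tas',
                    sdate = '20000101',
                    ensemble = indices(1),
                    time = indices(1),
                    latitude = 1:30,
                    latitude_reorder = Sort(),
                    longitude = 1:40,
                    longitude_reorder = CircularSort(0, 360),
                    transform = CDORemapper,
                    transform_params = list(grid = 'r100x50', method = 'con'),
                    transform_vars = c('latitude', 'longitude'),
                    transform_extra_cells = 8,
                    synonims = list(latitude = c('lat', 'latitude'),
                                    longitude = c('longitude', 'lon')),
                    return_vars = list(latitude = NULL,
                                       longitude = NULL,
                                       time = 'sdate'),
                    retrieve = TRUE)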
+## lat and lon are vectors of indices +#suppressWarnings( +#exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc', +# var = 'tas', +# sdate = '20000101', +# ensemble = indices(1), +# time = indices(1), +# latitude = 30:1, +# latitude_reorder = Sort(), +# longitude = 40:1, +# longitude_reorder = CircularSort(0, 360), +# transform = CDORemapper, +# transform_params = list(grid = 'r100x50', +# method = 'con', +# crop = c(0, 11, -90, -81)), +# transform_vars = c('latitude', 'longitude'), +# transform_extra_cells = 8, +# synonims = list(latitude = c('lat', 'latitude'), +# longitude = c('longitude', 'lon')), +# return_vars = list(latitude = NULL, +# longitude = NULL, +# time = 'sdate'), +# retrieve= T) +#) +# +#expect_equal( +#as.vector(drop(exp1)[, 4:1]), +#as.vector(exp2) +#) }) diff --git a/tests/testthat/test-Start-largest_dims_length.R b/tests/testthat/test-Start-largest_dims_length.R index 59b73e89c4a8987d99524d4714e139e98f31cbfc..b448f895b44d150ea0bb519b288a8ee78d4c6ee1 100644 --- a/tests/testthat/test-Start-largest_dims_length.R +++ b/tests/testthat/test-Start-largest_dims_length.R @@ -134,3 +134,167 @@ dat3 <- Start(dataset = repos, ) }) + +test_that("2. inconsistent time length, merge_across_dims = T", { + +path <- paste0('/esarchive/exp/CMIP6/dcppA-hindcast/EC-Earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/', + '$member$/Amon/$var$/gr/v20210309/', + '$var$_Amon_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc') + +suppressWarnings( +data <- Start(dataset = path, + var = 'psl', + sdate = '1970', + lat = indices(1), + lon = indices(1), + fyear = c('197011-197012', '197101-197112'), #2+12 + fmonth = 'all', + member = 'r6i2p1f1', + fmonth_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + largest_dims_length = TRUE, #c(fmonth = 12), + synonims = list(fmonth = c('time', 'fmonth'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset', + fmonth = 'fyear'), + retrieve = TRUE) +) +expect_equal( +dim(data), +c(dataset = 1, var = 1, sdate = 1, lat = 1, lon = 1, fmonth = 14, member = 1) +) +expect_equal( +as.vector(data)[1:5], +c(101341.03, 100831.62, 99877.38, 101355.11, 101067.74), +tolerance = 0.001 +) + +suppressWarnings( +data2 <- Start(dataset = path, + var = 'psl', + sdate = '1970', + lat = indices(1), + lon = indices(1), + fyear = c('197011-197012', '197101-197112'), #2+12 + fmonth = indices(1:14), + member = 'r6i2p1f1', + fmonth_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + largest_dims_length = TRUE, #c(fmonth = 12), + synonims = list(fmonth = c('time', 'fmonth'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset', + fmonth = 'fyear'), + retrieve = TRUE) +) + +expect_equal( +as.vector(data), +as.vector(data2) +) + +suppressWarnings( +data3 <- Start(dataset = path, + var = 'psl', + sdate = '1970', + lat = indices(1), + lon = indices(1), + fyear = c('197011-197012', '197101-197112'), #2+12 + fmonth = 'all', + member = 'r6i2p1f1', + fmonth_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + largest_dims_length = c(fmonth = 12), + synonims = list(fmonth = c('time', 'fmonth'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset', + fmonth = 'fyear'), + retrieve = TRUE) +) + +expect_equal( +as.vector(data), +as.vector(data3) +) + 
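# What largest_dims_length resolves here, in plain terms: the two fyear
# files hold 2 and 12 'fmonth' steps, so Start() must look up each file's
# actual inner-dimension length instead of assuming the first file's, and
# merge_across_dims_narm decides whether the short file is NA-padded or
# compacted. A base-R sketch of the merge bookkeeping (toy values; lengths
# mirror the files above):
file1 <- 1:2                                            # fyear '197011-197012': 2 months
file2 <- 3:14                                           # fyear '197101-197112': 12 months
padded <- c(file1, rep(NA, 12 - length(file1)), file2)  # merge_across_dims_narm = FALSE
compact <- padded[!is.na(padded)]                       # merge_across_dims_narm = TRUE
length(compact)                                         # 14, matching dim(data)['fmonth']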
+#-------------------------------- +suppressWarnings( +data4 <- Start(dataset = path, + var = 'psl', + sdate = c('1970', '1971'), + lat = indices(1), + lon = indices(1), + fyear = 'all', + fmonth = 'all', + fyear_depends = 'sdate', + member = 'r6i2p1f1', + fmonth_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + largest_dims_length = TRUE, + synonims = list(fmonth = c('time', 'fmonth'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset', + fmonth = 'fyear'), + retrieve = TRUE) +) + +expect_equal( +dim(data4), +c(dataset = 1, var = 1, sdate = 2, lat = 1, lon = 1, fmonth = 122, member = 1) +) +expect_equal( +as.vector(drop(data4)[1,1:5]), +c(101341.03, 100831.62, 99877.38, 101355.11, 101067.74), +tolerance = 0.001 +) +expect_equal( +as.vector(drop(data4)[2,10:14]), +c(100515.2, 101354.9, 100657.0, 100349.4, 100199.6), +tolerance = 0.001 +) + +#-------------------------------- +suppressWarnings( +data5 <- Start(dataset = path, + var = 'psl', + sdate = c('1970', '1971'), + lat = indices(1), + lon = indices(1), + fyear = 'all', + fmonth = 'all', + fyear_depends = 'sdate', + member = 'r6i2p1f1', + fmonth_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = FALSE, #TRUE, + largest_dims_length = TRUE, + synonims = list(fmonth = c('time', 'fmonth'), + lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), + return_vars = list(lat = 'dataset', lon = 'dataset', + fmonth = 'fyear'), + retrieve = TRUE) +) + +expect_equal( +dim(data5), +c(dataset = 1, var = 1, sdate = 2, lat = 1, lon = 1, fmonth = 132, member = 1) +) +expect_equal( +which(is.na(drop(data5))), +c(5:24) +) +expect_equal( +as.vector(data4), +as.vector(data5)[-c(5:24)] +) + +}) diff --git a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R index d8b43ee9f59e8a2c500b4841fc67d6993db2b442..4351aa438689ac88f387b95346cd05d891168362 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_-180to180.R +++ b/tests/testthat/test-Start-reorder-lon-transform_-180to180.R @@ -207,7 +207,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( range((attr(res, 'Variables')$dat1$longitude)), - c(170, 180), + c(170, 190), tolerance = 0.0001 ) expect_equal( @@ -216,7 +216,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( length(attr(res, 'Variables')$dat1$longitude), - 11 + 21 ) }) ############################################## diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360.R b/tests/testthat/test-Start-reorder-lon-transform_0to360.R index a722bea88af4600845894914bea273a630de9b17..3d2047ea27f31f98e84928e0cbcb7ecfef186df2 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360.R @@ -133,7 +133,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( range((attr(res, 'Variables')$dat1$longitude)), - c(0, 10), + c(-10, 10), tolerance = 0.0001 ) expect_equal( @@ -142,7 +142,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( length(attr(res, 'Variables')$dat1$longitude), - 11 + 21 ) }) @@ -162,7 +162,7 @@ res <- Start(dat = list(list(path=path_exp)), longitude = values(list(lons.min, lons.max)), transform = CDORemapper, transform_params = list(grid ='r360x181', - method = 'con', crop = T), + method = 'con'), transform_vars = c('longitude', 'latitude'), transform_extra_cells = 2, synonims = 
list(latitude=c('lat','latitude'), @@ -175,7 +175,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( range((attr(res, 'Variables')$dat1$longitude)), - c(0, 10), + c(-10, 10), tolerance = 0.0001 ) expect_equal( @@ -184,7 +184,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( length(attr(res, 'Variables')$dat1$longitude), - 11 + 21 ) }) ############################################## @@ -229,46 +229,48 @@ res <- Start(dat = list(list(path=path_exp)), ) }) ############################################## -test_that("1-8-2-2-1-1-2-4", { -lons.min <- 350 -lons.max <- 370 -lats.min <- 10 -lats.max <- 20 -suppressWarnings( -res <- Start(dat = list(list(path=path_exp)), - var = variable, - member = indices(1), - sdate = sdate, - time = indices(4), - latitude = values(list(lats.min, lats.max)), - longitude = values(list(lons.min, lons.max)), - transform = CDORemapper, - transform_params = list(grid ='r360x181', - method = 'con', crop = T), - transform_vars = c('longitude', 'latitude'), - transform_extra_cells = 2, - synonims = list(latitude=c('lat','latitude'), - longitude=c('lon','longitude'), - member=c('ensemble','realization')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = NULL), - retrieve = F) -) - expect_equal( - range((attr(res, 'Variables')$dat1$longitude)), - c(350, 359), - tolerance = 0.0001 - ) - expect_equal( - (attr(res, 'Variables')$dat1$longitude)[1] < (attr(res, 'Variables')$dat1$longitude)[2], - TRUE - ) - expect_equal( - length(attr(res, 'Variables')$dat1$longitude), - 10 - ) -}) +#NOTE_18/01/2022: This Start() call returns ERROR now since "crop" +#is deprecated and automatically assigned as c(lons.min, lons.max, lats.min, lats.max). lons.max cannot exceed 360. +#test_that("1-8-2-2-1-1-2-4", { +#lons.min <- 350 +#lons.max <- 370 +#lats.min <- 10 +#lats.max <- 20 +#suppressWarnings( +#res <- Start(dat = list(list(path=path_exp)), +# var = variable, +# member = indices(1), +# sdate = sdate, +# time = indices(4), +# latitude = values(list(lats.min, lats.max)), +# longitude = values(list(lons.min, lons.max)), +# transform = CDORemapper, +# transform_params = list(grid ='r360x181', +# method = 'con', crop = T), +# transform_vars = c('longitude', 'latitude'), +# transform_extra_cells = 2, +# synonims = list(latitude=c('lat','latitude'), +# longitude=c('lon','longitude'), +# member=c('ensemble','realization')), +# return_vars = list(latitude = 'dat', +# longitude = 'dat', +# time = NULL), +# retrieve = F) +#) +# expect_equal( +# range((attr(res, 'Variables')$dat1$longitude)), +# c(350, 359), +# tolerance = 0.0001 +# ) +# expect_equal( +# (attr(res, 'Variables')$dat1$longitude)[1] < (attr(res, 'Variables')$dat1$longitude)[2], +# TRUE +# ) +# expect_equal( +# length(attr(res, 'Variables')$dat1$longitude), +# 10 +# ) +#}) ############################################## ############################################## ############################################## diff --git a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R index 2a4f2ca6cd01fb6f464c6d05683d57e3b58eb09d..d4629af99d396bbc65a34824542c1b78181bc9d5 100644 --- a/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R +++ b/tests/testthat/test-Start-reorder-lon-transform_0to360Coarse.R @@ -137,7 +137,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( range((attr(res, 'Variables')$dat1$longitude)), - c(0, 10), + c(-10, 10), tolerance = 0.0001 ) expect_equal( @@ -146,7 +146,7 @@ 
res <- Start(dat = list(list(path=path_exp)), ) expect_equal( length(attr(res, 'Variables')$dat1$longitude), - 11 + 21 ) }) @@ -179,7 +179,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( range((attr(res, 'Variables')$dat1$longitude)), - c(0, 10), + c(-10, 10), tolerance = 0.0001 ) expect_equal( @@ -188,7 +188,7 @@ res <- Start(dat = list(list(path=path_exp)), ) expect_equal( length(attr(res, 'Variables')$dat1$longitude), - 11 + 21 ) }) ############################################## @@ -233,46 +233,48 @@ res <- Start(dat = list(list(path=path_exp)), ) }) ############################################## -test_that("1-8-2-2-1-1-2-4", { -lons.min <- 350 -lons.max <- 370 -lats.min <- 10 -lats.max <- 20 -suppressWarnings( -res <- Start(dat = list(list(path=path_exp)), - var = variable, - member = indices(1), - sdate = sdate, - time = indices(4), - latitude = values(list(lats.min, lats.max)), - longitude = values(list(lons.min, lons.max)), - transform = CDORemapper, - transform_params = list(grid ='r360x181', - method = 'con', crop = T), - transform_vars = c('longitude', 'latitude'), - transform_extra_cells = 2, - synonims = list(latitude=c('lat','latitude'), - longitude=c('lon','longitude'), - member=c('ensemble','realization')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = NULL), - retrieve = F) -) - expect_equal( - range((attr(res, 'Variables')$dat1$longitude)), - c(350, 359), - tolerance = 0.0001 - ) - expect_equal( - (attr(res, 'Variables')$dat1$longitude)[1] < (attr(res, 'Variables')$dat1$longitude)[2], - TRUE - ) - expect_equal( - length(attr(res, 'Variables')$dat1$longitude), - 10 - ) -}) +#NOTE_18/01/2022: This Start() call returns ERROR now since "crop" +#is deprecated and automatically assigned as c(lons.min, lons.max, lats.min, lats.max). lons.max cannot exceed 360. +#test_that("1-8-2-2-1-1-2-4", { +#lons.min <- 350 +#lons.max <- 370 +#lats.min <- 10 +#lats.max <- 20 +#suppressWarnings( +#res <- Start(dat = list(list(path=path_exp)), +# var = variable, +# member = indices(1), +# sdate = sdate, +# time = indices(4), +# latitude = values(list(lats.min, lats.max)), +# longitude = values(list(lons.min, lons.max)), +# transform = CDORemapper, +# transform_params = list(grid ='r360x181', +# method = 'con', crop = T), +# transform_vars = c('longitude', 'latitude'), +# transform_extra_cells = 2, +# synonims = list(latitude=c('lat','latitude'), +# longitude=c('lon','longitude'), +# member=c('ensemble','realization')), +# return_vars = list(latitude = 'dat', +# longitude = 'dat', +# time = NULL), +# retrieve = F) +#) +# expect_equal( +# range((attr(res, 'Variables')$dat1$longitude)), +# c(350, 359), +# tolerance = 0.0001 +# ) +# expect_equal( +# (attr(res, 'Variables')$dat1$longitude)[1] < (attr(res, 'Variables')$dat1$longitude)[2], +# TRUE +# ) +# expect_equal( +# length(attr(res, 'Variables')$dat1$longitude), +# 10 +# ) +#}) ############################################## ############################################## ############################################## diff --git a/tests/testthat/test-Start-reorder_all.R b/tests/testthat/test-Start-reorder_all.R new file mode 100644 index 0000000000000000000000000000000000000000..b8279de2fdf1c2d8c7622da51ce19bb239ad9824 --- /dev/null +++ b/tests/testthat/test-Start-reorder_all.R @@ -0,0 +1,146 @@ +# No transform, test reorder function Sort() and CircularSort() with selector 'all'. 
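# A base-R sketch of the reordering CircularSort(-180, 180) performs on the
# r1296 longitude grid used in this file: values are wrapped into
# [-180, 180) and then sorted, which is where the c(649:1296, 1:648)
# permutation checked below comes from (illustration only, not startR's
# actual implementation):
lon <- (0:1295) * 360 / 1296            # 0, 0.2778, ..., 359.7222
wrapped <- ((lon + 180) %% 360) - 180   # map [0, 360) onto [-180, 180)
range(wrapped)                          # -180 to 179.7222, as in the res4 checks
identical(order(wrapped), c(649:1296, 1:648))   # TRUE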
+ + +context("No transform, reorder test: 'all'") + +#--------------------------------------------------------------- +# cdo is used to verify the data values +library(easyNCDF) +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +file <- NcOpen(path) +arr <- NcToArray(file, + dim_indices = list(time = 1, ensemble = 1, + latitude = 1:640, longitude = 1:1296), + vars_to_read = 'tas') + +#lat is from 90 to -90. +#lats <- NcToArray(file, +# dim_indices = list(latitude = 1:640), vars_to_read = 'latitude') +#lon is from 0 to 360. +#lons <- NcToArray(file, +# dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') +NcClose(file) +#--------------------------------------------------------------- + +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + +test_that("1. lat", { +# lon_reorder = CircularSort(0, 360) + +# lat should be ascending +suppressWarnings( +res1 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = T) +) + +# Because Sort() is not specified, it follows the original order (descending). +suppressWarnings( +res2 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = 'all', +# latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = NULL, #'dat', + longitude = NULL, #'dat', + time = 'sdate'), + retrieve = T) +) + +# lat should be descending +suppressWarnings( +res3 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = 'all', + latitude_reorder = Sort(decreasing = T), + longitude = 'all', + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = T) +) + +# longitude_reorder = CircularSort(-180, 180) +suppressWarnings( +res4 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = 'all', + latitude_reorder = Sort(decreasing = T), + longitude = 'all', + longitude_reorder = CircularSort(-180, 180), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = T) +) + +expect_equal( +all(diff(attr(res1, 'Variables')$dat1$latitude) > 0), +TRUE +) +expect_equal( +as.vector(drop(res1)[640:1, ]), +as.vector(arr), +tolerance = 0.0001 +) +expect_equal( +all(diff(attr(res2, 'Variables')$common$latitude) < 0), +TRUE +) +expect_equal( +as.vector(res2), +as.vector(arr), +tolerance = 0.0001 +) +expect_equal( +all(diff(attr(res3, 'Variables')$dat1$latitude) < 0), +TRUE +) +expect_equal( +as.vector(res3), +as.vector(arr), +tolerance = 0.0001 +) + +expect_equal( +range(attr(res4, 'Variables')$dat1$longitude), +c(-180, 179.7222), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(res4)[, c(649:1296, 1:648)]), +as.vector(arr), +tolerance = 0.0001 +) + +}) + diff --git 
a/tests/testthat/test-Start-reorder_indices.R b/tests/testthat/test-Start-reorder_indices.R new file mode 100644 index 0000000000000000000000000000000000000000..b2ca0ace687be4d7ff157cc64e4e357d34b0445e --- /dev/null +++ b/tests/testthat/test-Start-reorder_indices.R @@ -0,0 +1,145 @@ +# No transform, test reorder function Sort() and CircularSort() with selector indices(). + +context("No transform, reorder test: indices()") + +#--------------------------------------------------------------- +# cdo is used to verify the data values +library(easyNCDF) +path <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/tas_f6h/tas_20000101.nc" +file <- NcOpen(path) +arr <- NcToArray(file, + dim_indices = list(time = 1, ensemble = 1, + latitude = 1:640, longitude = 1:1296), + vars_to_read = 'tas') + +#lat is from 90 to -90. +#lats <- NcToArray(file, +# dim_indices = list(latitude = 1:640), vars_to_read = 'latitude') +#lon is from 0 to 360. +#lons <- NcToArray(file, +# dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') +NcClose(file) +#--------------------------------------------------------------- + +path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' + +test_that("1. lat", { +# lon_reorder = CircularSort(0, 360) + +# lat should be ascending +suppressWarnings( +res1 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = indices(1:640), + latitude_reorder = Sort(), + longitude = indices(1:1296), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = T) +) + +# Because Sort() is not specified, it follows the original order (descending). 
+suppressWarnings( +res2 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = indices(1:640), +# latitude_reorder = Sort(), + longitude = indices(1:1296), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = NULL, #'dat', + longitude = NULL, #'dat', + time = 'sdate'), + retrieve = T) +) + +# lat should be descending +suppressWarnings( +res3 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = indices(1:640), + latitude_reorder = Sort(decreasing = T), + longitude = indices(1:1296), + longitude_reorder = CircularSort(0, 360), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = T) +) + +# longitude_reorder = CircularSort(-180, 180) +suppressWarnings( +res4 <- Start(dat = path, + var = 'tas', + sdate = '20000101', + ensemble = indices(1), + time = indices(1), + latitude = indices(1:640), + latitude_reorder = Sort(decreasing = T), + longitude = indices(1:1296), + longitude_reorder = CircularSort(-180, 180), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('longitude', 'lon')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'sdate'), + retrieve = T) +) + +expect_equal( +all(diff(attr(res1, 'Variables')$dat1$latitude) > 0), +TRUE +) +expect_equal( +as.vector(drop(res1)[640:1, ]), +as.vector(arr), +tolerance = 0.0001 +) +expect_equal( +all(diff(attr(res2, 'Variables')$common$latitude) < 0), +TRUE +) +expect_equal( +as.vector(res2), +as.vector(arr), +tolerance = 0.0001 +) +expect_equal( +all(diff(attr(res3, 'Variables')$dat1$latitude) < 0), +TRUE +) +expect_equal( +as.vector(res3), +as.vector(arr), +tolerance = 0.0001 +) + +expect_equal( +range(attr(res4, 'Variables')$dat1$longitude), +c(-180, 179.7222), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(res4)[, c(649:1296, 1:648)]), +as.vector(arr), +tolerance = 0.0001 +) + +}) + diff --git a/tests/testthat/test-Start-transform-lat-Sort-all.R b/tests/testthat/test-Start-transform-lat-Sort-all.R index 28393699cd5c4e15194963f60135ee57a9e99b57..b41ec0a106162306061b268a0d6e9e66e6a5b60d 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-all.R +++ b/tests/testthat/test-Start-transform-lat-Sort-all.R @@ -20,8 +20,10 @@ lats <- NcToArray(file, lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +suppressWarnings( arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), grid = 'r100x50', method = 'con', crop = FALSE)$data_array +) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Start-transform-lat-Sort-indices.R b/tests/testthat/test-Start-transform-lat-Sort-indices.R index 1a1f1ee924f0264c1371cafc33689ed0a29135eb..6c3a7976f50b9e3f68dbc072f0321ac0a6d60fc9 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-indices.R +++ b/tests/testthat/test-Start-transform-lat-Sort-indices.R @@ -25,8 +25,10 @@ lats <- NcToArray(file, lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +suppressWarnings( arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), grid = 'r100x50', 
method = 'con', crop = FALSE)$data_array +) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Start-transform-lat-Sort-values.R b/tests/testthat/test-Start-transform-lat-Sort-values.R index af00f73095106a416186ea4efa781aac0055f292..92490ae6fb03f264c4f04ee0d3a4a503cfa995a6 100644 --- a/tests/testthat/test-Start-transform-lat-Sort-values.R +++ b/tests/testthat/test-Start-transform-lat-Sort-values.R @@ -23,8 +23,10 @@ lats <- NcToArray(file, lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +suppressWarnings( arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), grid = 'r100x50', method = 'con', crop = FALSE)$data_array +) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Start-transform-three-selectors.R b/tests/testthat/test-Start-transform-three-selectors.R index 3eb00409ba6f0a65fc91943c026af9f57cacf592..657cca3589991dc3ad485090ffdc66cf4b1581b7 100644 --- a/tests/testthat/test-Start-transform-three-selectors.R +++ b/tests/testthat/test-Start-transform-three-selectors.R @@ -24,8 +24,10 @@ lats <- NcToArray(file, lons <- NcToArray(file, dim_indices = list(longitude = 1:1296), vars_to_read = 'longitude') NcClose(file) +suppressWarnings( arr2 <- s2dv::CDORemap(arr, lons = as.vector(lons), lats = as.vector(lats), grid = 'r100x50', method = 'con', crop = FALSE)$data_array +) #--------------------------------------------------------------- path <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' diff --git a/tests/testthat/test-Start-values_list_vector.R b/tests/testthat/test-Start-values_list_vector.R index d93b2473526a0237e59b6174902ab6761513deab..76c4f91d2758665a32cf49098353bc6c98dcf703 100644 --- a/tests/testthat/test-Start-values_list_vector.R +++ b/tests/testthat/test-Start-values_list_vector.R @@ -170,7 +170,7 @@ exp2 <- Start(dat = '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var time = indices(1), latitude = values(rev(c(81, 84.6, 88.2))), latitude_reorder = Sort(), - longitude = values(rev(c(0, 3.6, 7.2))), + longitude = values(c(0, 3.6, 7.2)), # It can't be reversed; different meanings longitude_reorder = CircularSort(0, 360), transform = CDORemapper, transform_params = list(grid = 'r100x50',