diff --git a/.Rbuildignore b/.Rbuildignore index 90018c782d3ac1075895bbfa778c1248ae5259dd..b320a05571bd3c450b5b1f2e522261ba923fc601 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,7 +6,7 @@ ^README\.md$ #\..*\.RData$ #^vignettes$ -#^tests$ +^tests$ ^inst/doc$ #^inst/doc/*$ #^inst/doc/figures/$ diff --git a/DESCRIPTION b/DESCRIPTION index 62087d9efb74bcdb0a355b312a340d6566ef7eb5..f761503614963190702c9aea93cd264bcfba3368 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,14 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.1.0-5 +Version: 2.2.0 Authors@R: c( - person("BSC-CNS", role = c("aut", "cph")), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), - person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("ctb", "cre")), - person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("ctb"), comment = c(ORCID = "0000-0001-8568-3071")), + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), + person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut"), comment = c(ORCID = "0000-0001-8568-3071")), person("Javier", "Vegas", , "javier.vegas@bsc.es", role = c("ctb")), person("Pierre-Antoine", "Bretonniere", , "pierre-antoine.bretonniere@bsc.es", role = c("ctb")), - person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb"))) + person("Roberto", "Serrano", , "rsnotivoli@gmal.com", role = c("ctb")), + person("BSC-CNS", role = c("aut", "cph"))) Description: Tool to automatically fetch, transform and arrange subsets of multi- dimensional data sets (collections of files) stored in local and/or remote file systems or servers, using multicore capabilities where possible. @@ -19,12 +19,12 @@ Description: Tool to automatically fetch, transform and arrange subsets of the tool suitable for any research field where large multidimensional data sets are involved. 
Depends: - R (>= 3.6.1) + R (>= 3.6.0) Imports: abind, bigmemory, future, - multiApply (>= 2.1.1), + multiApply (>= 2.1.0), parallel, easyNCDF, s2dv, @@ -34,9 +34,8 @@ Suggests: stats, utils, testthat -License: LGPL-3 +License: Apache License 2.0 URL: https://earth.bsc.es/gitlab/es/startR/ BugReports: https://earth.bsc.es/gitlab/es/startR/-/issues -LazyData: true SystemRequirements: cdo ecFlow RoxygenNote: 7.0.1 diff --git a/NAMESPACE b/NAMESPACE index 1375d83968e48c525cfdd4f0fb8e84f1b2dfbe1e..1434a0f7a06ce718c3ea494c6e749829a173585c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,4 +27,5 @@ importFrom(ClimProjDiags,Subset) importFrom(s2dv,CDORemap) importFrom(stats,na.omit) importFrom(stats,setNames) +importFrom(utils,getFromNamespace) importFrom(utils,str) diff --git a/NEWS.md b/NEWS.md index 8c7a11636c700e54e7c663ffedbc9295be22601b..c542dd1cc14451ce643b361c58583b7744312829 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,21 @@ +# startR v2.2.0 (Release date: 2022-02-11) +- License changes to Apache License 2.0 +- R version dependency changes to >= 3.6.0 +- The dependency on s2dverification changes to s2dv +- The transform parameter "crop" for CDORemapper() is deprecated. It is assigned as a vector of four numbers of the range of latitude and longitude selectors automatically by Start(). +- Chunking the transformed dimensions is available. +- The transform and reorder function works with selector 'all' and indices() now. +- Initialize big.matrix in Start() as NA when parameter "ObjectBigmemory" is specified or when the job is submitted to remote machine by Compute(). +- Bugfix of naming the chunks submitted to remote machine. It prevents job crashes when chunk_1 and chunk_11 run at the same time, for example. +- Adjust time attribute to UTC instead of local time zone, and correct the time calculation according to calendar type and units. +- The default value of Start() parameter "merge_across_dims_narm" is changed to TRUE. +- The metadata of startR object is refined. 
Different datasets are recorded separately. +- Force return_vars to have a value when an inner dim depends on a file dim. +- Correct the wrong names of return_vars. If the names of return_vars are synonyms, change them back to the inner dim names. +- When merging one inner dimension across files, Start() can detect the different inner dimension lengths of the files and merge them without extra NAs. This requires specifying "largest_dims_length = TRUE". +- Bugfixes for several reshaping problems with different combinations of parameters "merge_across_dims", "merge_across_dims_narm", and "split_multiselected_dims". +- Modify the dimension consistency check to only check margin dimensions. The target dimensions can have different lengths. + # startR v2.1.0 (Release date: 2020-10-30) - Bugfix for metadata retrieving when there are more than one dataset and one of them is missing. - Bugfix for the Start() parameter 'metadata_dims' is set to non-dat dimension. diff --git a/R/CDORemapper.R b/R/CDORemapper.R index 135a8cbd92807bdb391e78ff4625163202eb38b2..67c6b9e93b0b0e1f5c1b73efeeaefca0f21f2038 100644 --- a/R/CDORemapper.R +++ b/R/CDORemapper.R @@ -52,12 +52,13 @@ #' retrieve = FALSE) #' } #'@importFrom s2dv CDORemap +#'@importFrom utils getFromNamespace #'@export CDORemapper <- function(data_array, variables, file_selectors = NULL, crop_domain = NULL, ...) 
{ file_dims <- names(file_selectors) - known_lon_names <- startR:::.KnownLonNames() - known_lat_names <- startR:::.KnownLatNames() + known_lon_names <- getFromNamespace('.KnownLonNames', 'startR')() + known_lat_names <- getFromNamespace('.KnownLatNames', 'startR')() if (!any(known_lon_names %in% names(variables)) || !any(known_lat_names %in% names(variables))) { stop("The longitude and latitude variables must be requested in ", @@ -100,17 +101,15 @@ CDORemapper <- function(data_array, variables, file_selectors = NULL, # Use crop_domain to get 'crop' if (!is.null(crop_domain)) { ## lon - known_lon_names <- startR:::.KnownLonNames() lon_name <- names(crop_domain)[which(names(crop_domain) %in% known_lon_names)] crop_lon <- unlist(crop_domain[[lon_name]]) ## lat - known_lat_names <- startR:::.KnownLatNames() lat_name <- names(crop_domain)[which(names(crop_domain) %in% known_lat_names)] crop_lat <- unlist(crop_domain[[lat_name]]) crop_values <- c(crop_lon, crop_lat) if ('crop' %in% names(extra_params)) { - .warning("Argument 'crop' in 'transform_params' for CDORemapper() is ", + warning("Argument 'crop' in 'transform_params' for CDORemapper() is ", "deprecated. It is automatically assigned as the selected domain ", "in Start() call.") } diff --git a/R/Start.R b/R/Start.R index c42cf62ccc5891719d4c4ac7476e5ae807f884c9..d11c669daeec82be745586fb321c789ec8e03376 100644 --- a/R/Start.R +++ b/R/Start.R @@ -967,10 +967,10 @@ Start <- function(..., # dim = indices/selectors, # the variable 'dat' is mounted with the information of each # dataset. 
# Take only the datasets for the requested chunk - dats_to_take <- chunk_indices(length(dim_params[[found_pattern_dim]]), - chunks[[found_pattern_dim]]['chunk'], - chunks[[found_pattern_dim]]['n_chunks'], - found_pattern_dim) + dats_to_take <- get_chunk_indices(length(dim_params[[found_pattern_dim]]), + chunks[[found_pattern_dim]]['chunk'], + chunks[[found_pattern_dim]]['n_chunks'], + found_pattern_dim) dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] dat <- dim_params[[found_pattern_dim]] #NOTE: This function creates the object 'dat_names' @@ -1396,7 +1396,7 @@ Start <- function(..., # dim = indices/selectors, # Take chunk if needed (only defined dim; undefined dims will be chunked later in # find_ufd_value(). if (chunks[[file_dim]]['n_chunks'] > 1) { - desired_chunk_indices <- chunk_indices( + desired_chunk_indices <- get_chunk_indices( length(dat_selectors[[file_dim]][[j]]), chunks[[file_dim]]['chunk'], chunks[[file_dim]]['n_chunks'], @@ -2398,7 +2398,7 @@ Start <- function(..., # dim = indices/selectors, # If the inner dim lengths differ among files, # need to know each length to create the indices for each file later. - # Record 'iinner_dim_lengths' here for later usage. + # Record 'inner_dim_lengths' here for later usage. 
inner_dim_lengths <- NULL if (largest_dims_length & !is.null(file_dim)) { # inner_dim_lengths here includes all the files, but we only want @@ -2826,15 +2826,15 @@ Start <- function(..., # dim = indices/selectors, } if (!is.list(sub_array_of_indices)) { sub_array_of_indices <- - sub_array_of_indices[chunk_indices(length(sub_array_of_indices), - chunks[[inner_dim]]["chunk"], - chunks[[inner_dim]]["n_chunks"], - inner_dim)] + sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]["chunk"], + chunks[[inner_dim]]["n_chunks"], + inner_dim)] } else { tmp <- - chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]), - chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], - inner_dim) + get_chunk_indices(length(sub_array_of_indices[[1]]:sub_array_of_indices[[2]]), + chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], + inner_dim) vect <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] sub_array_of_indices[[1]] <- vect[tmp[1]] sub_array_of_indices[[2]] <- vect[tmp[length(tmp)]] @@ -3235,7 +3235,7 @@ Start <- function(..., # dim = indices/selectors, if (is.list(sub_array_of_indices)) { sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] } - sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + sub_array_of_indices <- sub_array_of_indices[get_chunk_indices(length(sub_array_of_indices), chunks[[inner_dim]]['chunk'], chunks[[inner_dim]]['n_chunks'], inner_dim)] @@ -4024,8 +4024,8 @@ Start <- function(..., # dim = indices/selectors, dims_to_crop <- which(!sapply(second_round_indices, is.null)) if (length(dims_to_crop) > 0) { dimnames_to_crop <- names(second_round_indices)[dims_to_crop] - sub_array <- Subset(sub_array, dimnames_to_crop, - second_round_indices[dimnames_to_crop]) + sub_array <- ClimProjDiags::Subset(sub_array, dimnames_to_crop, + second_round_indices[dimnames_to_crop]) } if (debug) { if (all(unlist(store_indices[1:6]) == 1)) { 
diff --git a/R/zzz.R b/R/zzz.R index 6b41a69343dea60f342f9d80b93388448235347b..0130724599137903e4c47d9fea52504dcd36d2fa 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -163,7 +163,7 @@ look_for_chunks <- function(dim_params, dim_names) { # This is a helper function to compute the chunk indices to take once the total # number of indices for a dimension has been discovered. - chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { + get_chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { if (n_chunks > n_indices) { stop("Requested to divide dimension '", dim_name, "' of length ", n_indices, " in ", n_chunks, " chunks, which is not possible.") @@ -408,7 +408,7 @@ find_ufd_value <- function(undefined_file_dims, dat, i, replace_values, var = unique(parsed_values), return_indices = FALSE) # Take chunk if needed - dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), + dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][get_chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), chunks[[u_file_dim]]['chunk'], chunks[[u_file_dim]]['n_chunks'], u_file_dim)] @@ -1109,8 +1109,9 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d tmp <- cumsum(unlist(length_inner_across_dim)) tmp <- c(0, tmp) for (i in 1:length(length_inner_across_dim)) { - data_array_seperate[[i]] <- Subset(data_array_no_split, across_inner_dim, - (tmp[i] + 1):tmp[i + 1]) + data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + across_inner_dim, + (tmp[i] + 1):tmp[i + 1]) } # re-build the array: chunk which_chunk <- as.numeric(names(final_order_list)) @@ -1121,9 +1122,9 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d array_piece <- list() ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) for (i in 1:length(final_order_list)) { - array_piece[[i]] <- Subset(data_array_seperate[[which_chunk[i]]], - 
across_inner_dim, - ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + array_piece[[i]] <- ClimProjDiags::Subset( + data_array_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] } diff --git a/startR-manual.pdf b/startR-manual.pdf index 0e205e687143fcabaa6c6c47c17b14e302458c9c..84ba5184bb5acc5bb7e84cf35887158bc6d1235a 100644 Binary files a/startR-manual.pdf and b/startR-manual.pdf differ