diff --git a/.Rbuildignore b/.Rbuildignore index eeac7b8d49f9b316dc301481288b0cef99db5909..90018c782d3ac1075895bbfa778c1248ae5259dd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,13 +6,13 @@ ^README\.md$ #\..*\.RData$ #^vignettes$ -^tests$ +#^tests$ ^inst/doc$ #^inst/doc/*$ #^inst/doc/figures/$ #^inst/doc/usecase/$ #^inst/PlotProfiling\.R$ - +.gitlab-ci.yml # Suggested by http://r-pkgs.had.co.nz/package.html ^.*\.Rproj$ # Automatically added by RStudio, ^\.Rproj\.user$ # used for temporary files. diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..8cd9e963bf92843ac7de669222149d1c4dbddfc4 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,11 @@ +stages: + - build +build: + stage: build + script: + - module load R/3.6.1-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - R CMD build --resave-data . + - R CMD check --as-cran --no-manual --run-donttest startR_*.tar.gz + - R -e 'covr::package_coverage()' + diff --git a/DESCRIPTION b/DESCRIPTION index 6d0d002f3375d2ee18dba3c4ecda8bcfd1e88931..5a33c3cf0b6cd3569354ef2356e7402c92c3a85c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.1.0-2 +Version: 2.1.0-3 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), @@ -19,7 +19,7 @@ Description: Tool to automatically fetch, transform and arrange subsets of the tool suitable for any research field where large multidimensional data sets are involved. Depends: - R (>= 3.2.0) + R (>= 3.6.1) Imports: abind, bigmemory, diff --git a/R/Start.R b/R/Start.R index 811051166b33b519e0af407ee7a6a7908a5011f5..33d5809a80723800fe0d0931eebf74bd00daac7d 100644 --- a/R/Start.R +++ b/R/Start.R @@ -836,103 +836,21 @@ Start <- function(..., # dim = indices/selectors, Subset <- ClimProjDiags::Subset dim_params <- list(...) - # Take *_var parameters apart - var_params_ind <- grep('_var$', names(dim_params)) - var_params <- dim_params[var_params_ind] - # Check all *_var are NULL or vectors of character strings, and - # that they all have a matching dimension param. - i <- 1 - for (var_param in var_params) { - if (!is.character(var_param)) { - stop("All '*_var' parameters must be character strings.") - } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i], - '_var$')[[1]][1], '$'), - names(dim_params)))) { - stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", - names(var_params)[i], "' but no parameter '", - strsplit(names(var_params)[i], '_var$')[[1]][1], "'.")) - } - i <- i + 1 - } - # Make the keys of 'var_params' to be the name of - # the corresponding dimension. - if (length(var_params) < 1) { - var_params <- NULL - } else { - names(var_params) <- gsub('_var$', '', names(var_params)) - } + var_params <- take_var_params(dim_params) # Take *_reorder parameters apart - dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) - dim_reorder_params <- dim_params[dim_reorder_params_ind] - # Make the keys of 'dim_reorder_params' to be the name of - # the corresponding dimension. - if (length(dim_reorder_params) < 1) { - dim_reorder_params <- NULL - } else { - names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params)) - } - + dim_reorder_params <- take_var_reorder(dim_params) + # Take *_tolerance parameters apart tolerance_params_ind <- grep('_tolerance$', names(dim_params)) tolerance_params <- dim_params[tolerance_params_ind] # Take *_depends parameters apart - depends_params_ind <- grep('_depends$', names(dim_params)) - depends_params <- dim_params[depends_params_ind] - # Check all *_depends are NULL or vectors of character strings, and - # that they all have a matching dimension param. - i <- 1 - for (depends_param in depends_params) { - if (!is.character(depends_param) || (length(depends_param) > 1)) { - stop("All '*_depends' parameters must be single character strings.") - } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], - '_depends$')[[1]][1], '$'), - names(dim_params)))) { - stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", - names(depends_params)[i], "' but no parameter '", - strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) - } - i <- i + 1 - } - # Make the keys of 'depends_params' to be the name of - # the corresponding dimension. - if (length(depends_params) < 1) { - depends_params <- NULL - } else { - names(depends_params) <- gsub('_depends$', '', names(depends_params)) - } - # Change name to depending_file_dims - depending_file_dims <- depends_params - + depending_file_dims <- take_var_depends(dim_params) + # Take *_across parameters apart - across_params_ind <- grep('_across$', names(dim_params)) - across_params <- dim_params[across_params_ind] - # Check all *_across are NULL or vectors of character strings, and - # that they all have a matching dimension param. - i <- 1 - for (across_param in across_params) { - if (!is.character(across_param) || (length(across_param) > 1)) { - stop("All '*_across' parameters must be single character strings.") - } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], - '_across$')[[1]][1], '$'), - names(dim_params)))) { - stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", - names(across_params)[i], "' but no parameter '", - strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) - } - i <- i + 1 - } - # Make the keys of 'across_params' to be the name of - # the corresponding dimension. - if (length(across_params) < 1) { - across_params <- NULL - } else { - names(across_params) <- gsub('_across$', '', names(across_params)) - } - # Change name to inner_dims_across_files - inner_dims_across_files <- across_params + inner_dims_across_files <- take_var_across(dim_params) # Check merge_across_dims if (!is.logical(merge_across_dims)) { @@ -951,71 +869,11 @@ Start <- function(..., # dim = indices/selectors, } # Leave alone the dimension parameters in the variable dim_params - if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, - depends_params_ind, across_params_ind)) > 0) { - dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, - tolerance_params_ind, depends_params_ind, - across_params_ind)] - # Reallocating pairs of across file and inner dimensions if they have - # to be merged. They are put one next to the other to ease merge later. - if (merge_across_dims) { - for (inner_dim_across in names(inner_dims_across_files)) { - inner_dim_pos <- which(names(dim_params) == inner_dim_across) - file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) - new_pos <- inner_dim_pos - if (file_dim_pos < inner_dim_pos) { - new_pos <- new_pos - 1 - } - dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] - dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] - new_dim_params <- list() - if (new_pos > 1) { - new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) - } - new_dim_params <- c(new_dim_params, dim_params_to_move) - if (length(dim_params) >= new_pos) { - new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) - } - dim_params <- new_dim_params - } - } - } + dim_params <- rebuild_dim_params(dim_params, merge_across_dims, + inner_dims_across_files) dim_names <- names(dim_params) - if (is.null(dim_names)) { - stop("At least one pattern dim must be specified.") - } - # Look for chunked dims - chunks <- vector('list', length(dim_names)) - names(chunks) <- dim_names - for (dim_name in dim_names) { - if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { - chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') - attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] - } else { - chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) - } - } - # This is a helper function to compute the chunk indices to take once the total - # number of indices for a dimension has been discovered. - chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { - if (n_chunks > n_indices) { - stop("Requested to divide dimension '", dim_name, "' of length ", - n_indices, " in ", n_chunks, " chunks, which is not possible.") - } - chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) - chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks - if (chunks_to_extend > 0) { - chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 - } - chunk_size <- chunk_sizes[chunk] - offset <- 0 - if (chunk > 1) { - offset <- sum(chunk_sizes[1:(chunk - 1)]) - } - indices <- 1:chunk_sizes[chunk] + offset - array(indices, dim = setNames(length(indices), dim_name)) - } + chunks <- look_for_chunks(dim_params, dim_names) # Check pattern_dims if (is.null(pattern_dims)) { diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000000000000000000000000000000000000..af04e6e2c60fc8e5a8c4d60d1d19a3afbfd547ab --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,184 @@ +# Take *_var parameters apart +take_var_params <- function(dim_params) { + # Take *_var parameters apart + var_params_ind <- grep('_var$', names(dim_params)) + var_params <- dim_params[var_params_ind] + # Check all *_var are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (var_param in var_params) { + if (!is.character(var_param)) { + stop("All '*_var' parameters must be character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i], + '_var$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", + names(var_params)[i], "' but no parameter '", + strsplit(names(var_params)[i], '_var$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'var_params' to be the name of + # the corresponding dimension. + if (length(var_params) < 1) { + var_params <- NULL + } else { + names(var_params) <- gsub('_var$', '', names(var_params)) + } + return(var_params) +} + +# Take *_reorder parameters apart +take_var_reorder <- function(dim_params) { + # Take *_reorder parameters apart + dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) + dim_reorder_params <- dim_params[dim_reorder_params_ind] + # Make the keys of 'dim_reorder_params' to be the name of + # the corresponding dimension. + if (length(dim_reorder_params) < 1) { + dim_reorder_params <- NULL + } else { + names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params)) + } + return(dim_reorder_params) +} + +# Take *_depends parameters apart +take_var_depends <- function(dim_params) { + depends_params_ind <- grep('_depends$', names(dim_params)) + depends_params <- dim_params[depends_params_ind] + # Check all *_depends are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (depends_param in depends_params) { + if (!is.character(depends_param) || (length(depends_param) > 1)) { + stop("All '*_depends' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], + '_depends$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", + names(depends_params)[i], "' but no parameter '", + strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'depends_params' to be the name of + # the corresponding dimension. + if (length(depends_params) < 1) { + depends_params <- NULL + } else { + names(depends_params) <- gsub('_depends$', '', names(depends_params)) + } + return(depends_params) +} + +# Take *_across parameters apart +take_var_across <- function(dim_params) { + across_params_ind <- grep('_across$', names(dim_params)) + across_params <- dim_params[across_params_ind] + # Check all *_across are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (across_param in across_params) { + if (!is.character(across_param) || (length(across_param) > 1)) { + stop("All '*_across' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], + '_across$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", + names(across_params)[i], "' but no parameter '", + strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'across_params' to be the name of + # the corresponding dimension. + if (length(across_params) < 1) { + across_params <- NULL + } else { + names(across_params) <- gsub('_across$', '', names(across_params)) + } + return(across_params) +} + +# Leave alone the dimension parameters in the variable dim_params +rebuild_dim_params <- function(dim_params, merge_across_dims, + inner_dims_across_files) { + var_params_ind <- grep('_var$', names(dim_params)) + dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) + tolerance_params_ind <- grep('_tolerance$', names(dim_params)) + depends_params_ind <- grep('_depends$', names(dim_params)) + across_params_ind <- grep('_across$', names(dim_params)) + # Leave alone the dimension parameters in the variable dim_params + if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, + depends_params_ind, across_params_ind)) > 0) { + dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, + tolerance_params_ind, depends_params_ind, + across_params_ind)] + # Reallocating pairs of across file and inner dimensions if they have + # to be merged. They are put one next to the other to ease merge later. + if (merge_across_dims) { + for (inner_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(dim_params) == inner_dim_across) + file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) + new_pos <- inner_dim_pos + if (file_dim_pos < inner_dim_pos) { + new_pos <- new_pos - 1 + } + dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] + dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] + new_dim_params <- list() + if (new_pos > 1) { + new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) + } + new_dim_params <- c(new_dim_params, dim_params_to_move) + if (length(dim_params) >= new_pos) { + new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) + } + dim_params <- new_dim_params + } + } + } + dim_names <- names(dim_params) + if (is.null(dim_names)) { + stop("At least one pattern dim must be specified.") + } + return(dim_params) +} + +# Look for chunked dims +look_for_chunks <- function(dim_params, dim_names) { + chunks <- vector('list', length(dim_names)) + names(chunks) <- dim_names + for (dim_name in dim_names) { + if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { + chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') + attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] + } else { + chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) + } + } + return(chunks) +} + +# This is a helper function to compute the chunk indices to take once the total +# number of indices for a dimension has been discovered. + chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { + if (n_chunks > n_indices) { + stop("Requested to divide dimension '", dim_name, "' of length ", + n_indices, " in ", n_chunks, " chunks, which is not possible.") + } + chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) + chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk] + offset <- 0 + if (chunk > 1) { + offset <- sum(chunk_sizes[1:(chunk - 1)]) + } + indices <- 1:chunk_sizes[chunk] + offset + array(indices, dim = setNames(length(indices), dim_name)) + } +