From 93223232166c0fca21bc549d28e9ecac6bd7d23f Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 18 Feb 2021 15:15:21 +0100 Subject: [PATCH 1/5] Initial bunch of step functions for Start --- .Rbuildignore | 2 +- R/Start.R | 161 +++---------------------------------------- R/zzz.R | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 152 deletions(-) create mode 100644 R/zzz.R diff --git a/.Rbuildignore b/.Rbuildignore index eeac7b8..d60cb0b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,7 +6,7 @@ ^README\.md$ #\..*\.RData$ #^vignettes$ -^tests$ +#^tests$ ^inst/doc$ #^inst/doc/*$ #^inst/doc/figures/$ diff --git a/R/Start.R b/R/Start.R index efa7e0d..e77c297 100644 --- a/R/Start.R +++ b/R/Start.R @@ -836,103 +836,22 @@ Start <- function(..., # dim = indices/selectors, Subset <- ClimProjDiags::Subset dim_params <- list(...) - + .warning(str(dim_params)) # Take *_var parameters apart - var_params_ind <- grep('_var$', names(dim_params)) - var_params <- dim_params[var_params_ind] - # Check all *_var are NULL or vectors of character strings, and - # that they all have a matching dimension param. - i <- 1 - for (var_param in var_params) { - if (!is.character(var_param)) { - stop("All '*_var' parameters must be character strings.") - } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i], - '_var$')[[1]][1], '$'), - names(dim_params)))) { - stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", - names(var_params)[i], "' but no parameter '", - strsplit(names(var_params)[i], '_var$')[[1]][1], "'.")) - } - i <- i + 1 - } - # Make the keys of 'var_params' to be the name of - # the corresponding dimension. - if (length(var_params) < 1) { - var_params <- NULL - } else { - names(var_params) <- gsub('_var$', '', names(var_params)) - } + var_params <- take_var_params(dim_params) # Take *_reorder parameters apart - dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) - dim_reorder_params <- dim_params[dim_reorder_params_ind] - # Make the keys of 'dim_reorder_params' to be the name of - # the corresponding dimension. - if (length(dim_reorder_params) < 1) { - dim_reorder_params <- NULL - } else { - names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params)) - } - + dim_reorder_params <- take_var_reorder(dim_params) + # Take *_tolerance parameters apart tolerance_params_ind <- grep('_tolerance$', names(dim_params)) tolerance_params <- dim_params[tolerance_params_ind] # Take *_depends parameters apart - depends_params_ind <- grep('_depends$', names(dim_params)) - depends_params <- dim_params[depends_params_ind] - # Check all *_depends are NULL or vectors of character strings, and - # that they all have a matching dimension param. - i <- 1 - for (depends_param in depends_params) { - if (!is.character(depends_param) || (length(depends_param) > 1)) { - stop("All '*_depends' parameters must be single character strings.") - } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], - '_depends$')[[1]][1], '$'), - names(dim_params)))) { - stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", - names(depends_params)[i], "' but no parameter '", - strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) - } - i <- i + 1 - } - # Make the keys of 'depends_params' to be the name of - # the corresponding dimension. - if (length(depends_params) < 1) { - depends_params <- NULL - } else { - names(depends_params) <- gsub('_depends$', '', names(depends_params)) - } - # Change name to depending_file_dims - depending_file_dims <- depends_params - + depending_file_dims <- take_var_depends(dim_params) + # Take *_across parameters apart - across_params_ind <- grep('_across$', names(dim_params)) - across_params <- dim_params[across_params_ind] - # Check all *_across are NULL or vectors of character strings, and - # that they all have a matching dimension param. - i <- 1 - for (across_param in across_params) { - if (!is.character(across_param) || (length(across_param) > 1)) { - stop("All '*_across' parameters must be single character strings.") - } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], - '_across$')[[1]][1], '$'), - names(dim_params)))) { - stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", - names(across_params)[i], "' but no parameter '", - strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) - } - i <- i + 1 - } - # Make the keys of 'across_params' to be the name of - # the corresponding dimension. - if (length(across_params) < 1) { - across_params <- NULL - } else { - names(across_params) <- gsub('_across$', '', names(across_params)) - } - # Change name to inner_dims_across_files - inner_dims_across_files <- across_params + inner_dims_across_files <- take_var_across(dim_params) # Check merge_across_dims if (!is.logical(merge_across_dims)) { @@ -951,71 +870,11 @@ Start <- function(..., # dim = indices/selectors, } # Leave alone the dimension parameters in the variable dim_params - if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, - depends_params_ind, across_params_ind)) > 0) { - dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, - tolerance_params_ind, depends_params_ind, - across_params_ind)] - # Reallocating pairs of across file and inner dimensions if they have - # to be merged. They are put one next to the other to ease merge later. - if (merge_across_dims) { - for (inner_dim_across in names(inner_dims_across_files)) { - inner_dim_pos <- which(names(dim_params) == inner_dim_across) - file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) - new_pos <- inner_dim_pos - if (file_dim_pos < inner_dim_pos) { - new_pos <- new_pos - 1 - } - dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] - dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] - new_dim_params <- list() - if (new_pos > 1) { - new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) - } - new_dim_params <- c(new_dim_params, dim_params_to_move) - if (length(dim_params) >= new_pos) { - new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) - } - dim_params <- new_dim_params - } - } - } + dim_params <- rebuild_dim_params(dim_params, merge_across_dims, + inner_dims_across_files) dim_names <- names(dim_params) - if (is.null(dim_names)) { - stop("At least one pattern dim must be specified.") - } - # Look for chunked dims - chunks <- vector('list', length(dim_names)) - names(chunks) <- dim_names - for (dim_name in dim_names) { - if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { - chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') - attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] - } else { - chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) - } - } - # This is a helper function to compute the chunk indices to take once the total - # number of indices for a dimension has been discovered. - chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { - if (n_chunks > n_indices) { - stop("Requested to divide dimension '", dim_name, "' of length ", - n_indices, " in ", n_chunks, " chunks, which is not possible.") - } - chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) - chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks - if (chunks_to_extend > 0) { - chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 - } - chunk_size <- chunk_sizes[chunk] - offset <- 0 - if (chunk > 1) { - offset <- sum(chunk_sizes[1:(chunk - 1)]) - } - indices <- 1:chunk_sizes[chunk] + offset - array(indices, dim = setNames(length(indices), dim_name)) - } + chunks <- look_for_chunks(dim_params, dim_names) # Check pattern_dims if (is.null(pattern_dims)) { diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..af04e6e --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,184 @@ +# Take *_var parameters apart +take_var_params <- function(dim_params) { + # Take *_var parameters apart + var_params_ind <- grep('_var$', names(dim_params)) + var_params <- dim_params[var_params_ind] + # Check all *_var are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (var_param in var_params) { + if (!is.character(var_param)) { + stop("All '*_var' parameters must be character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i], + '_var$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", + names(var_params)[i], "' but no parameter '", + strsplit(names(var_params)[i], '_var$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'var_params' to be the name of + # the corresponding dimension. + if (length(var_params) < 1) { + var_params <- NULL + } else { + names(var_params) <- gsub('_var$', '', names(var_params)) + } + return(var_params) +} + +# Take *_reorder parameters apart +take_var_reorder <- function(dim_params) { + # Take *_reorder parameters apart + dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) + dim_reorder_params <- dim_params[dim_reorder_params_ind] + # Make the keys of 'dim_reorder_params' to be the name of + # the corresponding dimension. + if (length(dim_reorder_params) < 1) { + dim_reorder_params <- NULL + } else { + names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params)) + } + return(dim_reorder_params) +} + +# Take *_depends parameters apart +take_var_depends <- function(dim_params) { + depends_params_ind <- grep('_depends$', names(dim_params)) + depends_params <- dim_params[depends_params_ind] + # Check all *_depends are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (depends_param in depends_params) { + if (!is.character(depends_param) || (length(depends_param) > 1)) { + stop("All '*_depends' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], + '_depends$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", + names(depends_params)[i], "' but no parameter '", + strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'depends_params' to be the name of + # the corresponding dimension. + if (length(depends_params) < 1) { + depends_params <- NULL + } else { + names(depends_params) <- gsub('_depends$', '', names(depends_params)) + } + return(depends_params) +} + +# Take *_across parameters apart +take_var_across <- function(dim_params) { + across_params_ind <- grep('_across$', names(dim_params)) + across_params <- dim_params[across_params_ind] + # Check all *_across are NULL or vectors of character strings, and + # that they all have a matching dimension param. + i <- 1 + for (across_param in across_params) { + if (!is.character(across_param) || (length(across_param) > 1)) { + stop("All '*_across' parameters must be single character strings.") + } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], + '_across$')[[1]][1], '$'), + names(dim_params)))) { + stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", + names(across_params)[i], "' but no parameter '", + strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) + } + i <- i + 1 + } + # Make the keys of 'across_params' to be the name of + # the corresponding dimension. + if (length(across_params) < 1) { + across_params <- NULL + } else { + names(across_params) <- gsub('_across$', '', names(across_params)) + } + return(across_params) +} + +# Leave alone the dimension parameters in the variable dim_params +rebuild_dim_params <- function(dim_params, merge_across_dims, + inner_dims_across_files) { + var_params_ind <- grep('_var$', names(dim_params)) + dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) + tolerance_params_ind <- grep('_tolerance$', names(dim_params)) + depends_params_ind <- grep('_depends$', names(dim_params)) + across_params_ind <- grep('_across$', names(dim_params)) + # Leave alone the dimension parameters in the variable dim_params + if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, + depends_params_ind, across_params_ind)) > 0) { + dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, + tolerance_params_ind, depends_params_ind, + across_params_ind)] + # Reallocating pairs of across file and inner dimensions if they have + # to be merged. They are put one next to the other to ease merge later. + if (merge_across_dims) { + for (inner_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(dim_params) == inner_dim_across) + file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) + new_pos <- inner_dim_pos + if (file_dim_pos < inner_dim_pos) { + new_pos <- new_pos - 1 + } + dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] + dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] + new_dim_params <- list() + if (new_pos > 1) { + new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) + } + new_dim_params <- c(new_dim_params, dim_params_to_move) + if (length(dim_params) >= new_pos) { + new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) + } + dim_params <- new_dim_params + } + } + } + dim_names <- names(dim_params) + if (is.null(dim_names)) { + stop("At least one pattern dim must be specified.") + } + return(dim_params) +} + +# Look for chunked dims +look_for_chunks <- function(dim_params, dim_names) { + chunks <- vector('list', length(dim_names)) + names(chunks) <- dim_names + for (dim_name in dim_names) { + if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { + chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') + attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] + } else { + chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) + } + } + return(chunks) +} + +# This is a helper function to compute the chunk indices to take once the total +# number of indices for a dimension has been discovered. + chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { + if (n_chunks > n_indices) { + stop("Requested to divide dimension '", dim_name, "' of length ", + n_indices, " in ", n_chunks, " chunks, which is not possible.") + } + chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) + chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 + } + chunk_size <- chunk_sizes[chunk] + offset <- 0 + if (chunk > 1) { + offset <- sum(chunk_sizes[1:(chunk - 1)]) + } + indices <- 1:chunk_sizes[chunk] + offset + array(indices, dim = setNames(length(indices), dim_name)) + } + -- GitLab From 2a236320e185a96b7242b509132275e7be4e5033 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 18 Feb 2021 15:18:19 +0100 Subject: [PATCH 2/5] include gitlab-ci.yml --- .gitlab-ci.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..e94ce08 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,11 @@ +stages: + - build +build: + stage: build + script: + - module load R/3.2.0-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - R CMD build --resave-data . + - R CMD check --as-cran --no-manual --run-donttest startR_*.tar.gz + - R -e 'covr::package_coverage()' + -- GitLab From 4504266f5e63aae808dec43e7f8a8f5e06ae38c2 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 18 Feb 2021 16:19:02 +0100 Subject: [PATCH 3/5] Using version R 3.6.1 --- .Rbuildignore | 2 +- DESCRIPTION | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index d60cb0b..90018c7 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,7 +12,7 @@ #^inst/doc/figures/$ #^inst/doc/usecase/$ #^inst/PlotProfiling\.R$ - +.gitlab-ci.yml # Suggested by http://r-pkgs.had.co.nz/package.html ^.*\.Rproj$ # Automatically added by RStudio, ^\.Rproj\.user$ # used for temporary files. diff --git a/DESCRIPTION b/DESCRIPTION index 6d0d002..dec451d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ Description: Tool to automatically fetch, transform and arrange subsets of the tool suitable for any research field where large multidimensional data sets are involved. Depends: - R (>= 3.2.0) + R (>= 3.6.1) Imports: abind, bigmemory, -- GitLab From 77ae20352f6d1bb7b6b5da2998a646fa79f587fb Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 18 Feb 2021 16:22:39 +0100 Subject: [PATCH 4/5] change module in gitlab-ci.yml --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e94ce08..8cd9e96 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,7 @@ stages: build: stage: build script: - - module load R/3.2.0-foss-2015a-bare + - module load R/3.6.1-foss-2015a-bare - module load CDO/1.9.8-foss-2015a - R CMD build --resave-data . - R CMD check --as-cran --no-manual --run-donttest startR_*.tar.gz -- GitLab From 55648e7e766f8bac8c58173d11b93e9d231fe13a Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 1 Mar 2021 10:39:59 +0100 Subject: [PATCH 5/5] Bump version number and remove params print --- DESCRIPTION | 2 +- R/Start.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dec451d..5a33c3c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.1.0-2 +Version: 2.1.0-3 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), diff --git a/R/Start.R b/R/Start.R index 344bdc8..33d5809 100644 --- a/R/Start.R +++ b/R/Start.R @@ -836,7 +836,6 @@ Start <- function(..., # dim = indices/selectors, Subset <- ClimProjDiags::Subset dim_params <- list(...) - .warning(str(dim_params)) # Take *_var parameters apart var_params <- take_var_params(dim_params) -- GitLab