diff --git a/R/Start.R b/R/Start.R index 2854453fc65ee7b90c39a639dda28d66a1a6b9f3..288524a6b56687e1cfadc390aa95ff53aff9e046 100644 --- a/R/Start.R +++ b/R/Start.R @@ -18,7 +18,8 @@ Start <- function(..., # dim = indices/selectors, apply_indices_after_transform = FALSE, pattern_dims = NULL, metadata_dims = NULL, - selector_checker = SelectorChecker, + selector_checker = SelectorChecker, + retrieve = FALSE, num_procs = NULL, silent = FALSE, debug = FALSE) { #, config_file = NULL #dictionary_dim_names = , @@ -461,6 +462,11 @@ Start <- function(..., # dim = indices/selectors, } transform_extra_cells <- round(transform_extra_cells) + # Check retrieve + if (!is.logical(retrieve)) { + stop("Parameter 'retrieve' must be TRUE or FALSE.") + } + # Check num_procs if (!is.null(num_procs)) { if (!is.numeric(num_procs)) { @@ -2251,6 +2257,9 @@ print("-> PROCEEDING TO CROP VARIABLES") new_dims <- .MergeArrayDims(dim(array_of_files_to_load), total_inner_dims) final_dims <- pmax(new_dims[[1]], new_dims[[2]])[dim_names] + # The following several lines will only be run if retrieve = TRUE + if (retrieve) { + ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ########### # TODO: try performance of storing all in cols instead of rows # Create the shared memory array, and a pointer to it, to be sent @@ -2553,6 +2562,8 @@ print("-> WORK PIECES BUILT") array_of_not_found_files <- NULL } + } # End if (retrieve) + # Replace the vars and common vars by the transformed vars and common vars for (i in 1:length(dat)) { if (length(names(transformed_vars[[i]])) > 0) { @@ -2572,19 +2583,38 @@ print(str(transformed_vars)) print("-> THE PICKED VARS:") print(str(picked_vars)) } - if (!silent) { - .message("Successfully retrieved data.") - } file_selectors <- NULL for (i in 1:length(dat)) { file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])] } - list(Data = data_array, - Variables = c(list(common = 
picked_common_vars), picked_vars), - Files = array_of_files_to_load, - NotFoundFiles = array_of_not_found_files, - FileSelectors = file_selectors) + if (retrieve) { + if (!silent) { + .message("Successfully retrieved data.") + } + structure( + list(Data = data_array, + Variables = c(list(common = picked_common_vars), picked_vars), + Files = array_of_files_to_load, + NotFoundFiles = array_of_not_found_files, + FileSelectors = file_selectors), + class = 'startR_cube' + ) + } else { + if (!silent) { + .message("Successfully discovered data dimensions.") + } + startR_call <- match.call() + startR_call[['retrieve']] <- TRUE + structure( + list(Dimensions = final_dims, + Variables = c(list(common = picked_common_vars), picked_vars), + ExpectedFiles = array_of_files_to_load, + FileSelectors = file_selectors, + StartRCall = startR_call), + class = 'startR_header' + ) + } } # This function is the responsible for loading the data of each work diff --git a/man/Start.Rd b/man/Start.Rd index d56d23f58a6718729f18e08835fffcaaddf12b7a..bda85a8c6b0dc4b4aa8b62fc53d5427e52de06fd 100644 --- a/man/Start.Rd +++ b/man/Start.Rd @@ -38,6 +38,7 @@ Start(..., pattern_dims = NULL, metadata_dims = NULL, selector_checker = SelectorChecker, + retrieve = FALSE, num_procs = NULL, silent = FALSE, debug = FALSE) @@ -344,6 +345,9 @@ It expects to receive a vector of character strings with the names of the file d } \item{selector_checker}{ Function used internaly by \code{Start()} to translate a set of selectors (values for a dimension associated to a coordinate variable) into a set of numeric indices. It takes by default \code{SelectorChecker} and, in principle, it should not be required to change it for customized file formats. The option to replace it is left open for more versatility. See the code of \code{SelectorChecker} for details on the inputs, functioning and outputs of a selector checker. 
+ } + \item{retrieve}{ +Logical value telling whether to retrieve the data defined in the \code{Start} call or to explore only its dimension lengths and names, and the values for the file and inner dimensions. Takes FALSE by default. } \item{num_procs}{ Number of processes to be created for the parallel execution of the retrieval / transformation / arrangement of the multiple involved files in a call to \code{Start()}. Takes by default the number of available cores (as detected by \code{detectCores()} in the package 'future'). @@ -352,13 +356,14 @@ Number of processes to be created for the parallel execution of the retrieval / Boolean flag, whether to display progress messages (FALSE; default) or not (TRUE). } \item{debug}{ -Whether to return detailed messages on the progress and operations in a \code{Start()} call (TRUE) or not (FALSE; default). +Whether to return detailed messages on the progress and operations in a \code{Start} call (TRUE) or not (FALSE; default). } } \details{ Check \href{https://earth.bsc.es/gitlab/es/startR}{the startR website} for more information. } \value{ +If \code{retrieve = TRUE} the involved data is loaded into RAM memory and an object of the class 'startR_cube' with the following components is returned:\cr \item{Data}{ Multidimensional data array with named dimensions, with the data values requested via \dots and other parameters. This array can potentially contain metadata in the attribute 'variables'. } @@ -374,6 +379,22 @@ Array with the same shape as \code{$Files} but with \code{NULL} in the positions \item{FileSelectors}{ Multidimensional character string array with named dimensions, with the same shape as \code{$Files} and \code{$NotFoundFiles}, which contains the components used to build up the paths to each of the files in the data sources. 
} +If \code{retrieve = FALSE} the involved data is not loaded into RAM memory and an object of the class 'startR_header' with the following components is returned:\cr + \item{Dimensions}{ +Named vector with the dimension lengths and names of the data involved in the \code{Start} call. + } + \item{Variables}{ +Named list of 1 + N components, containing lists of retrieved variables (as requested in \code{return_vars}) common to all the data sources (in the 1st component, \code{$common}), and for each of the N data sources (named after the source name, as specified in \dots, or, if not specified, \code{$dat1}, \code{$dat2}, ..., \code{$datN}). Each of the variables is contained in a multidimensional array with named dimensions, and potentially with the attribute 'variables' with additional auxiliary data. + } + \item{ExpectedFiles}{ +Multidimensional character string array with named dimensions. Its dimensions are the file dimensions (as requested in \dots). Each cell in this array contains a path to a file to be retrieved (which may exist or not). + } + \item{FileSelectors}{ +Multidimensional character string array with named dimensions, with the same shape as \code{$ExpectedFiles}, which contains the components used to build up the paths to each of the files in the data sources. + } + \item{StartRCall}{ +The call to \code{Start} as captured by \code{match.call()}, with the parameter \code{retrieve} set to \code{TRUE}. Intended to be evaluated a posteriori (e.g. with \code{eval}) in order to retrieve the associated data. + } } \author{ History:\cr