# Take *_var parameters apart take_var_params <- function(dim_params) { # Take *_var parameters apart var_params_ind <- grep('_var$', names(dim_params)) var_params <- dim_params[var_params_ind] # Check all *_var are NULL or vectors of character strings, and # that they all have a matching dimension param. i <- 1 for (var_param in var_params) { if (!is.character(var_param)) { stop("All '*_var' parameters must be character strings.") } else if (!any(grepl(paste0('^', strsplit(names(var_params)[i], '_var$')[[1]][1], '$'), names(dim_params)))) { stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", names(var_params)[i], "' but no parameter '", strsplit(names(var_params)[i], '_var$')[[1]][1], "'.")) } i <- i + 1 } # Make the keys of 'var_params' to be the name of # the corresponding dimension. if (length(var_params) < 1) { var_params <- NULL } else { names(var_params) <- gsub('_var$', '', names(var_params)) } return(var_params) } # Take *_reorder parameters apart take_var_reorder <- function(dim_params) { # Take *_reorder parameters apart dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) dim_reorder_params <- dim_params[dim_reorder_params_ind] # Make the keys of 'dim_reorder_params' to be the name of # the corresponding dimension. if (length(dim_reorder_params) < 1) { dim_reorder_params <- NULL } else { names(dim_reorder_params) <- gsub('_reorder$', '', names(dim_reorder_params)) } return(dim_reorder_params) } # Take *_depends parameters apart take_var_depends <- function(dim_params) { depends_params_ind <- grep('_depends$', names(dim_params)) depends_params <- dim_params[depends_params_ind] # Check all *_depends are NULL or vectors of character strings, and # that they all have a matching dimension param. i <- 1 for (depends_param in depends_params) { if (!is.character(depends_param) || (length(depends_param) > 1)) { stop("All '*_depends' parameters must be single character strings.") } else if (!any(grepl(paste0('^', strsplit(names(depends_params)[i], '_depends$')[[1]][1], '$'), names(dim_params)))) { stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", names(depends_params)[i], "' but no parameter '", strsplit(names(depends_params)[i], '_depends$')[[1]][1], "'.")) } i <- i + 1 } # Make the keys of 'depends_params' to be the name of # the corresponding dimension. if (length(depends_params) < 1) { depends_params <- NULL } else { names(depends_params) <- gsub('_depends$', '', names(depends_params)) } return(depends_params) } # Take *_across parameters apart take_var_across <- function(dim_params) { across_params_ind <- grep('_across$', names(dim_params)) across_params <- dim_params[across_params_ind] # Check all *_across are NULL or vectors of character strings, and # that they all have a matching dimension param. i <- 1 for (across_param in across_params) { if (!is.character(across_param) || (length(across_param) > 1)) { stop("All '*_across' parameters must be single character strings.") } else if (!any(grepl(paste0('^', strsplit(names(across_params)[i], '_across$')[[1]][1], '$'), names(dim_params)))) { stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", names(across_params)[i], "' but no parameter '", strsplit(names(across_params)[i], '_across$')[[1]][1], "'.")) } i <- i + 1 } # Make the keys of 'across_params' to be the name of # the corresponding dimension. if (length(across_params) < 1) { across_params <- NULL } else { names(across_params) <- gsub('_across$', '', names(across_params)) } return(across_params) } # Leave alone the dimension parameters in the variable dim_params rebuild_dim_params <- function(dim_params, merge_across_dims, inner_dims_across_files) { var_params_ind <- grep('_var$', names(dim_params)) dim_reorder_params_ind <- grep('_reorder$', names(dim_params)) tolerance_params_ind <- grep('_tolerance$', names(dim_params)) depends_params_ind <- grep('_depends$', names(dim_params)) across_params_ind <- grep('_across$', names(dim_params)) # Leave alone the dimension parameters in the variable dim_params if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, depends_params_ind, across_params_ind)) > 0) { dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, depends_params_ind, across_params_ind)] # Reallocating pairs of across file and inner dimensions if they have # to be merged. They are put one next to the other to ease merge later. if (merge_across_dims) { for (inner_dim_across in names(inner_dims_across_files)) { inner_dim_pos <- which(names(dim_params) == inner_dim_across) file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) new_pos <- inner_dim_pos if (file_dim_pos < inner_dim_pos) { new_pos <- new_pos - 1 } dim_params_to_move <- dim_params[c(inner_dim_pos, file_dim_pos)] dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] new_dim_params <- list() if (new_pos > 1) { new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - 1)]) } new_dim_params <- c(new_dim_params, dim_params_to_move) if (length(dim_params) >= new_pos) { new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) } dim_params <- new_dim_params } } } dim_names <- names(dim_params) if (is.null(dim_names)) { stop("At least one pattern dim must be specified.") } return(dim_params) } # Look for chunked dims look_for_chunks <- function(dim_params, dim_names) { chunks <- vector('list', length(dim_names)) names(chunks) <- dim_names for (dim_name in dim_names) { if (!is.null(attr(dim_params[[dim_name]], 'chunk'))) { chunks[[dim_name]] <- attr(dim_params[[dim_name]], 'chunk') attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == 'chunk')] } else { chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) } } return(chunks) } # This is a helper function to compute the chunk indices to take once the total # number of indices for a dimension has been discovered. chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { if (n_chunks > n_indices) { stop("Requested to divide dimension '", dim_name, "' of length ", n_indices, " in ", n_chunks, " chunks, which is not possible.") } chunk_sizes <- rep(floor(n_indices / n_chunks), n_chunks) chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks if (chunks_to_extend > 0) { chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + 1 } chunk_size <- chunk_sizes[chunk] offset <- 0 if (chunk > 1) { offset <- sum(chunk_sizes[1:(chunk - 1)]) } indices <- 1:chunk_sizes[chunk] + offset array(indices, dim = setNames(length(indices), dim_name)) } # Check pattern_dims # Function found_pattern_dims may change pattern_dims in the parent.env found_pattern_dims <- function(pattern_dims, dim_names, var_params, dim_params, dim_reorder_params) { if (is.null(pattern_dims)) { .warning(paste0("Parameter 'pattern_dims' not specified. Taking the first dimension, '", dim_names[1], "' as 'pattern_dims'.")) assign('pattern_dims', dim_names[1], envir = parent.frame()) pattern_dims <- dim_names[1] } else if (is.character(pattern_dims) && (length(pattern_dims) > 0)) { assign('pattern_dims', unique(pattern_dims), envir = parent.frame()) pattern_dims <- unique(pattern_dims) } else { stop("Parameter 'pattern_dims' must be a vector of character strings.") } if (any(names(var_params) %in% pattern_dims)) { stop("'*_var' parameters specified for pattern dimensions. Remove or fix them.") } # Find the pattern dimension with the pattern specifications found_pattern_dim <- NULL for (pattern_dim in pattern_dims) { # Check all specifications in pattern_dim are valid # dat <- datasets <- dim_params[[pattern_dim]] dat <- dim_params[[pattern_dim]] if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > 0)) && !is.list(dat)) { stop(paste0("Parameter '", pattern_dim, "' must be a list of lists with pattern specifications or a vector of character strings.")) } if (!is.null(dim_reorder_params[[pattern_dim]])) { .warning(paste0("A reorder for the selectors of '", pattern_dim, "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) } if (is.list(dat) || any(sapply(dat, is.list))) { if (is.null(found_pattern_dim)) { found_pattern_dim <- pattern_dim } else { stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.") } } } if (is.null(found_pattern_dim)) { .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications.")) found_pattern_dim <- pattern_dims[1] } return(found_pattern_dim) } # The variable 'dat' is mounted with the information (name, path) of each dataset. # NOTE: This function creates the object 'dat_names' in the parent env. mount_dat <- function(dat, pattern_dim, found_pattern_dim) { # dat_info_names <- c('name', 'path')#, 'nc_var_name', 'suffix', 'var_min', 'var_max', 'dimnames') dat_to_fetch <- c() dat_names <- c() if (!is.list(dat)) { dat <- as.list(dat) } else { if (!any(sapply(dat, is.list))) { dat <- list(dat) } } for (i in 1:length(dat)) { if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) { if (grepl('^(\\./|\\.\\./|/.*/|~/)', dat[[i]])) { dat[[i]] <- list(path = dat[[i]]) } else { dat[[i]] <- list(name = dat[[i]]) } } else if (!is.list(dat[[i]])) { stop(paste0("Parameter '", pattern_dim, "' is incorrect. It must be a list of lists or character strings.")) } #if (!(all(names(dat[[i]]) %in% dat_info_names))) { # stop("Error: parameter 'dat' is incorrect. There are unrecognized components in the information of some of the datasets. Check 'dat' in ?Load for details.") #} if (!('name' %in% names(dat[[i]]))) { dat[[i]][['name']] <- paste0('dat', i) if (!('path' %in% names(dat[[i]]))) { stop(paste0("Parameter '", found_pattern_dim, "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided.")) } } else if (!('path' %in% names(dat[[i]]))) { dat_to_fetch <- c(dat_to_fetch, i) } #if ('path' %in% names(dat[[i]])) { # if (!('nc_var_name' %in% names(dat[[i]]))) { # dat[[i]][['nc_var_name']] <- '$var_name$' # } # if (!('suffix' %in% names(dat[[i]]))) { # dat[[i]][['suffix']] <- '' # } # if (!('var_min' %in% names(dat[[i]]))) { # dat[[i]][['var_min']] <- '' # } # if (!('var_max' %in% names(dat[[i]]))) { # dat[[i]][['var_max']] <- '' # } #} dat_names <- c(dat_names, dat[[i]][['name']]) } if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) { .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") } if (length(dat_to_fetch) > 0) { stop("Specified only the name for some data sets, but not the path ", "pattern. This option has not been yet implemented.") } assign('dat_names', dat_names, envir = parent.frame()) return(dat) } # Add attributes indicating whether this dimension selector is value or indice add_value_indices_flag <- function(x) { if (is.null(attr(x, 'values')) || is.null(attr(x, 'indices'))) { flag <- (any(x %in% c('all', 'first', 'last')) || is.numeric(unlist(x))) attr(x, 'values') <- !flag attr(x, 'indices') <- flag } return(x) } # Find the value for the undefined selector (i.e., indices()). Use the value from the first # found file. # Note that "dat[[i]][['path']]" in parent env. is changed in this function. find_ufd_value <- function(undefined_file_dims, dat, i, replace_values, first_file, file_dims, path_glob_permissive, depending_file_dims, dat_selectors, selector_checker, chunks) { first_values <- vector('list', length = length(undefined_file_dims)) names(first_values) <- undefined_file_dims found_values <- 0 stop <- FALSE try_dim <- 1 last_success <- 1 while ((found_values < length(undefined_file_dims)) && !stop) { u_file_dim <- undefined_file_dims[try_dim] if (is.null(first_values[[u_file_dim]])) { path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values[-which(file_dims == u_file_dim)], allow_undefined_key_vars = TRUE) found_value <- .FindTagValue(path_with_globs_and_tag, first_file, u_file_dim) if (!is.null(found_value)) { found_values <- found_values + 1 last_success <- try_dim first_values[[u_file_dim]] <- found_value replace_values[[u_file_dim]] <- found_value } } try_dim <- (try_dim %% length(undefined_file_dims)) + 1 if (try_dim == last_success) { stop <- TRUE } } if (found_values < length(undefined_file_dims)) { stop(paste0("Path pattern of dataset '", dat[[i]][['name']], "' is too complex. Could not automatically ", "detect values for all non-explicitly defined ", "indices. Check its pattern: ", dat[[i]][['path']])) } ## TODO: Replace ReplaceGlobExpressions by looped call to FindTagValue? As done above ## Maybe it can solve more cases actually. I got warnings in ReplGlobExp with a typical ## cmor case, requesting all members and chunks for fixed var and sdate. Not fixing ## sdate raised 'too complex' error. # Replace shell globs in path pattern and keep the file_dims as tags dat[[i]][['path']] <- .ReplaceGlobExpressions(dat[[i]][['path']], first_file, replace_values, file_dims, dat[[i]][['name']], path_glob_permissive) # Now time to look for the available values for the non # explicitly defined selectors for the file dimensions. #print("H") # Check first the ones that do not depend on others. ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% names(depending_file_dims)))], undefined_file_dims[which(undefined_file_dims %in% names(depending_file_dims))]) for (u_file_dim in ufd) { replace_values[undefined_file_dims] <- first_values replace_values[[u_file_dim]] <- '*' depended_dim <- NULL depended_dim_values <- NA #NOTE: Here 'selectors' is always 1. Is it supposed to be like this? selectors <- dat_selectors[[u_file_dim]][[1]] if (u_file_dim %in% names(depending_file_dims)) { depended_dim <- depending_file_dims[[u_file_dim]] depended_dim_values <- dat_selectors[[depended_dim]][[1]] dat_selectors[[u_file_dim]] <- vector('list', length = length(depended_dim_values)) names(dat_selectors[[u_file_dim]]) <- depended_dim_values } else { dat_selectors[[u_file_dim]] <- list() } if (u_file_dim %in% unlist(depending_file_dims)) { depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, function(x) u_file_dim %in% x))] replace_values[depending_dims] <- rep('*', length(depending_dims)) } for (j in 1:length(depended_dim_values)) { parsed_values <- c() if (!is.null(depended_dim)) { replace_values[[depended_dim]] <- depended_dim_values[j] } path_with_globs <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values) found_files <- Sys.glob(path_with_globs) ## TODO: Enhance this error message, or change by warning. ## Raises if a wrong sdate is specified, for example. if (length(found_files) == 0) { .warning(paste0("Could not find files for any '", u_file_dim, "' for '", depended_dim, "' = '", depended_dim_values[j], "'.")) dat_selectors[[u_file_dim]][[j]] <- NA } else { for (found_file in found_files) { path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][['path']], replace_values[-which(file_dims == u_file_dim)], allow_undefined_key_vars = TRUE) parsed_values <- c(parsed_values, .FindTagValue(path_with_globs_and_tag, found_file, u_file_dim)) } #TODO: selector_checker() doesn't allow selectors to be characters. For selectors # like "member = 'r7i1p1f1", it cannot be defined with values. dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors, var = unique(parsed_values), return_indices = FALSE) # Take chunk if needed dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), chunks[[u_file_dim]]['chunk'], chunks[[u_file_dim]]['n_chunks'], u_file_dim)] } } } #NOTE: change 'dat' in parent env. because "dat[[i]][['path']]" is changed. assign('dat', dat, envir = parent.frame()) return(dat_selectors) }