diff --git a/s2dv_review/issue/test.R b/s2dv_review/issue/test.R new file mode 100644 index 0000000000000000000000000000000000000000..d72af314604e3b98e5c108efeb4815c64d2e267b --- /dev/null +++ b/s2dv_review/issue/test.R @@ -0,0 +1 @@ +asd diff --git a/s2dv_review/Composite.R b/s2dv_review/test-compare/Composite/Composite.R similarity index 78% rename from s2dv_review/Composite.R rename to s2dv_review/test-compare/Composite/Composite.R index 9c263bdd052c9c827208f67ba65b9ce2b3e541a4..564f1a2c1c2c1615853a27976404537434e2d032 100644 --- a/s2dv_review/Composite.R +++ b/s2dv_review/test-compare/Composite/Composite.R @@ -1,7 +1,7 @@ Composite <- function(var, occ, lag = 0, eno = FALSE, fileout = NULL) { if ( dim(var)[3] != length(occ) ) { - stop("temporal dimension of var is not equal to length of occ.") + stop("Temporal dimension of var is not equal to length of occ.") } K <- max(occ) composite <- array(dim = c(dim(var)[1 : 2], K)) @@ -23,7 +23,7 @@ Composite <- function(var, occ, lag = 0, eno = FALSE, fileout = NULL) { toberemoved = which(0 > indices | indices > dim(var)[3]) if (length(toberemoved) > 0) { - indices=indices[-toberemoved] + indices = indices[-toberemoved] } if (eno == TRUE) { n_k <- Eno(var[,, indices], posdim = 3) @@ -31,19 +31,19 @@ Composite <- function(var, occ, lag = 0, eno = FALSE, fileout = NULL) { n_k <- length(indices) } if (length(indices) == 1) { - composite[,, k] <- var[,, indices] + composite[, , k] <- var[, , indices] warning(paste("Composite", k, "has length 1 and pvalue is NA.")) } else { - composite[,,k] <- Mean1Dim(var[,, indices], posdim = 3, narm = TRUE) + composite[, , k] <- Mean1Dim(var[, , indices], posdim = 3, narm = TRUE) } - stdv_k <- apply(var[,, indices], c(1, 2), sd, na.rm = TRUE) + stdv_k <- apply(var[, , indices], c(1, 2), sd, na.rm = TRUE) - tvalue <- (mean_tot - composite[,, k]) / + tvalue <- (mean_tot - composite[, , k]) / sqrt(stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) dof <- (stdv_tot ^ 2 / n_tot + stdv_k ^ 2 / n_k) ^ 2 / ((stdv_tot ^ 2 / n_tot) ^ 2 / (n_tot - 1) + (stdv_k ^ 2 / n_k) ^ 2 / (n_k - 1)) - pvalue[,, k] <- 2 * pt(-abs(tvalue), df = dof) + pvalue[, , k] <- 2 * pt(-abs(tvalue), df = dof) } if ( is.null(fileout) == FALSE ) { diff --git a/s2dv_review/test-compare/Composite/test-Composite.R b/s2dv_review/test-compare/Composite/test-Composite.R new file mode 100644 index 0000000000000000000000000000000000000000..c0ed868ab89ec237fd0dbcf8c6042020f85283b8 --- /dev/null +++ b/s2dv_review/test-compare/Composite/test-Composite.R @@ -0,0 +1,36 @@ +library(s2dverification) +source('/home/Earth/aho/aho-testtest/s2dv_review/Composite.R') + +context("Generic tests") +test_that("Sanity checks", { + + expect_error( + Composite(var = array(1:20, dim = c(2, 5, 2)), c(1, 1, 0)), + "temporal dimension of var is not equal to length of occ.") + + expect_warning( + Composite(var = array(1:40, dim = c(2, 5, 4)), c(1, 2, 2, 2)), + "Composite 1 has length 1 and pvalue is NA.") + + var <- array(rep(c(1, 3, 2, 1, 2), 8), dim = c(x = 2, y = 4, time = 5)) + occ <- c(1, 2, 2, 2, 1) + output <- c(x = 2, y = 4, 2) #dim(asd$composite) + expect_equal( + dim(Composite(var, occ)$composite), + output + ) + output <- c(1.5, 2.0, 2.5, 2.0) + expect_equal( + Composite(var, occ)$composite[1, , 1], + output + ) + + var <- array(rep(c(1, 3, 2, 1, 2), 8), dim = c(x = 2, y = 4, time = 5)) + occ <- c(1, 2, 2, 3, 3) + output <- array(as.numeric(rep(NA, 8)), dim = c(2, 4)) + expect_equal( + Composite(var, occ)$pvalue[, , 1], + output + ) + +}) diff --git a/s2dv_review/test-compare/Eno/Eno_new.R b/s2dv_review/test-compare/Eno/Eno_new.R new file mode 100644 index 0000000000000000000000000000000000000000..02f30fff20d28f4f380bcd0635a4329bd8bf8126 --- /dev/null +++ b/s2dv_review/test-compare/Eno/Eno_new.R @@ -0,0 +1,66 @@ +Eno_new <- function(obs, posdim, na.action = na.pass, ncores = NULL) { + + #Check params: + + if (is.null(obs)) { + stop("Parameter 'obs' cannot be NULL.") + } + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + + if (is.null(posdim)) { + stop("Parameter 'posdim' cannot be NULL.") + } + + if (posdim %% 1 != 0 && !is.character(posdim)) { +# if (is.null(dim(obs))) { #a vector +# posdim <- 1 +# } else if (length(dim(obs)) == 1) { +# posdim <- 1 +# } else { + stop("Parameter 'posdim' must be an integer or character.") + } + + if (is.integer(posdim) && posdim > length(dim(obs))) { + stop("Parameter 'posdim' excesses the dimension length of parameter 'obs'.") + } + if (is.character(posdim) && !any(posdim %in% names(dim(obs)))) { + stop("Parameter 'posdim' does not match any dimension name of parameter 'obs'.") + } + + +# if (!is.null(dim(obs))) { + effnumobs <- Apply(data = list(obs), + target_dims = posdim, + fun = .Eno, + na.action = na.action, + ncores = ncores)$output1 + #parallel = parallel) + #effnumobs <- adrop(eno, 1) #dont drop length(dim) = 1. Keep the dim same as input. +# } else { +# effnumobs <- .Eno(obs) +# } + return(effnumobs) +} + +.Eno <- function(x, na.action) { + + if (length(sort(x)) > 1) { + n <- length(sort(x)) + if (n == 0) { + n <- 1 + } + a <- acf(x, lag.max = n - 1, plot = FALSE, + na.action = na.action)$acf[2:n, 1, 1] + s <- 0 + for (k in 1:(n - 1)) { + s <- s + (((n - k) / n) * a[k]) + } + eno <- min(n / (1 + (2 * s)), n) + } else { + eno <- NA + } + eno +} + diff --git a/s2dv_review/test-compare/Eno/Eno_old.R b/s2dv_review/test-compare/Eno/Eno_old.R new file mode 100644 index 0000000000000000000000000000000000000000..a5f89c8ef44f01dea060fd39d32cc54fb2e14e53 --- /dev/null +++ b/s2dv_review/test-compare/Eno/Eno_old.R @@ -0,0 +1,66 @@ +Eno_old <- function(obs, posdim) { + dimsvar <- dim(obs) + if (is.null(dimsvar)) { + dimsvar <- length(obs) + } + enlobs <- Enlarge(obs, 10) + outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) + posaperm <- 1:10 + posaperm[posdim] <- 1 + posaperm[1] <- posdim + enlobs <- aperm(enlobs, posaperm) + dimsaperm <- outdim[posaperm] + # + # Loop on all dimensions to compute effective number of observations + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + enleno <- array(dim = c(1, dimsaperm[2:10])) + for (j2 in 1:dimsaperm[2]) { + for (j3 in 1:dimsaperm[3]) { + for (j4 in 1:dimsaperm[4]) { + for (j5 in 1:dimsaperm[5]) { + for (j6 in 1:dimsaperm[6]) { + for (j7 in 1:dimsaperm[7]) { + for (j8 in 1:dimsaperm[8]) { + for (j9 in 1:dimsaperm[9]) { + for (j10 in 1:dimsaperm[10]) { + tmp <- enlobs[, j2, j3, j4, j5, j6, j7, j8, j9, j10] + if (length(sort(tmp)) > 1 ) { + n <- length(sort(tmp)) + a <- acf(tmp, lag.max = n - 1, plot = FALSE, + na.action = na.pass)$acf[2:n, 1, 1] + s <- 0 + for (k in 1:(n - 1)) { + s <- s + (((n - k) / n) * a[k]) + } + enleno[1, j2, j3, j4, j5, j6, j7, j8, j9, + j10] <- min(n / (1 + (2 * s)), n) + } + } + } + } + } + } + } + } + } + } + # + # Back to the original dimensions + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + #dimsvar <- dimsvar[-posdim] + if (length(dimsvar) == 1) { + dimsvar <- 1 + } else { + dimsvar <- dimsvar[-posdim] + } + effnumobs <- array(dim = dimsvar) + effnumobs[] <- aperm(enleno, posaperm) + # + # Outputs + # ~~~~~~~~~ + # + effnumobs +} + diff --git a/s2dv_review/test-compare/Eno/test-compare.R b/s2dv_review/test-compare/Eno/test-compare.R new file mode 100644 index 0000000000000000000000000000000000000000..f6e5bf66ade3790f6d20b118e2d7119183bea27e --- /dev/null +++ b/s2dv_review/test-compare/Eno/test-compare.R @@ -0,0 +1,23 @@ +library(s2dverification) +library(multiApply) +source('Eno_old.R') +source('Eno_new.R') + +#test1 + var <- array(1:20, dim = c(2, 4, 5)) + var[1,1,1] <- NA + res1 <- Eno_new(obs = var, posdim = 2, na.action = na.pass) + res2 <- Eno_old(obs = var, posdim = 2) + + print("Test 1: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +#test2 + var <- array(rnorm(80), dim = c(1, 2, 4, 10)) + res1 <- Eno_new(obs = var, posdim = 1) + res2 <- Eno_old(obs = var, posdim = 1) + + print("Test 2: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +print('All tests pass!') diff --git a/s2dv_review/test-compare/Season/Season_new.R b/s2dv_review/test-compare/Season/Season_new.R new file mode 100644 index 0000000000000000000000000000000000000000..7a0ae331349f6e6d25d61efb8433ed870e784fd9 --- /dev/null +++ b/s2dv_review/test-compare/Season/Season_new.R @@ -0,0 +1,169 @@ +#'Computes Seasonal Means +#' +#'Computes seasonal means (or other operations) on monthly timeseries from n-dimensional arrays with named dimensions +#' +#'@param var a numeric n-dimensional array with named dimensions on monthy frequency. +#'@param posdim a character indicating the name of the dimension or a integer numeric indicating the position along which to compute seasonal means (or other operations). By default, 'time' dimension is expected. +#'@param monini an integer indicating the first month of the time series: 1 to 12. +#'@param moninf an integer indicating the month when to start the seasonal means: 1 to 12. +#'@param monsup an integer indicating the month when to stop the seasonal means: 1 to 12. +#'@param operation a character or function indicating the name of a function to be applied in seasonal basins. By default, means are computed. Other operations can be 'sum' for total precipitation. +#' +#'@return Array with the same dimensions as var except along the posdim dimension whose length corresponds to the number of seasons. Partial seasons are not accounted for. +#' +#'@import multiApply +#'@examples +#'dat <- 1 : (12 * 5 * 2 * 3 * 2) +#'dim(dat) <- c(dat = 1, memb = 3, time = 12 * 5, lon = 2, lat = 2) +#'res <- Season(var = dat, monini = 1, moninf = 1, monsup = 2) +#'dat <- 1 : (12 * 2 * 3) +#'dim(dat) <- c(dat = 2, time = 12, memb = 3) +#'res <- Season(var = dat, monini = 1, moninf =1, monsup = 2) +#'@export +Season_new <- function(var, posdim = 'time', monini, moninf, monsup, + operation = mean) { + # Check var + if (is.null(var)) { + stop("Parameter 'var' cannot be NULL.") + } + if (!is.numeric(var)) { + stop("Parameter 'var' must be a numeric array.") + } + if (is.null(dim(var))) { + dim(var) <- c(length(var)) + if (is.character(posdim)) { + names(dim(var)) <- posdim + } else { + names(dim(var)) <- 'time' + posdim <- 'time' + } + time_dim <- 1 + } else { + if (is.null(names(dim(var)))) { + if (is.numeric(posdim)) { + names(dim(var)) <- paste0("D", 1 : length(dim(var))) + names(dim(var))[posdim] <- 'time' + time_dim <- posdim + posdim <- 'time' + } else { + stop("Parameter 'var' must contain dimesnion names.") + } + } else { + if (is.numeric(posdim)) { + time_dim <- posdim + posdim <- names(dim(var))[posdim] + } else if (is.character(posdim)) { + time_dim <- which(names(dim(var)) == posdim) + } else { + stop("Parameter 'posdim' must be a integer or character", + "indicating the 'time' dimension.") + } + } + } + + dim_names <- names(dim(var)) +# series <- apply(var, margins, .Season, +# monini = monini, moninf = moninf, monsup = monsup, +# operation = operation) + series <- Apply(list(var), + target_dims = posdim, + fun = .Season, + monini = monini, moninf = moninf, monsup = monsup, + operation = operation)$output1 + if (is.null(dim(series))) { + dim(series) <- c(time = length(series)) + } else if (names(dim(series))[1] != "") { #& length(dim(series)) > 1) { + dim(series) <- c(1, dim(series)) + names(dim(series))[1] <- posdim + } else { + names(dim(series))[1] <- posdim + } + if (any(dim_names != names(dim(series)))) { + pos <- match(dim_names, names(dim(series))) + series <- aperm(series, pos) + names(dim(series)) <- dim_names + } + return(series) +} + +.Season <- function(x, monini, moninf, monsup, operation = mean) { + # Checks: + if (!is.numeric(x)) { + stop("Parameter 'x' must be a numeric vector.") + } + if (!is.numeric(monini)) { + stop("Parameter 'monini' must be numeric.") + } + if (length(monini) > 1) { + monini <- monini[1] + warning("Parameter 'monini' has length > 1 and only the first ", + "element will be used.") + } + if (monini %% 1 != 0) { + stop("Parameter 'monini' must be an integer.") + } + if (!is.numeric(moninf)) { + stop("Parameter 'moninf' must be numeric.") + } + if (length(moninf) > 1) { + moninf <- moninf[1] + warning("Parameter 'moninf' has length > 1 and only the first ", + "element will be used.") + } + if (moninf %% 1 != 0) { + stop("Parameter 'moninf' must be an integer.") + } + if (!is.numeric(monsup)) { + stop("Parameter 'monsup' must be numeric.") + } + if (length(monsup) > 1) { + monsup <- monsup[1] + warning("Parameter 'monsup' has length > 1 and only the first ", + "element will be used.") + } + if (monsup %% 1 != 0) { + stop("Parameter 'monsup' must be an integer.") + } + # Check fun operation + if (is.character(operation)) { + fun_name <- operation + err <- try({operation <- get(operation)}, silent = TRUE) + if (!is.function(operation)) { + stop("Could not find the function '", fun_name, "'.") + } + } + if (!is.function(operation)) { + stop("Parameter 'operation' must be a function or a character string ", + "with the name of a function.") + } + + # Correction e.g. 'winter': + while (monsup < moninf) { + monsup <- monsup + 12 + } + # Correction need if monini is not January: + moninf <- moninf - monini + 1 + monsup <- monsup - monini + 1 + moninf <- ifelse(moninf <= 0, moninf + 12, moninf) + monsup <- ifelse(monsup <= 0, monsup + 12, monsup) + + #### Create position index: + # Basic index: + pos <- moninf : monsup + # Extended index for all period: + if (length(x) > pos[length(pos)]) { + pos2 <- lapply(pos, function(y) {seq(y, length(x), 12)}) + } else { + pos2 <- pos + } + # Correct if the final season is not complete: + maxyear <- min(unlist(lapply(pos2, length))) + pos2 <- lapply(pos2, function(y) {y[1 : maxyear]}) + # Convert to array: + pos2 <- unlist(pos2) + dim(pos2) <- c(year = maxyear, month = length(pos2)/maxyear) + + timeseries <- apply(pos2, 1, function(y) {operation(x[y])}) + return(timeseries) +} + diff --git a/s2dv_review/test-compare/Season/Season_old.R b/s2dv_review/test-compare/Season/Season_old.R new file mode 100644 index 0000000000000000000000000000000000000000..52e1861068ad538a44c95210793dc67ad86913f5 --- /dev/null +++ b/s2dv_review/test-compare/Season/Season_old.R @@ -0,0 +1,58 @@ +Season_old <- function(var, posdim = 4, monini, moninf, monsup) { + while (monsup < moninf) { + monsup <- monsup + 12 + } + # + # Enlarge the size of var to 10 + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + dimsvar <- dim(var) + if (is.null(dimsvar)) { + dimsvar <- length(var) + } + ntime <- dimsvar[posdim] + enlvar <- Enlarge(var, 10) + outdim <- c(dimsvar, array(1, dim = (10 - length(dimsvar)))) + u <- IniListDims(outdim, 10) + v <- IniListDims(outdim, 10) + # + # Compute the seasonal means + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + ind <- 1:ntime + months <- ((ind - 1) + monini - 1) %% 12 + 1 + years <- ((ind - 1) + monini - 1) %/% 12 + + for (jmon in moninf:monsup) { + u[[posdim]] <- ind[which(months == ((jmon - 1) %% 12 + 1))] + ind0 <- u[[posdim]][1] + indf <- u[[posdim]][length(u[[posdim]])] + if (indf > (ntime - (monsup - jmon))) { + u[[posdim]] <- u[[posdim]][-which(u[[posdim]] == indf)] + } + if (ind0 < (jmon - moninf + 1)) { + u[[posdim]] <- u[[posdim]][-which(u[[posdim]] == ind0)] + } + if (jmon == moninf) { + nseas <- length(u[[posdim]]) + dimsvar[posdim] <- nseas + outdim[posdim] <- nseas + enlvarout <- array(0, dim = outdim) + } + v[[posdim]] <- 1:nseas + enlvarout[v[[1]], v[[2]], v[[3]], v[[4]], v[[5]], v[[6]], v[[7]], v[[8]], + v[[9]], v[[10]]] <- enlvarout[v[[1]], v[[2]], v[[3]], v[[4]], + v[[5]], v[[6]], v[[7]], v[[8]], + v[[9]], v[[10]]] + enlvar[u[[1]], + u[[2]], u[[3]], u[[4]], u[[5]], u[[6]], + u[[7]], u[[8]], u[[9]], u[[10]]] + } + varout <- array(dim = dimsvar) + varout[] <- enlvarout + varout <- varout / (monsup - moninf + 1) + # + # Outputs + # ~~~~~~~~~ + # + varout +} diff --git a/s2dv_review/test-compare/Season/test-compare.R b/s2dv_review/test-compare/Season/test-compare.R new file mode 100644 index 0000000000000000000000000000000000000000..c089a653305846fd374b8a38aab3466036753bdc --- /dev/null +++ b/s2dv_review/test-compare/Season/test-compare.R @@ -0,0 +1,45 @@ +library(s2dverification) +library(multiApply) +source('Season_old.R') +source('Season_new.R') + +#test1 + var <- array(1 : 20, dim = c(2, 4, 5)) + res1 <- Season_new(var, posdim = 3, monini = 1, moninf = 1, monsup = 2) + res2 <- Season_old(var, posdim = 3, monini = 1, moninf = 1, monsup = 2) + + print("Test 1: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +#test2 + names(dim(var)) <- c('x', 'y', 'time') + res1 <- Season_new(var, monini = 1, moninf = 1, monsup = 2) + res2 <- Season_old(var, posdim = 3, monini = 1, moninf = 1, monsup = 2) + + print("Test 2: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +#test3 + res1 <- Season_new(var, posdim = 1, monini = 1, moninf = 1, monsup = 2) + res2 <- Season_old(var, posdim = 1, monini = 1, moninf = 1, monsup = 2) + + print("Test 3: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +#test4 + var <- 1 : (12 * 2 * 3) + dim(var) <- c(dat = 2, time = 12, sdate = 3) + res1 <- Season_new(var, posdim = 2, monini = 1, moninf = 1, monsup = 2) + res2 <- Season_old(var, posdim = 2, monini = 1, moninf = 1, monsup = 2) + + print("Test 4: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +#test5 + res1 <- Season_new(var, posdim = 2, monini = 10, moninf = 2, monsup = 4) + res2<- Season_old(var, posdim = 2, monini = 10, moninf = 2, monsup = 4) + + print("Test 5: ") + print(all.equal(res1, res2, check.attributes = FALSE)) + +print('All tests pass!') diff --git a/startR/Start_test.R b/startR/Start_test.R new file mode 100644 index 0000000000000000000000000000000000000000..9ba28ba4e97c2f6662a40e425c0eb7be707e3f7e --- /dev/null +++ b/startR/Start_test.R @@ -0,0 +1,3529 @@ + library(startR) +.ReplaceVariablesInString <- function(string, replace_values, allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. + if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste(output, part, sep = "") + } else { + if (part %in% names(replace_values)) { + output <- paste(output, .ReplaceVariablesInString(replace_values[[part]], replace_values, allow_undefined_key_vars), sep = "") + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop(paste('Error: The variable $', part, '$ was not defined in the configuration file.', sep = '')) + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + if (!all(sapply(c(path_with_globs_and_tag, actual_path, tag), is.character))) { + stop("All 'path_with_globs_and_tag', 'actual_path' and 'tag' must be character strings.") + } + + if (grepl('$', tag, fixed = TRUE)) { + stop("The provided 'tag' must not contain '$' symbols.") + } + full_tag <- paste0('$', tag, '$') + + if (!grepl(full_tag, path_with_globs_and_tag, fixed = TRUE)) { + stop("The provided 'path_with_globs_and_tag' must contain the tag in 'tag' surrounded by '$' symbols.") + } + + parts <- strsplit(path_with_globs_and_tag, full_tag, fixed = TRUE)[[1]] + if (length(parts) == 1) { + parts <- c(parts, '') + } + parts[1] <- paste0('^', parts[1]) + parts[length(parts)] <- paste0(parts[length(parts)], '$') + + # Group the parts in 2 groups, in a way that both groups have a number + # of characters as similar as possible. + part_lengths <- sapply(parts, nchar) + group_len_diffs <- sapply(1:(length(parts) - 1), + function(x) { + sum(part_lengths[(x + 1):length(parts)]) - sum(part_lengths[1:x]) + } + ) + clp <- chosen_left_part <- which.min(group_len_diffs)[1] + + left_expr <- paste(parts[1:clp], collapse = full_tag) + left_expr <- gsub('?', '.', left_expr, fixed = TRUE) + # The .*? will force lazy evaluation (find the shortest match from the + # beginning of the actual_path). + left_expr <- gsub('*', '.*?', left_expr, fixed = TRUE) + left_expr <- gsub(full_tag, '.*?', left_expr, fixed = TRUE) + left_match <- regexec(left_expr, actual_path)[[1]] + if (left_match < 0) { + stop("Unexpected error in .FindTagValue.") + } + + right_expr <- paste(parts[(clp + 1):(length(parts))], collapse = full_tag) + right_expr <- gsub('?', '.', right_expr, fixed = TRUE) + # For lazy evaulation to work, pattern and string have to be reversed. + right_expr <- gsub('*', '.*?', right_expr, fixed = TRUE) + right_expr <- gsub(full_tag, '.*?', right_expr, fixed = TRUE) + right_expr <- gsub('$', '^', right_expr, fixed = TRUE) + rev_str <- function(s) { + paste(rev(strsplit(s, NULL)[[1]]), collapse = '') + } + right_expr <- rev_str(right_expr) + right_expr <- gsub('?*.', '.*?', right_expr, fixed = TRUE) + right_match <- regexec(right_expr, rev_str(actual_path))[[1]] + if (right_match < 0) { + stop("Unexpected error in .FindTagValue.") + } + right_match[] <- nchar(actual_path) - + (right_match[] + attr(right_match, 'match.length') - 1) + 1 + + if ((left_match + attr(left_match, 'match.length')) > + (right_match - 1)) { + NULL + } else { + substr(actual_path, left_match + attr(left_match, 'match.length'), + right_match - 1) + } +} +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. The path with expressions and tags, e.g.: + # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc + # and a complete known path to one of the matching files, e.g.: + # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc + # and it returns the path pattern but without shell globbing expressions: + # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc + # + # To do that, it needs also as inputs the list of replace values (the + # association of each tag to their value). + # + # All the tags not present in the parameter tags_to_keep will be repalced. + # + # Not all cases can be resolved with the implemented algorithm. In an + # unsolvable case a warning is given and one possible guess is returned. + # + # In some cases it is interesting to replace only the expressions in the + # path to the file, but not the ones in the file name itself. To keep the + # expressions in the file name, the parameter permissive can be set to + # TRUE. To replace all the expressions it can be set to FALSE. + + # Tests + #a <- "/esarchive/exp/ecearth/a13c/3hourly/$var$_*/$var$_*-LR_historical_r1i1p1f1_gr_$chunk$.nc" + #b <- "/esarchive/exp/ecearth/a13c/3hourly/psl_f6h/psl_E3hrPt_EC-Earth3-LR_historical_r1i1p1f1_gr_195001010000-195001312100.nc" + #c <- list(dat = 'dat1', var = 'psl', chunk = '195001010000-195001312100') + #d <- c('dat', 'var', 'chunk') + #e <- 'dat1' + #f <- FALSE #TRUE/0/1/2/3 + #r <- .ReplaceGlobExpressions(a, b, c, d, e, f) + + clean <- function(x) { + if (nchar(x) > 0) { + x <- gsub('\\\\', '', x) + x <- gsub('\\^', '', x) + x <- gsub('\\$', '', x) + x <- unname(sapply(strsplit(x, '[',fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) + do.call(paste0, as.list(x)) + } else { + x + } + } + + strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "") + + if (permissive == 0) { + permissive <- FALSE + } else { + if (permissive == TRUE) { + permissive_levels <- 1 + } else { + permissive_levels <- round(permissive[1]) + permissive <- TRUE + } + } + + if (permissive) { + actual_path_chunks <- strsplit(actual_path, '/')[[1]] + if (permissive_levels >= length(actual_path_chunks)) { + stop("Error: Provided levels out of scope in parameter 'permissive'.") + } + permissive_levels <- 1:permissive_levels + permissive_levels <- length(actual_path_chunks) - (rev(permissive_levels) - 1) + actual_path <- paste(actual_path_chunks[-permissive_levels], collapse = '/') + file_name <- paste(actual_path_chunks[permissive_levels], collapse = '/') + if (length(actual_path_chunks) > 1) { + file_name <- paste0('/', file_name) + } + path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]] + path_with_globs <- paste(path_with_globs_chunks[-permissive_levels], + collapse = '/') + path_with_globs_no_tags <- .ReplaceVariablesInString(path_with_globs, replace_values) + file_name_with_globs <- paste(path_with_globs_chunks[permissive_levels], collapse = '/') + if (length(path_with_globs_chunks) > 1) { + file_name_with_globs <- paste0('/', file_name_with_globs) + } + right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) + right_known_no_tags <- .ReplaceVariablesInString(right_known, replace_values) + path_with_globs_no_tags_rx <- utils::glob2rx(paste0(path_with_globs_no_tags, right_known_no_tags)) + match <- regexpr(gsub('$', '', path_with_globs_no_tags_rx, fixed = TRUE), paste0(actual_path, file_name)) + if (match != 1) { + stop("Incorrect parameters to replace glob expressions. The path with expressions does not match the actual path.") + } + #if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + # path_with_globs_no_tags <- paste0(path_with_globs_no_tags, right_known_no_tags, '*') + # file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + #} + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- c() + tags_to_replace_starts <- c() + tags_to_replace_ends <- c() + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in 1:length(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- .ReplaceVariablesInString(substr(path_with_globs_rx, 1, matches[i] - 1), replace_values) + left_known <- strReverse(head(strsplit(strReverse(left), strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- .ReplaceVariablesInString(substr(path_with_globs_rx, matches[i] + lengths[i], nchar(path_with_globs_rx)), replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + final_match <- NULL + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + actual_path_with_tags <- actual_path + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path_with_tags <- paste0(substr(actual_path_with_tags, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path_with_tags, head(tags_to_replace_ends, 1) + 1, nchar(actual_path_with_tags))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - (head(tags_to_replace_ends, 1) - head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path_with_tags, file_name_with_globs) + } else { + actual_path_with_tags + } +} +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- c() + new_dims2 <- c() + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-c(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in 1:length(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# Takes as input a list of arrays. The list must have named dimensions. +.MergeArrayOfArrays <- function(array_of_arrays) { + MergeArrays <- .MergeArrays + array_dims <- (dim(array_of_arrays)) + dim_names <- names(array_dims) + + # Merge the chunks. + for (dim_index in 1:length(dim_names)) { + dim_sub_array_of_chunks <- dim_sub_array_of_chunk_indices <- NULL + if (dim_index < length(dim_names)) { + dim_sub_array_of_chunks <- array_dims[(dim_index + 1):length(dim_names)] + names(dim_sub_array_of_chunks) <- dim_names[(dim_index + 1):length(dim_names)] + dim_sub_array_of_chunk_indices <- dim_sub_array_of_chunks + sub_array_of_chunk_indices <- array(1:prod(dim_sub_array_of_chunk_indices), + dim_sub_array_of_chunk_indices) + } else { + sub_array_of_chunk_indices <- NULL + } + sub_array_of_chunks <- vector('list', prod(dim_sub_array_of_chunks)) + dim(sub_array_of_chunks) <- dim_sub_array_of_chunks + for (i in 1:prod(dim_sub_array_of_chunks)) { + if (!is.null(sub_array_of_chunk_indices)) { + chunk_sub_indices <- which(sub_array_of_chunk_indices == i, arr.ind = TRUE)[1, ] + } else { + chunk_sub_indices <- NULL + } + for (j in 1:(array_dims[dim_index])) { + new_chunk <- do.call('[[', c(list(x = array_of_arrays), + as.list(c(j, chunk_sub_indices)))) + if (is.null(new_chunk)) { + stop("Chunks missing.") + } + if (is.null(sub_array_of_chunks[[i]])) { + sub_array_of_chunks[[i]] <- new_chunk + } else { + sub_array_of_chunks[[i]] <- MergeArrays(sub_array_of_chunks[[i]], + new_chunk, + dim_names[dim_index]) + } + } + } + array_of_arrays <- sub_array_of_chunks + rm(sub_array_of_chunks) + gc() + } + + array_of_arrays[[1]] +} +.message <- function(...) { + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + message(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + warning(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.LoadDataFile <- function(work_piece, shared_matrix_pointer, + file_data_reader, synonims, + transform, transform_params, + silent = FALSE, debug = FALSE) { +# suppressPackageStartupMessages({library(bigmemory)}) +### TODO: Specify dependencies as parameter +# suppressPackageStartupMessages({library(ncdf4)}) + +#print("1") + store_indices <- as.list(work_piece[['store_position']]) + first_round_indices <- work_piece[['first_round_indices']] + second_round_indices <- work_piece[['second_round_indices']] +#print("2") + file_to_open <- work_piece[['file_path']] + sub_array <- file_data_reader(file_to_open, NULL, + work_piece[['file_selectors']], + first_round_indices, synonims) +print('sub_array') +print(dim(sub_array)) +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> LOADING A WORK PIECE") +print("-> STRUCTURE OF READ UNTRANSFORMED DATA:") +print(str(sub_array)) +print("-> STRUCTURE OF VARIABLES TO TRANSFORM:") +print(str(work_piece[['vars_to_transform']])) +print("-> COMMON ARRAY DIMENSIONS:") +print(str(work_piece[['store_dims']])) +} +} + if (!is.null(sub_array)) { + # Apply data transformation once we have the data arrays. + if (!is.null(transform)) { +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> PROCEEDING TO TRANSFORM ARRAY") +print("-> DIMENSIONS OF ARRAY RIGHT BEFORE TRANSFORMING:") +print(dim(sub_array)) +} +} + sub_array <- do.call(transform, c(list(data_array = sub_array, + variables = work_piece[['vars_to_transform']], + file_selectors = work_piece[['file_selectors']]), + transform_params)) +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER TRANSFORMING:") +print(str(sub_array)) +print("-> DIMENSIONS OF ARRAY RIGHT AFTER TRANSFORMING:") +print(dim(sub_array$data_array)) +} +} + sub_array <- sub_array$data_array + # Subset with second round of indices + dims_to_crop <- which(!sapply(second_round_indices, is.null)) + if (length(dims_to_crop) > 0) { + dimnames_to_crop <- names(second_round_indices)[dims_to_crop] + sub_array <- Subset(sub_array, dimnames_to_crop, + second_round_indices[dimnames_to_crop]) + } +if (debug) { +if (all(unlist(store_indices[1:6]) == 1)) { +print("-> STRUCTURE OF ARRAY AND VARIABLES RIGHT AFTER SUBSETTING WITH 2nd ROUND INDICES:") +print(str(sub_array)) +} +} + } + + metadata <- attr(sub_array, 'variables') + + names_bk <- names(store_indices) +print('names(store_indices)') +print(names(store_indices)) +print('names(first_round_indices)') +print(names(first_round_indices)) +print('second_round_indices') +print(second_round_indices) +#print(str(attr(sub_array, 'variables'))) + store_indices <- lapply(names(store_indices), + function (x) { + if (!(x %in% names(first_round_indices))) { + store_indices[[x]] + } else if (is.null(second_round_indices[[x]])) { +# 1:dim(sub_array)[x] +###modified!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1 + 1:attr(sub_array, 'variables')[[1]]$dim[[3]]$len + } else { + if (is.numeric(second_round_indices[[x]])) { + ## TODO: Review carefully this line. Inner indices are all + ## aligned to the left-most positions. If dataset A has longitudes + ## 1, 2, 3, 4 but dataset B has only longitudes 3 and 4, then + ## they will be stored as follows: + ## 1, 2, 3, 4 + ## 3, 4, NA, NA + ##x - min(x) + 1 + 1:length(second_round_indices[[x]]) + } else { + 1:length(second_round_indices[[x]]) + } + } + }) + names(store_indices) <- names_bk +print('store_indices$time=') +print(store_indices$time) + +if (debug) { +if (all(unlist(store_indices) == 1)) { +print("-> STRUCTURE OF FIRST ROUND INDICES FOR THIS WORK PIECE:") +print(str(first_round_indices)) +print("-> STRUCTURE OF SECOND ROUND INDICES FOR THIS WORK PIECE:") +print(str(second_round_indices)) +print("-> STRUCTURE OF STORE INDICES FOR THIS WORK PIECE:") +print(str(store_indices)) +} +} + + store_indices <- lapply(store_indices, as.integer) + store_dims <- work_piece[['store_dims']] + + # split the storage work of the loaded subset in parts + largest_dim_name <- names(dim(sub_array))[which.max(dim(sub_array))] + max_parts <- length(store_indices[[largest_dim_name]]) + + # Indexing a data file of N MB with expand.grid takes 30*N MB + # The peak ram of Start is, minimum, 2 * total data to load from all files + # due to inefficiencies in other regions of the code + # The more parts we split the indexing done below in, the lower + # the memory footprint of the indexing and the fast. + # But more than 10 indexing iterations (parts) for each MB processed + # makes the iteration slower (tested empirically on BSC workstations). + subset_size_in_mb <- prod(dim(sub_array)) * 8 / 1024 / 1024 + best_n_parts <- ceiling(subset_size_in_mb * 10) + # We want to set n_parts to a greater value than the one that would + # result in a memory footprint (of the subset indexing code below) equal + # to 2 * total data to load from all files. + # s = subset size in MB + # p = number of parts to break it in + # T = total size of data to load + # then, s / p * 30 = 2 * T + # then, p = s * 15 / T + min_n_parts <- ceiling(prod(dim(sub_array)) * 15 / prod(store_dims)) + # Make sure we pick n_parts much greater than the minimum calculated + n_parts <- min_n_parts * 10 + if (n_parts > best_n_parts) { + n_parts <- best_n_parts + } + # Boundary checks + if (n_parts < 1) { + n_parts <- 1 + } + if (n_parts > max_parts) { + n_parts <- max_parts + } + + if (n_parts > 1) { + make_parts <- function(length, n) { + clusters <- cut(1:length, n, labels = FALSE) + lapply(1:n, function(y) which(clusters == y)) + } + part_indices <- make_parts(max_parts, n_parts) + parts <- lapply(part_indices, + function(x) { + store_indices[[largest_dim_name]][x] + }) + } else { + part_indices <- list(1:max_parts) + parts <- store_indices[largest_dim_name] + } + +print('store_dims') +print(store_dims) + # do the storage work + weights <- sapply(1:length(store_dims), + function(i) prod(c(1, store_dims)[1:i])) + part_indices_in_sub_array <- as.list(rep(TRUE, length(dim(sub_array)))) + names(part_indices_in_sub_array) <- names(dim(sub_array)) + data_array <- bigmemory::attach.big.matrix(shared_matrix_pointer) + for (i in 1:n_parts) { + store_indices[[largest_dim_name]] <- parts[[i]] + # Converting array indices to vector indices + matrix_indices <- do.call("expand.grid", store_indices) + # Given a matrix where each row is a set of array indices of an element + # the vector indices are computed + matrix_indices <- 1 + colSums(t(matrix_indices - 1) * weights) + part_indices_in_sub_array[[largest_dim_name]] <- part_indices[[i]] +print('1.5') + data_array[matrix_indices] <- as.vector(do.call('[', + c(list(x = sub_array), + part_indices_in_sub_array))) +print(data_array[matrix_indices]) + } + rm(data_array) + gc() + + if (!is.null(work_piece[['save_metadata_in']])) { + saveRDS(metadata, file = work_piece[['save_metadata_in']]) + } + } + if (!is.null(work_piece[['progress_amount']]) && !silent) { + message(work_piece[['progress_amount']], appendLF = FALSE) + } + is.null(sub_array) +} + +#---------------------------------------------------------------- +# repos <- '/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gr/v20190508/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gr_$chunk$.nc' +# +# lons.min <- 0 +# lons.max <- 360 +# lats.min <- -90 +# lats.max <- 90 +# +# dat = repos +# var = 'tos' +# memb = c('r1i1p1f1') +# sdate = paste(seq(1961,1962)) +# time = c(3:21) +# chunk = 'all' +# lat = values(list(lats.min, lats.max)) #'all', CANNOT USE 'all' +# lon = values(list(lons.min, lons.max)) #'all', CANNOT USE 'all' +# chunk_depends = 'sdate' +# time_across = 'chunk' +# +# merge_across_dims = FALSE +# transform = CDORemapper +# transform_params = list(grid='/esarchive/obs/ukmo/hadcrut_v4.6/monthly_mean/tasanomaly/tasanomaly_201712.nc', method='con') +# transform_vars = c('lat', 'lon') +# return_vars = list(lat = 'dat', lon = 'dat') +# retrieve = TRUE +# +# synonims = NULL +# file_opener = NcOpener +# file_var_reader = NcVarReader +# file_dim_reader = NcDimReader +# file_data_reader = NcDataReader +# file_closer = NcCloser +# transform_extra_cells = 0 +# apply_indices_after_transform = FALSE +# pattern_dims = NULL +# metadata_dims = NULL +# selector_checker = SelectorChecker +# split_multiselected_dims = FALSE +# path_glob_permissive = FALSE +# num_procs = 1 +# silent = FALSE +# debug = FALSE + +#---------------------------------------------------------- + + repos <- '/home/Earth/aho/$var$_Amon_EC-Earth3_historical_r20i1p1f1_gr_$chunk$.nc' + + lons.min <- 0 + lons.max <- 360 + lats.min <- -90 + lats.max <- 90 + + dat = repos + var = 'clivi' + time = c(5:21) + chunk = 'all' + lat = values(list(lats.min, lats.max)) #'all', CANNOT USE 'all' + lon = values(list(lons.min, lons.max)) #'all', CANNOT USE 'all' + #chunk_depends = 'sdate' + time_across = 'chunk' + + merge_across_dims = FALSE + transform = NULL + transform_params = NULL + transform_vars = NULL + return_vars = NULL #list(lat = 'dat', lon = 'dat') + retrieve = TRUE + + synonims = NULL + file_opener = NcOpener + file_var_reader = NcVarReader + file_dim_reader = NcDimReader + file_data_reader = NcDataReader + file_closer = NcCloser + transform_extra_cells = 0 + apply_indices_after_transform = FALSE + pattern_dims = NULL + metadata_dims = NULL + selector_checker = SelectorChecker + split_multiselected_dims = FALSE + path_glob_permissive = FALSE + num_procs = 1 + silent = FALSE + debug = FALSE + +#---------------------------------------------------------- +# dim_params <- list(dat = dat, var = var, memb = memb, sdate = sdate, time = time, chunk = chunk, lat = lat, lon = lon, chunk_depends = chunk_depends, time_across = time_across) + + dim_params <- list(dat = dat, var = var, time = time, chunk = chunk, lat = lat, lon = lon, time_across = time_across) + + var_params_ind <- grep("_var$", names(dim_params)) + var_params <- dim_params[var_params_ind] + i <- 1 + for (var_param in var_params) { + if (!is.character(var_param)) { + stop("All '*_var' parameters must be character strings.") + } + else if (!any(grepl(paste0("^", strsplit(names(var_params)[i], + "_var$")[[1]][1], "$"), names(dim_params)))) { + stop(paste0("All '*_var' parameters must be associated to a dimension parameter. Found parameter '", + names(var_params)[i], "' but no parameter '", + strsplit(names(var_params)[i], "_var$")[[1]][1], + "'.")) + } + i <- i + 1 + } + if (length(var_params) < 1) { + var_params <- NULL + } else { + names(var_params) <- gsub("_var$", "", names(var_params)) + } + dim_reorder_params_ind <- grep("_reorder$", names(dim_params)) + dim_reorder_params <- dim_params[dim_reorder_params_ind] + if (length(dim_reorder_params) < 1) { + dim_reorder_params <- NULL + } else { + names(dim_reorder_params) <- gsub("_reorder$", "", names(dim_reorder_params)) + } + tolerance_params_ind <- grep("_tolerance$", names(dim_params)) + tolerance_params <- dim_params[tolerance_params_ind] + depends_params_ind <- grep("_depends$", names(dim_params)) + depends_params <- dim_params[depends_params_ind] + i <- 1 + for (depends_param in depends_params) { + if (!is.character(depends_param) || (length(depends_param) > + 1)) { + stop("All '*_depends' parameters must be single character strings.") + } + else if (!any(grepl(paste0("^", strsplit(names(depends_params)[i], + "_depends$")[[1]][1], "$"), names(dim_params)))) { + stop(paste0("All '*_depends' parameters must be associated to a dimension parameter. Found parameter '", + names(depends_params)[i], "' but no parameter '", + strsplit(names(depends_params)[i], "_depends$")[[1]][1], + "'.")) + } + i <- i + 1 + } + if (length(depends_params) < 1) { + depends_params <- NULL + } else { + names(depends_params) <- gsub("_depends$", "", names(depends_params)) + } + depending_file_dims <- depends_params + across_params_ind <- grep("_across$", names(dim_params)) + across_params <- dim_params[across_params_ind] + i <- 1 + for (across_param in across_params) { + if (!is.character(across_param) || (length(across_param) > + 1)) { + stop("All '*_across' parameters must be single character strings.") + } + else if (!any(grepl(paste0("^", strsplit(names(across_params)[i], + "_across$")[[1]][1], "$"), names(dim_params)))) { + stop(paste0("All '*_across' parameters must be associated to a dimension parameter. Found parameter '", + names(across_params)[i], "' but no parameter '", + strsplit(names(across_params)[i], "_across$")[[1]][1], + "'.")) + } + i <- i + 1 + } + if (length(across_params) < 1) { + across_params <- NULL + } else { + names(across_params) <- gsub("_across$", "", names(across_params)) + } + inner_dims_across_files <- across_params + if (!is.logical(merge_across_dims)) { + stop("Parameter 'merge_across_dims' must be TRUE or FALSE.") + } + if (length(c(var_params_ind, dim_reorder_params_ind, tolerance_params_ind, + depends_params_ind, across_params_ind)) > 0) { + dim_params <- dim_params[-c(var_params_ind, dim_reorder_params_ind, + tolerance_params_ind, depends_params_ind, across_params_ind)] + if (merge_across_dims) { + for (inner_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(dim_params) == inner_dim_across) + file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) + new_pos <- inner_dim_pos + if (file_dim_pos < inner_dim_pos) { + new_pos <- new_pos - 1 + } + dim_params_to_move <- dim_params[c(inner_dim_pos, + file_dim_pos)] + dim_params <- dim_params[-c(inner_dim_pos, file_dim_pos)] + new_dim_params <- list() + if (new_pos > 1) { + new_dim_params <- c(new_dim_params, dim_params[1:(new_pos - + 1)]) + } + new_dim_params <- c(new_dim_params, dim_params_to_move) + if (length(dim_params) >= new_pos) { + new_dim_params <- c(new_dim_params, dim_params[new_pos:length(dim_params)]) + } + dim_params <- new_dim_params + } + } + } + dim_names <- names(dim_params) + if (is.null(dim_names)) { + stop("At least one pattern dim must be specified.") + } + chunks <- vector("list", length(dim_names)) + names(chunks) <- dim_names + for (dim_name in dim_names) { + if (!is.null(attr(dim_params[[dim_name]], "chunk"))) { + chunks[[dim_name]] <- attr(dim_params[[dim_name]], + "chunk") + attributes(dim_params[[dim_name]]) <- attributes(dim_params[[dim_name]])[-which(names(attributes(dim_params[[dim_name]])) == + "chunk")] + } else { + chunks[[dim_name]] <- c(chunk = 1, n_chunks = 1) + } + } + chunk_indices <- function(n_indices, chunk, n_chunks, dim_name) { + if (n_chunks > n_indices) { + stop("Requested to divide dimension '", dim_name, + "' of length ", n_indices, " in ", n_chunks, + " chunks, which is not possible.") + } + chunk_sizes <- rep(floor(n_indices/n_chunks), n_chunks) + chunks_to_extend <- n_indices - chunk_sizes[1] * n_chunks + if (chunks_to_extend > 0) { + chunk_sizes[1:chunks_to_extend] <- chunk_sizes[1:chunks_to_extend] + + 1 + } + chunk_size <- chunk_sizes[chunk] + offset <- 0 + if (chunk > 1) { + offset <- sum(chunk_sizes[1:(chunk - 1)]) + } + indices <- 1:chunk_sizes[chunk] + offset + array(indices, dim = setNames(length(indices), dim_name)) + } + if (is.null(pattern_dims)) { + warning(paste0("Parameter 'pattern_dims' not specified. Taking the first dimension, '", + dim_names[1], "' as 'pattern_dims'.")) + pattern_dims <- dim_names[1] + } else if (is.character(pattern_dims) && (length(pattern_dims) > + 0)) { + pattern_dims <- unique(pattern_dims) + } else { + stop("Parameter 'pattern_dims' must be a vector of character strings.") + } + if (any(names(var_params) %in% pattern_dims)) { + stop("'*_var' parameters specified for pattern dimensions. Remove or fix them.") + } + found_pattern_dim <- NULL + for (pattern_dim in pattern_dims) { + dat <- datasets <- dim_params[[pattern_dim]] + if (is.null(dat) || !(is.character(dat) && all(nchar(dat) > + 0)) && !is.list(dat)) { + stop(paste0("Parameter '", pattern_dim, "' must be a list of lists with pattern specifications or a vector of character strings.")) + } + if (!is.null(dim_reorder_params[[pattern_dim]])) { + warning(paste0("A reorder for the selectors of '", + pattern_dim, "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) + } + if (is.list(dat) || any(sapply(dat, is.list))) { + if (is.null(found_pattern_dim)) { + found_pattern_dim <- pattern_dim + } else { + stop("Found more than one pattern dim with pattern specifications (list of lists). One and only one pattern dim must contain pattern specifications.") + } + } + } + if (is.null(found_pattern_dim)) { + warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", + pattern_dims[1], "', as dimension with pattern specifications.")) + found_pattern_dim <- pattern_dims[1] + } + i <- 1 + for (dim_reorder_param in dim_reorder_params) { + if (!is.function(dim_reorder_param)) { + stop("All '*_reorder' parameters must be functions.") + } else if (!any(grepl(paste0("^", strsplit(names(dim_reorder_params)[i], + "_reorder$")[[1]][1], "$"), names(dim_params)))) { + stop(paste0("All '*_reorder' parameters must be associated to a dimension parameter. Found parameter '", + names(dim_reorder_params)[i], "' but no parameter '", + strsplit(names(dim_reorder_params)[i], "_reorder$")[[1]][1], + "'.")) + } + i <- i + 1 + } + i <- 1 + for (tolerance_param in tolerance_params) { + if (!any(grepl(paste0("^", strsplit(names(tolerance_params)[i], + "_tolerance$")[[1]][1], "$"), names(dim_params)))) { + stop(paste0("All '*_tolerance' parameters must be associated to a dimension parameter. Found parameter '", + names(tolerance_params)[i], "' but no parameter '", + strsplit(names(tolerance_params)[i], "_tolerance$")[[1]][1], + "'.")) + } + i <- i + 1 + } + if (length(tolerance_params) < 1) { + tolerance_params <- NULL + } else { + names(tolerance_params) <- gsub("_tolerance$", "", names(tolerance_params)) + } + if (!is.null(metadata_dims)) { + if (is.na(metadata_dims)) { + metadata_dims <- NULL + } else if (!is.character(metadata_dims) || (length(metadata_dims) < + 1)) { + stop("Parameter 'metadata' dims must be a vector of at least one character string.") + } + } else { + metadata_dims <- pattern_dims + } + dats_to_take <- chunk_indices(length(dim_params[[found_pattern_dim]]), + chunks[[found_pattern_dim]]["chunk"], chunks[[found_pattern_dim]]["n_chunks"], + found_pattern_dim) + dim_params[[found_pattern_dim]] <- dim_params[[found_pattern_dim]][dats_to_take] + dat <- datasets <- dim_params[[found_pattern_dim]] + dat_info_names <- c("name", "path") + dat_to_fetch <- c() + dat_names <- c() + if (!is.list(dat)) { + dat <- as.list(dat) + } else { + if (!any(sapply(dat, is.list))) { + dat <- list(dat) + } + } + for (i in 1:length(dat)) { + if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && + nchar(dat[[i]]) > 0) { + if (grepl("^(\\./|\\.\\./|/.*/|~/)", dat[[i]])) { + dat[[i]] <- list(path = dat[[i]]) + } else { + dat[[i]] <- list(name = dat[[i]]) + } + } else if (!is.list(dat[[i]])) { + stop(paste0("Parameter '", pattern_dim, "' is incorrect. It must be a list of lists or character strings.")) + } + if (!("name" %in% names(dat[[i]]))) { + dat[[i]][["name"]] <- paste0("dat", i) + if (!("path" %in% names(dat[[i]]))) { + stop(paste0("Parameter '", found_pattern_dim, + "' is incorrect. A 'path' should be provided for each dataset if no 'name' is provided.")) + } + } else if (!("path" %in% names(dat[[i]]))) { + dat_to_fetch <- c(dat_to_fetch, i) + } + dat_names <- c(dat_names, dat[[i]][["name"]]) + } + if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < + length(dat))) { + warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") + } + if (length(dat_to_fetch) > 0) { + stop("Specified only the name for some data sets, but not the path ", + "pattern. This option has not been yet implemented.") + } + if (!is.null(inner_dims_across_files)) { + new_idaf <- list() + for (i in names(inner_dims_across_files)) { + if (!(inner_dims_across_files[[i]] %in% names(new_idaf))) { + new_idaf[[inner_dims_across_files[[i]]]] <- i + } else { + new_idaf[[inner_dims_across_files[[i]]]] <- c(new_idaf[[inner_dims_across_files[[i]]]], + i) + } + } + inner_dims_across_files <- new_idaf + } + if (is.null(return_vars)) { + return_vars <- list() + } + if (!is.list(return_vars)) { + stop("Parameter 'return_vars' must be a list or NULL.") + } + if (length(return_vars) > 0 && is.null(names(return_vars))) { + stop("Parameter 'return_vars' must be a named list.") + } + i <- 1 + while (i <= length(return_vars)) { + if (length(return_vars[[i]]) > 0) { + if (!is.character(return_vars[[i]])) { + stop("The ", i, "th specification in 'return_vars' is malformed. It ", + "must be a vector of character strings of valid file dimension ", + "names.") + } + } + i <- i + 1 + } + if (!is.null(synonims)) { + error <- FALSE + if (!is.list(synonims)) { + error <- TRUE + } + for (synonim_entry in names(synonims)) { + if (!(synonim_entry %in% names(dim_params)) && !(synonim_entry %in% + names(return_vars))) { + error <- TRUE + } + if (!is.character(synonims[[synonim_entry]]) || length(synonims[[synonim_entry]]) < + 1) { + error <- TRUE + } + } + if (error) { + stop("Parameter 'synonims' must be a named list, where the names are ", + "a name of a requested dimension or variable and the values are ", + "vectors of character strings with at least one alternative name ", + " for each dimension or variable in 'synonims'.") + } + } + if (length(unique(names(synonims))) < length(names(synonims))) { + stop("There must not be repeated entries in 'synonims'.") + } + if (length(unique(unlist(synonims))) < length(unlist(synonims))) { + stop("There must not be repeated values in 'synonims'.") + } + dim_entries_to_add <- which(!(names(dim_params) %in% names(synonims))) + if (length(dim_entries_to_add) > 0) { + synonims[names(dim_params)[dim_entries_to_add]] <- as.list(names(dim_params)[dim_entries_to_add]) + } + var_entries_to_add <- which(!(names(var_params) %in% names(synonims))) + if (length(var_entries_to_add) > 0) { + synonims[names(var_params)[var_entries_to_add]] <- as.list(names(var_params)[var_entries_to_add]) + } + if (is.null(selector_checker) || !is.function(selector_checker)) { + stop("Parameter 'selector_checker' must be a function.") + } + if (is.null(file_opener) || !is.function(file_opener)) { + stop("Parameter 'file_opener' must be a function.") + } + if (!is.null(file_var_reader) && !is.function(file_var_reader)) { + stop("Parameter 'file_var_reader' must be a function.") + } + if (!is.null(file_dim_reader) && !is.function(file_dim_reader)) { + stop("Parameter 'file_dim_reader' must be a function.") + } + if (is.null(file_data_reader) || !is.function(file_data_reader)) { + stop("Parameter 'file_data_reader' must be a function.") + } + if (is.null(file_closer) || !is.function(file_closer)) { + stop("Parameter 'file_closer' must be a function.") + } + if (!is.null(transform)) { + if (!is.function(transform)) { + stop("Parameter 'transform' must be a function.") + } + } + if (!is.null(transform_params)) { + if (!is.list(transform_params)) { + stop("Parameter 'transform_params' must be a list.") + } + if (is.null(names(transform_params))) { + stop("Parameter 'transform_params' must be a named list.") + } + } + if (!is.null(transform_vars)) { + if (!is.character(transform_vars)) { + stop("Parameter 'transform_vars' must be a vector of character strings.") + } + } + if (any(!(transform_vars %in% names(return_vars)))) { + stop("All the variables specified in 'transform_vars' must also be specified in 'return_vars'.") + } + if (!is.logical(apply_indices_after_transform)) { + stop("Parameter 'apply_indices_after_transform' must be either TRUE or FALSE.") + } + aiat <- apply_indices_after_transform + if (!is.numeric(transform_extra_cells)) { + stop("Parameter 'transform_extra_cells' must be numeric.") + } + transform_extra_cells <- round(transform_extra_cells) + if (!is.logical(split_multiselected_dims)) { + stop("Parameter 'split_multiselected_dims' must be TRUE or FALSE.") + } + if (!is.numeric(path_glob_permissive) && !is.logical(path_glob_permissive)) { + stop("Parameter 'path_glob_permissive' must be TRUE, FALSE or an integer.") + } + if (length(path_glob_permissive) != 1) { + stop("Parameter 'path_glob_permissive' must be of length 1.") + } + if (!is.logical(retrieve)) { + stop("Parameter 'retrieve' must be TRUE or FALSE.") + } + if (!is.null(num_procs)) { + if (!is.numeric(num_procs)) { + stop("Parameter 'num_procs' must be numeric.") + } + else { + num_procs <- round(num_procs) + } + } + if (!is.logical(silent)) { + stop("Parameter 'silent' must be logical.") + } + dim_params[[found_pattern_dim]] <- dat_names + if (!silent) { + message(paste0("Exploring files... This will take a variable amount ", + "of time depending on the issued request and the ", + "performance of the file server...")) + } + if (!is.character(debug)) { + dims_to_check <- c("time") + } else { + dims_to_check <- debug + debug <- TRUE + } + array_of_files_to_load <- NULL + array_of_not_found_files <- NULL + indices_of_first_files_with_data <- vector("list", length(dat)) + selectors_of_first_files_with_data <- vector("list", length(dat)) + dataset_has_files <- rep(FALSE, length(dat)) + found_file_dims <- vector("list", length(dat)) + expected_inner_dims <- vector("list", length(dat)) + for (i in 1:length(dat)) { + dat_selectors <- dim_params + dat_selectors[[found_pattern_dim]] <- dat_selectors[[found_pattern_dim]][i] + dim_vars <- paste0("$", dim_names, "$") + file_dims <- which(sapply(dim_vars, grepl, dat[[i]][["path"]], + fixed = TRUE)) + if (length(file_dims) > 0) { + file_dims <- dim_names[file_dims] + } + file_dims <- unique(c(pattern_dims, file_dims)) + found_file_dims[[i]] <- file_dims + expected_inner_dims[[i]] <- dim_names[which(!(dim_names %in% + file_dims))] + if (any(c(names(depending_file_dims), unlist(depending_file_dims)) %in% + expected_inner_dims[[i]])) { + stop(paste0("The dimension dependancies specified in ", + "'depending_file_dims' can only be between file ", + "dimensions, but some inner dimensions found in ", + "dependancies for '", dat[[i]][["name"]], "', which ", + "has the following file dimensions: ", paste(paste0("'", + file_dims, "'"), collapse = ", "), ".")) + } else { + a <- names(depending_file_dims) %in% file_dims + b <- unlist(depending_file_dims) %in% file_dims + ab <- a & b + if (any(!ab)) { + warning(paste0("Detected some dependancies in 'depending_file_dims' with ", + "non-existing dimension names. These will be disregarded.")) + depending_file_dims <- depending_file_dims[-which(!ab)] + } + if (any(names(depending_file_dims) == unlist(depending_file_dims))) { + depending_file_dims <- depending_file_dims[-which(names(depending_file_dims) == + unlist(depending_file_dims))] + } + } + if (any(!(names(inner_dims_across_files) %in% file_dims)) || + any(!(unlist(inner_dims_across_files) %in% expected_inner_dims[[i]]))) { + stop(paste0("All relationships specified in ", "'_across' parameters must be between a inner ", + "dimension and a file dimension. Found wrong ", + "specification for '", dat[[i]][["name"]], "', which ", + "has the following file dimensions: ", paste(paste0("'", + file_dims, "'"), collapse = ", "), ", and the following inner dimensions: ", + paste(paste0("'", expected_inner_dims[[i]], "'"), + collapse = ", "), ".")) + } + j <- 1 + while (j <= length(return_vars)) { + if (any(!(return_vars[[j]] %in% file_dims))) { + if (any(return_vars[[j]] %in% expected_inner_dims[[i]])) { + stop("Found variables in 'return_vars' requested ", + "for some inner dimensions (for dataset '", + dat[[i]][["name"]], "'), but variables can only be ", + "requested for file dimensions.") + } else { + stop("Found variables in 'return_vars' requested ", + "for non-existing dimensions.") + } + } + j <- j + 1 + } + if (!is.null(metadata_dims)) { + if (any(!(metadata_dims %in% file_dims))) { + stop("All dimensions in 'metadata_dims' must be file dimensions.") + } + } + for (dim_name in dim_names) { + if (!(dim_name %in% pattern_dims)) { + if (is.null(attr(dat_selectors[[dim_name]], "values")) || + is.null(attr(dat_selectors[[dim_name]], "indices"))) { + flag <- ((dat_selectors[[dim_name]] %in% c("all", + "first", "last")) || (is.numeric(unlist(dat_selectors[[dim_name]])))) + attr(dat_selectors[[dim_name]], "values") <- !flag + attr(dat_selectors[[dim_name]], "indices") <- flag + } + if ((attr(dat_selectors[[dim_name]], "values") || + (dim_name %in% c("var", "variable"))) && !(dim_name %in% + names(var_params)) && !(dim_name %in% file_dims)) { + if (dim_name %in% c("var", "variable")) { + var_params <- c(var_params, setNames(list("var_names"), + dim_name)) + warning(paste0("Found specified values for dimension '", + dim_name, "' but no '", dim_name, "_var' requested. ", + "\"", dim_name, "_var = '", "var_names", + "'", "\"", " has been automatically added to ", + "the Start call.")) + } else { + var_params <- c(var_params, setNames(list(dim_name), + dim_name)) + warning(paste0("Found specified values for dimension '", + dim_name, "' but no '", dim_name, "_var' requested. ", + "\"", dim_name, "_var = '", dim_name, "'", + "\"", " has been automatically added to ", + "the Start call.")) + } + } + } + } + if (any(!(unlist(var_params) %in% names(return_vars)))) { + vars_to_add <- which(!(unlist(var_params) %in% names(return_vars))) + new_return_vars <- vector("list", length(vars_to_add)) + names(new_return_vars) <- unlist(var_params)[vars_to_add] + return_vars <- c(return_vars, new_return_vars) + warning(paste0("All '*_var' params must associate a dimension to one of the ", + "requested variables in 'return_vars'. The following variables", + " have been added to 'return_vars': ", paste(paste0("'", + unlist(var_params), "'"), collapse = ", "))) + } + replace_values <- vector("list", length = length(file_dims)) + names(replace_values) <- file_dims + for (file_dim in file_dims) { + if (file_dim %in% names(var_params)) { + warning(paste0("The '", file_dim, "_var' param will be ignored since '", + file_dim, "' is a file dimension (for the dataset with pattern ", + dat[[i]][["path"]], ").")) + } + if (!is.list(dat_selectors[[file_dim]]) || (is.list(dat_selectors[[file_dim]]) && + length(dat_selectors[[file_dim]]) == 2 && is.null(names(dat_selectors[[file_dim]])))) { + dat_selectors[[file_dim]] <- list(dat_selectors[[file_dim]]) + } + first_class <- class(dat_selectors[[file_dim]][[1]]) + first_length <- length(dat_selectors[[file_dim]][[1]]) + for (j in 1:length(dat_selectors[[file_dim]])) { + sv <- selector_vector <- dat_selectors[[file_dim]][[j]] + if (!identical(first_class, class(sv)) || !identical(first_length, + length(sv))) { + stop("All provided selectors for depending dimensions must ", + "be vectors of the same length and of the same class.") + } + if (is.character(sv) && !((length(sv) == 1) && + (sv[1] %in% c("all", "first", "last")))) { + dat_selectors[[file_dim]][[j]] <- selector_checker(selectors = sv, + return_indices = FALSE) + dat_selectors[[file_dim]][[j]] <- dat_selectors[[file_dim]][[j]][chunk_indices(length(dat_selectors[[file_dim]][[j]]), + chunks[[file_dim]]["chunk"], chunks[[file_dim]]["n_chunks"], + file_dim)] + } + else if (!(is.numeric(sv) || (is.character(sv) && + (length(sv) == 1) && (sv %in% c("all", "first", + "last"))) || (is.list(sv) && (length(sv) == + 2) && (all(sapply(sv, is.character)) || all(sapply(sv, + is.numeric)))))) { + stop("All explicitly provided selectors for file dimensions must be character strings.") + } + } + sv <- dat_selectors[[file_dim]][[1]] + if (is.character(sv) && !((length(sv) == 1) && (sv[1] %in% + c("all", "first", "last")))) { + replace_values[[file_dim]] <- dat_selectors[[file_dim]][[1]][1] + } + } + undefined_file_dims <- file_dims[which(sapply(replace_values, + is.null))] + defined_file_dims <- file_dims[which(!(file_dims %in% + undefined_file_dims))] + for (file_dim in file_dims) { + if (file_dim %in% names(depending_file_dims)) { + if (all(c(file_dim, depending_file_dims[[file_dim]]) %in% + defined_file_dims)) { + if (length(dat_selectors[[file_dim]]) != length(dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + stop(paste0("If providing selectors for the depending ", + "dimension '", file_dim, "', a ", "vector of selectors must be provided for ", + "each selector of the dimension it depends on, '", + depending_file_dims[[file_dim]], "'.")) + } + else if (!all(names(dat_selectors[[file_dim]]) == + dat_selectors[[depending_file_dims[[file_dim]]]][[1]])) { + stop(paste0("If providing selectors for the depending ", + "dimension '", file_dim, "', the name of the ", + "provided vectors of selectors must match ", + "exactly the selectors of the dimension it ", + "depends on, '", depending_file_dims[[file_dim]], + "'.")) + } + } + } + } + if (length(grep("^http", dat[[i]][["path"]])) > 0) { + if (length(undefined_file_dims) > 0) { + stop(paste0("All selectors for the file dimensions must be ", + "character strings if requesting data to a remote ", + "server. Found invalid selectors for the file dimensions ", + paste(paste0("'", undefined_file_dims, "'"), + collapse = ", "), ".")) + } + dataset_has_files[i] <- TRUE + } else { + dat[[i]][["path"]] <- path.expand(dat[[i]][["path"]]) + first_file <- NULL + first_file_selectors <- NULL + if (length(undefined_file_dims) > 0) { + replace_values[undefined_file_dims] <- "*" + } + files_to_check <- sapply(dat_selectors[defined_file_dims], + function(x) length(x[[1]])) + sub_array_of_files_to_check <- array(1:prod(files_to_check), + dim = files_to_check) + j <- 1 + while (j <= prod(files_to_check) && is.null(first_file)) { + selector_indices <- which(sub_array_of_files_to_check == + j, arr.ind = TRUE)[1, ] + selectors <- sapply(1:length(defined_file_dims), + function(x) { + vector_to_pick <- 1 + if (defined_file_dims[x] %in% names(depending_file_dims)) { + vector_to_pick <- selector_indices[which(defined_file_dims == + depending_file_dims[[defined_file_dims[x]]])] + } + dat_selectors[defined_file_dims][[x]][[vector_to_pick]][selector_indices[x]] + }) + replace_values[defined_file_dims] <- selectors + file_path <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values) + file_path <- Sys.glob(file_path) + if (length(file_path) > 0) { + first_file <- file_path[1] + first_file_selectors <- selectors + } + j <- j + 1 + } + if (is.null(first_file)) { + warning(paste0("No found files for the datset '", + dat[[i]][["name"]], "'. Provide existing selectors for the file dimensions ", + " or check and correct its path pattern: ", + dat[[i]][["path"]])) + } else { + dataset_has_files[i] <- TRUE + if (length(undefined_file_dims) > 0) { + first_values <- vector("list", length = length(undefined_file_dims)) + names(first_values) <- undefined_file_dims + found_values <- 0 + stop <- FALSE + try_dim <- 1 + last_success <- 1 + while ((found_values < length(undefined_file_dims)) && + !stop) { + u_file_dim <- undefined_file_dims[try_dim] + if (is.null(first_values[[u_file_dim]])) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values[-which(file_dims == u_file_dim)], + allow_undefined_key_vars = TRUE) + found_value <- .FindTagValue(path_with_globs_and_tag, + first_file, u_file_dim) + if (!is.null(found_value)) { + found_values <- found_values + 1 + last_success <- try_dim + first_values[[u_file_dim]] <- found_value + replace_values[[u_file_dim]] <- found_value + } + } + try_dim <- (try_dim%%length(undefined_file_dims)) + + 1 + if (try_dim == last_success) { + stop <- TRUE + } + } + if (found_values < length(undefined_file_dims)) { + stop(paste0("Path pattern of dataset '", + dat[[i]][["name"]], "' is too complex. Could not automatically ", + "detect values for all non-explicitly defined ", + "indices. Check its pattern: ", dat[[i]][["path"]])) + } + dat[[i]][["path"]] <- .ReplaceGlobExpressions(dat[[i]][["path"]], + first_file, replace_values, file_dims, dat[[i]][["name"]], + path_glob_permissive) + ufd <- c(undefined_file_dims[which(!(undefined_file_dims %in% + names(depending_file_dims)))], undefined_file_dims[which(undefined_file_dims %in% + names(depending_file_dims))]) + for (u_file_dim in ufd) { + replace_values[undefined_file_dims] <- first_values + replace_values[[u_file_dim]] <- "*" + depended_dim <- NULL + depended_dim_values <- NA + selectors <- dat_selectors[[u_file_dim]][[1]] + if (u_file_dim %in% names(depending_file_dims)) { + depended_dim <- depending_file_dims[[u_file_dim]] + depended_dim_values <- dat_selectors[[depended_dim]][[1]] + dat_selectors[[u_file_dim]] <- vector("list", + length = length(depended_dim_values)) + names(dat_selectors[[u_file_dim]]) <- depended_dim_values + } else { + dat_selectors[[u_file_dim]] <- list() + } + if (u_file_dim %in% unlist(depending_file_dims)) { + depending_dims <- names(depending_file_dims)[which(sapply(depending_file_dims, + function(x) u_file_dim %in% x))] + replace_values[depending_dims] <- rep("*", + length(depending_dims)) + } + for (j in 1:length(depended_dim_values)) { + parsed_values <- c() + if (!is.null(depended_dim)) { + replace_values[[depended_dim]] <- depended_dim_values[j] + } + path_with_globs <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values) + found_files <- Sys.glob(path_with_globs) + if (length(found_files) == 0) { + warning(paste0("Could not find files for any '", + u_file_dim, "' for '", depended_dim, + "' = '", depended_dim_values[j], "'.")) + dat_selectors[[u_file_dim]][[j]] <- NA + } else { + for (found_file in found_files) { + path_with_globs_and_tag <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values[-which(file_dims == + u_file_dim)], allow_undefined_key_vars = TRUE) + parsed_values <- c(parsed_values, .FindTagValue(path_with_globs_and_tag, + found_file, u_file_dim)) + } + dat_selectors[[u_file_dim]][[j]] <- selector_checker(selectors = selectors, + var = unique(parsed_values), return_indices = FALSE) + dat_selectors[[u_file_dim]][[j]] <- dat_selectors[[u_file_dim]][[j]][chunk_indices(length(dat_selectors[[u_file_dim]][[j]]), + chunks[[u_file_dim]]["chunk"], chunks[[u_file_dim]]["n_chunks"], + u_file_dim)] + } + } + } + } else { + dat[[i]][["path"]] <- .ReplaceGlobExpressions(dat[[i]][["path"]], + first_file, replace_values, defined_file_dims, + dat[[i]][["name"]], path_glob_permissive) + } + } + } + if (dataset_has_files[i]) { + known_dims <- file_dims + } else { + known_dims <- defined_file_dims + } + replace_values <- vector("list", length = length(known_dims)) + names(replace_values) <- known_dims + files_to_load <- sapply(dat_selectors[known_dims], function(x) length(x[[1]])) + files_to_load[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(files_to_load), + dim = files_to_load) + names(dim(sub_array_of_files_to_load)) <- known_dims + sub_array_of_not_found_files <- array(!dataset_has_files[i], + dim = files_to_load) + names(dim(sub_array_of_not_found_files)) <- known_dims + j <- 1 + while (j <= prod(files_to_load)) { + selector_indices <- which(sub_array_of_files_to_load == + j, arr.ind = TRUE)[1, ] + names(selector_indices) <- known_dims + selectors <- sapply(1:length(known_dims), function(x) { + vector_to_pick <- 1 + if (known_dims[x] %in% names(depending_file_dims)) { + vector_to_pick <- selector_indices[which(known_dims == + depending_file_dims[[known_dims[x]]])] + } + dat_selectors[known_dims][[x]][[vector_to_pick]][selector_indices[x]] + }) + names(selectors) <- known_dims + replace_values[known_dims] <- selectors + if (!dataset_has_files[i]) { + if (any(is.na(selectors))) { + replace_values <- replace_values[-which(names(replace_values) %in% + names(selectors[which(is.na(selectors))]))] + } + file_path <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values, TRUE) + sub_array_of_files_to_load[j] <- file_path + } else { + if (any(is.na(selectors))) { + replace_values <- replace_values[-which(names(replace_values) %in% + names(selectors[which(is.na(selectors))]))] + file_path <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values, TRUE) + sub_array_of_files_to_load[j] <- file_path + sub_array_of_not_found_files[j] <- TRUE + } else { + file_path <- .ReplaceVariablesInString(dat[[i]][["path"]], + replace_values) + if (!(length(grep("^http", file_path)) > 0)) { + if (grepl(file_path, "*", fixed = TRUE)) { + file_path_full <- Sys.glob(file_path)[1] + if (nchar(file_path_full) > 0) { + file_path <- file_path_full + } + } + } + sub_array_of_files_to_load[j] <- file_path + if (is.null(indices_of_first_files_with_data[[i]])) { + if (!(length(grep("^http", file_path)) > + 0)) { + if (!file.exists(file_path)) { + file_path <- NULL + } + } + if (!is.null(file_path)) { + test_file <- NULL + test_file <- file_opener(file_path) + if (!is.null(test_file)) { + selector_indices[which(known_dims == + found_pattern_dim)] <- i + indices_of_first_files_with_data[[i]] <- selector_indices + selectors_of_first_files_with_data[[i]] <- selectors + file_closer(test_file) + } + } + } + } + } + j <- j + 1 + } + if (is.null(array_of_files_to_load)) { + array_of_files_to_load <- sub_array_of_files_to_load + array_of_not_found_files <- sub_array_of_not_found_files + } else { + array_of_files_to_load <- .MergeArrays(array_of_files_to_load, + sub_array_of_files_to_load, along = found_pattern_dim) + array_of_not_found_files <- .MergeArrays(array_of_not_found_files, + sub_array_of_not_found_files, along = found_pattern_dim) + } + dat[[i]][["selectors"]] <- dat_selectors + } + if (all(sapply(indices_of_first_files_with_data, is.null))) { + stop("No data files found for any of the specified datasets.") + } + dims_to_iterate <- NULL + for (return_var in names(return_vars)) { + dims_to_iterate <- unique(c(dims_to_iterate, return_vars[[return_var]])) + } + if (found_pattern_dim %in% dims_to_iterate) { + dims_to_iterate <- dims_to_iterate[-which(dims_to_iterate == + found_pattern_dim)] + } + common_return_vars <- NULL + common_first_found_file <- NULL + common_return_vars_pos <- NULL + if (length(return_vars) > 0) { + common_return_vars_pos <- which(sapply(return_vars, function(x) !(found_pattern_dim %in% + x))) + } + if (length(common_return_vars_pos) > 0) { + common_return_vars <- return_vars[common_return_vars_pos] + return_vars <- return_vars[-common_return_vars_pos] + common_first_found_file <- rep(FALSE, length(which(sapply(common_return_vars, + length) == 0))) + names(common_first_found_file) <- names(common_return_vars[which(sapply(common_return_vars, + length) == 0)]) + } + return_vars <- lapply(return_vars, function(x) { + if (found_pattern_dim %in% x) { + x[-which(x == found_pattern_dim)] + } else { + x + } + }) + if (length(common_return_vars) > 0) { + picked_common_vars <- vector("list", length = length(common_return_vars)) + names(picked_common_vars) <- names(common_return_vars) + } else { + picked_common_vars <- NULL + } + picked_common_vars_ordered <- picked_common_vars + picked_common_vars_unorder_indices <- picked_common_vars + picked_vars <- vector("list", length = length(dat)) + names(picked_vars) <- dat_names + picked_vars_ordered <- picked_vars + picked_vars_unorder_indices <- picked_vars + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + for (inner_dim in expected_inner_dims[[i]]) { + if (!is.list(dat[[i]][["selectors"]][[inner_dim]]) || + (is.list(dat[[i]][["selectors"]][[inner_dim]]) && + length(dat[[i]][["selectors"]][[inner_dim]]) == + 2 && is.null(names(dat[[i]][["selectors"]][[inner_dim]])))) { + dat[[i]][["selectors"]][[inner_dim]] <- list(dat[[i]][["selectors"]][[inner_dim]]) + } + } + if (length(return_vars) > 0) { + picked_vars[[i]] <- vector("list", length = length(return_vars)) + names(picked_vars[[i]]) <- names(return_vars) + picked_vars_ordered[[i]] <- picked_vars[[i]] + picked_vars_unorder_indices[[i]] <- picked_vars[[i]] + } + indices_of_first_file <- as.list(indices_of_first_files_with_data[[i]]) + array_file_dims <- sapply(dat[[i]][["selectors"]][found_file_dims[[i]]], + function(x) length(x[[1]])) + names(array_file_dims) <- found_file_dims[[i]] + if (length(dims_to_iterate) > 0) { + indices_of_first_file[dims_to_iterate] <- lapply(array_file_dims[dims_to_iterate], + function(x) 1:x) + } + array_of_var_files <- do.call("[", c(list(x = array_of_files_to_load), + indices_of_first_file, list(drop = FALSE))) + array_of_var_indices <- array(1:length(array_of_var_files), + dim = dim(array_of_var_files)) + array_of_not_found_var_files <- do.call("[", c(list(x = array_of_not_found_files), + indices_of_first_file, list(drop = FALSE))) + previous_indices <- rep(-1, length(indices_of_first_file)) + names(previous_indices) <- names(indices_of_first_file) + first_found_file <- NULL + if (length(return_vars) > 0) { + first_found_file <- rep(FALSE, length(which(sapply(return_vars, + length) == 0))) + names(first_found_file) <- names(return_vars[which(sapply(return_vars, + length) == 0)]) + } + for (j in 1:length(array_of_var_files)) { + current_indices <- which(array_of_var_indices == + j, arr.ind = TRUE)[1, ] + names(current_indices) <- names(indices_of_first_file) + if (!is.na(array_of_var_files[j]) && !array_of_not_found_var_files[j]) { + changed_dims <- which(current_indices != previous_indices) + vars_to_read <- NULL + if (length(return_vars) > 0) { + vars_to_read <- names(return_vars)[sapply(return_vars, + function(x) any(names(changed_dims) %in% + x))] + } + if (!is.null(first_found_file)) { + if (any(!first_found_file)) { + vars_to_read <- c(vars_to_read, names(first_found_file[which(!first_found_file)])) + } + } + if ((i == 1) && (length(common_return_vars) > + 0)) { + vars_to_read <- c(vars_to_read, names(common_return_vars)[sapply(common_return_vars, + function(x) any(names(changed_dims) %in% + x))]) + } + if (!is.null(common_first_found_file)) { + if (any(!common_first_found_file)) { + vars_to_read <- c(vars_to_read, names(common_first_found_file[which(!common_first_found_file)])) + } + } + file_object <- file_opener(array_of_var_files[j]) + if (!is.null(file_object)) { + for (var_to_read in vars_to_read) { + if (var_to_read %in% unlist(var_params)) { + associated_dim_name <- names(var_params)[which(unlist(var_params) == + var_to_read)] + } + var_name_to_reader <- var_to_read + names(var_name_to_reader) <- "var" + var_dims <- file_dim_reader(NULL, file_object, + var_name_to_reader, NULL, synonims) + names(var_dims) <- sapply(names(var_dims), + function(x) { + which_entry <- which(sapply(synonims, + function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + if (!is.null(var_dims)) { + var_file_dims <- NULL + if (var_to_read %in% names(common_return_vars)) { + var_to_check <- common_return_vars[[var_to_read]] + } else { + var_to_check <- return_vars[[var_to_read]] + } + if (any(names(dim(array_of_files_to_load)) %in% + var_to_check)) { + var_file_dims <- dim(array_of_files_to_load)[which(names(dim(array_of_files_to_load)) %in% + var_to_check)] + } + if (((var_to_read %in% names(common_return_vars)) && + is.null(picked_common_vars[[var_to_read]])) || + ((var_to_read %in% names(return_vars)) && + is.null(picked_vars[[i]][[var_to_read]]))) { + if (any(names(var_file_dims) %in% names(var_dims))) { + stop("Found a requested var in 'return_var' requested for a ", + "file dimension which also appears in the dimensions of ", + "the variable inside the file.\n", + array_of_var_files[j]) + } + special_types <- list(POSIXct = as.POSIXct, + POSIXlt = as.POSIXlt, Date = as.Date) + first_sample <- file_var_reader(NULL, + file_object, NULL, var_to_read, synonims) + if (any(class(first_sample) %in% names(special_types))) { + array_size <- prod(c(var_file_dims, + var_dims)) + new_array <- rep(special_types[[class(first_sample)[1]]](NA), + array_size) + dim(new_array) <- c(var_file_dims, + var_dims) + } else { + new_array <- array(dim = c(var_file_dims, + var_dims)) + } + attr(new_array, "variables") <- attr(first_sample, + "variables") + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- new_array + pick_ordered <- FALSE + if (var_to_read %in% unlist(var_params)) { + if (associated_dim_name %in% names(dim_reorder_param) && + !aiat) { + picked_common_vars_ordered[[var_to_read]] <- new_array + pick_ordered <- TRUE + } + } + if (!pick_ordered) { + picked_common_vars_ordered[[var_to_read]] <- NULL + } + } else { + picked_vars[[i]][[var_to_read]] <- new_array + pick_ordered <- FALSE + if (var_to_read %in% unlist(var_params)) { + if (associated_dim_name %in% names(dim_reorder_params) && + !aiat) { + picked_vars_ordered[[i]][[var_to_read]] <- new_array + pick_ordered <- TRUE + } + } + if (!pick_ordered) { + picked_vars_ordered[[i]][[var_to_read]] <- NULL + } + } + } else { + if (var_to_read %in% names(common_return_vars)) { + array_var_dims <- dim(picked_common_vars[[var_to_read]]) + } else { + array_var_dims <- dim(picked_vars[[i]][[var_to_read]]) + } + full_array_var_dims <- array_var_dims + if (any(names(array_var_dims) %in% + names(var_file_dims))) { + array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% + names(var_file_dims))] + } + if (names(array_var_dims) != names(var_dims)) { + stop("Error while reading the variable '", + var_to_read, "' from ", "the file. Dimensions do not match.\nExpected ", + paste(paste0("'", names(array_var_dims), + "'"), collapse = ", "), " but found ", + paste(paste0("'", names(var_dims), + "'"), collapse = ", "), ".\n", + array_of_var_files[j]) + } + if (any(var_dims > array_var_dims)) { + longer_dims <- which(var_dims > array_var_dims) + if (length(longer_dims) == 1) { + longer_dims_in_full_array <- longer_dims + if (any(names(full_array_var_dims) %in% + names(var_file_dims))) { + candidates <- (1:length(full_array_var_dims))[-which(names(full_array_var_dims) %in% + names(var_file_dims))] + longer_dims_in_full_array <- candidates[longer_dims] + } + padding_dims <- full_array_var_dims + padding_dims[longer_dims_in_full_array] <- var_dims[longer_dims] - + array_var_dims[longer_dims] + special_types <- list(POSIXct = as.POSIXct, + POSIXlt = as.POSIXlt, Date = as.Date) + if (var_to_read %in% names(common_return_vars)) { + var_class <- class(picked_common_vars[[var_to_read]]) + } else { + var_class <- class(picked_vars[[i]][[var_to_read]]) + } + if (any(var_class %in% names(special_types))) { + padding_size <- prod(padding_dims) + padding <- rep(special_types[[var_class[1]]](NA), + padding_size) + dim(padding) <- padding_dims + } else { + padding <- array(dim = padding_dims) + } + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- .abind2(picked_common_vars[[var_to_read]], + padding, names(full_array_var_dims)[longer_dims_in_full_array]) + } else { + picked_vars[[i]][[var_to_read]] <- .abind2(picked_vars[[i]][[var_to_read]], + padding, names(full_array_var_dims)[longer_dims_in_full_array]) + } + } else { + stop("Error while reading the variable '", + var_to_read, "' from ", "the file. Found size (", + paste(var_dims, collapse = " x "), + ") is greater than expected maximum size (", + array_var_dims, ").") + } + } + } + var_store_indices <- c(as.list(current_indices[names(var_file_dims)]), + lapply(var_dims, function(x) 1:x)) + var_values <- file_var_reader(NULL, file_object, + NULL, var_to_read, synonims) + if (var_to_read %in% unlist(var_params)) { + if ((associated_dim_name %in% names(dim_reorder_params)) && + !aiat) { + if (length(dim(var_values)) > 1) { + stop("Requested a '", associated_dim_name, + "_reorder' for a dimension ", + "whose coordinate variable that has more than 1 dimension. This is ", + "not supported.") + } + ordered_var_values <- dim_reorder_params[[associated_dim_name]](var_values) + attr(ordered_var_values$x, "variables") <- attr(var_values, + "variables") + if (!all(c("x", "ix") %in% names(ordered_var_values))) { + stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.") + } + unorder <- sort(ordered_var_values$ix, + index.return = TRUE)$ix + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars_ordered[[var_to_read]] <- do.call("[<-", + c(list(x = picked_common_vars_ordered[[var_to_read]]), + var_store_indices, list(value = ordered_var_values$x))) + picked_common_vars_unorder_indices[[var_to_read]] <- do.call("[<-", + c(list(x = picked_common_vars_unorder_indices[[var_to_read]]), + var_store_indices, list(value = unorder))) + } else { + picked_vars_ordered[[i]][[var_to_read]] <- do.call("[<-", + c(list(x = picked_vars_ordered[[i]][[var_to_read]]), + var_store_indices, list(value = ordered_var_values$x))) + picked_vars_unorder_indices[[i]][[var_to_read]] <- do.call("[<-", + c(list(x = picked_vars_unorder_indices[[i]][[var_to_read]]), + var_store_indices, list(value = unorder))) + } + } + } + if (var_to_read %in% names(common_return_vars)) { + picked_common_vars[[var_to_read]] <- do.call("[<-", + c(list(x = picked_common_vars[[var_to_read]]), + var_store_indices, list(value = var_values))) + } else { + picked_vars[[i]][[var_to_read]] <- do.call("[<-", + c(list(x = picked_vars[[i]][[var_to_read]]), + var_store_indices, list(value = var_values))) + } + if (var_to_read %in% names(first_found_file)) { + first_found_file[var_to_read] <- TRUE + } + if (var_to_read %in% names(common_first_found_file)) { + common_first_found_file[var_to_read] <- TRUE + } + } else { + stop("Could not find variable '", var_to_read, + "' in the file ", array_of_var_files[j]) + } + } + file_closer(file_object) + } + } + previous_indices <- current_indices + } + } + } + transform_indices <- function(v, n, m) { + unique2 <- function(v) { + if (length(v) < 2) { + v + } else { + v[c(1, v[2:length(v)] - v[1:(length(v) - 1)]) != + 0] + } + } + unique2(round(((v - 1)/(n - 1)) * (m - 1))) + 1 + } + beta <- transform_extra_cells + dims_to_crop <- vector("list") + transformed_vars <- vector("list", length = length(dat)) + names(transformed_vars) <- dat_names + transformed_vars_ordered <- transformed_vars + transformed_vars_unorder_indices <- transformed_vars + transformed_common_vars <- NULL + transformed_common_vars_ordered <- NULL + transformed_common_vars_unorder_indices <- NULL + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + indices <- indices_of_first_files_with_data[[i]] + if (!is.null(indices)) { + file_path <- do.call("[", c(list(array_of_files_to_load), + as.list(indices_of_first_files_with_data[[i]]))) + data_dims <- NULL + if (length(unlist(var_params[expected_inner_dims[[i]]])) < + length(expected_inner_dims[[i]])) { + file_to_open <- file_path + data_dims <- file_dim_reader(file_to_open, + NULL, selectors_of_first_files_with_data[[i]], + lapply(dat[[i]][["selectors"]][expected_inner_dims[[i]]], + "[[", 1), synonims) + names(data_dims) <- sapply(names(data_dims), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% + y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + } + if (!is.null(transform) && (length(transform_vars) > + 0)) { + if (!all(transform_vars %in% c(names(picked_vars[[i]]), + names(picked_common_vars)))) { + stop("Could not find all the required variables in 'transform_vars' ", + "for the dataset '", dat[[i]][["name"]], + "'.") + } + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% + transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] + new_vars_to_transform <- picked_vars[[i]][picked_vars_to_transform] + which_are_ordered <- which(!sapply(picked_vars_ordered[[i]][picked_vars_to_transform], + is.null)) + if (length(which_are_ordered) > 0) { + new_vars_to_transform[which_are_ordered] <- picked_vars_ordered[[i]][which_are_ordered] + } + vars_to_transform <- c(vars_to_transform, + new_vars_to_transform) + } + picked_common_vars_to_transform <- which(names(picked_common_vars) %in% + transform_vars) + if (length(picked_common_vars_to_transform) > + 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + new_vars_to_transform <- picked_common_vars[[i]][picked_common_vars_to_transform] + which_are_ordered <- which(!sapply(picked_common_vars_ordered[[i]][picked_common_vars_to_transform], + is.null)) + if (length(which_are_ordered) > 0) { + new_vars_to_transform[which_are_ordered] <- picked_common_vars_ordered[[i]][which_are_ordered] + } + vars_to_transform <- c(vars_to_transform, + new_vars_to_transform) + } + transformed_data <- do.call(transform, c(list(data_array = NULL, + variables = vars_to_transform, file_selectors = selectors_of_first_files_with_data[[i]]), + transform_params)) + if (!is.null(transformed_common_vars)) { + common_ones <- which(names(picked_common_vars) %in% + names(transformed_data$variables)) + if (length(common_ones) > 0) { + transformed_data$variables <- transformed_data$variables[-common_ones] + } + } + transformed_vars[[i]] <- list() + transformed_vars_ordered[[i]] <- list() + transformed_vars_unorder_indices[[i]] <- list() + for (var_to_read in names(transformed_data$variables)) { + if (var_to_read %in% unlist(var_params)) { + associated_dim_name <- names(var_params)[which(unlist(var_params) == + var_to_read)] + if ((associated_dim_name %in% names(dim_reorder_params)) && + aiat) { + if (length(dim(transformed_data$variables[[associated_dim_name]])) > + 1) { + stop("Requested a '", associated_dim_name, + "_reorder' for a dimension ", "whose coordinate variable that has more than 1 dimension (after ", + "transform). This is not supported.") + } + ordered_var_values <- dim_reorder_params[[associated_dim_name]](transformed_data$variables[[associated_dim_name]]) + attr(ordered_var_values, "variables") <- attr(transformed_data$variables[[associated_dim_name]], + "variables") + if (!all(c("x", "ix") %in% names(ordered_var_values))) { + stop("All the dimension reorder functions must return a list with the components 'x' and 'ix'.") + } + unorder <- sort(ordered_var_values$ix, + index.return = TRUE)$ix + if (var_to_read %in% names(picked_common_vars)) { + transformed_common_vars_ordered[[var_to_read]] <- ordered_var_values$x + transformed_common_vars_unorder_indices[[var_to_read]] <- unorder + } + else { + transformed_vars_ordered[[i]][[var_to_read]] <- ordered_var_values$x + transformed_vars_unorder_indices[[i]][[var_to_read]] <- unorder + } + } + } + } + transformed_picked_vars <- which(names(picked_vars[[i]]) %in% + names(transformed_data$variables)) + if (length(transformed_picked_vars) > 0) { + transformed_picked_vars <- names(picked_vars[[i]])[transformed_picked_vars] + transformed_vars[[i]][transformed_picked_vars] <- transformed_data$variables[transformed_picked_vars] + } + if (is.null(transformed_common_vars)) { + transformed_picked_common_vars <- which(names(picked_common_vars) %in% + names(transformed_data$variables)) + if (length(transformed_picked_common_vars) > + 0) { + transformed_picked_common_vars <- names(picked_common_vars)[transformed_picked_common_vars] + transformed_common_vars <- transformed_data$variables[transformed_picked_common_vars] + } + } + } + for (inner_dim in expected_inner_dims[[i]]) { + if (debug) { + print("-> DEFINING INDICES FOR INNER DIMENSION:") + print(inner_dim) + } + file_dim <- NULL + if (inner_dim %in% unlist(inner_dims_across_files)) { + file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, + function(x) inner_dim %in% x))[1]] + chunk_amount <- length(dat[[i]][["selectors"]][[file_dim]][[1]]) + names(chunk_amount) <- file_dim + } + else { + chunk_amount <- 1 + } + if ((dat[[i]][["selectors"]][[inner_dim]][[1]] %in% + c("all", "first", "last")) && (chunks[[inner_dim]]["n_chunks"] != + 1)) { + selectors <- dat[[i]][["selectors"]][[inner_dim]][[1]] + if (selectors == "all") { + selectors <- indices(1:(data_dims[[inner_dim]] * + chunk_amount)) + } + else if (selectors == "first") { + selectors <- indices(1) + } + else { + selectors <- indices(data_dims[[inner_dim]] * + chunk_amount) + } + dat[[i]][["selectors"]][[inner_dim]][[1]] <- selectors + } + selector_array <- dat[[i]][["selectors"]][[inner_dim]][[1]] + if (debug) { + if (inner_dim %in% dims_to_check) { + print(paste0("-> DEBUG MESSAGES FOR THE DATASET", + i, " AND INNER DIMENSION '", inner_dim, + "':")) + print("-> STRUCTURE OF SELECTOR ARRAY:") + print(str(selector_array)) + print("-> PICKED VARS:") + print(picked_vars) + print("-> TRANSFORMED VARS:") + print(transformed_vars) + } + } + if (is.null(dim(selector_array))) { + dim(selector_array) <- length(selector_array) + } + if (is.null(names(dim(selector_array)))) { + if (length(dim(selector_array)) == 1) { + names(dim(selector_array)) <- inner_dim + } + else { + stop("Provided selector arrays must be provided with dimension ", + "names. Found an array of selectors without dimension names ", + "for the dimension '", inner_dim, "'.") + } + } + selectors_are_indices <- FALSE + if (!is.null(attr(selector_array, "indices"))) { + if (!is.logical(attr(selector_array, "indices"))) { + stop("The atribute 'indices' for the selectors for the dimension '", + inner_dim, "' must be TRUE or FALSE.") + } + selectors_are_indices <- attr(selector_array, + "indices") + } + taken_chunks <- rep(FALSE, chunk_amount) + selector_file_dims <- 1 + if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { + selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% + found_file_dims[[i]])] + } + selector_inner_dims <- dim(selector_array)[which(!(names(dim(selector_array)) %in% + found_file_dims[[i]]))] + var_with_selectors <- NULL + var_with_selectors_name <- var_params[[inner_dim]] + var_ordered <- NULL + var_unorder_indices <- NULL + with_transform <- FALSE + if (!is.null(var_with_selectors_name)) { + if ((var_with_selectors_name %in% transform_vars) && + (!is.null(transform))) { + with_transform <- TRUE + if (!is.null(file_dim)) { + stop("Requested a transformation over the dimension '", + inner_dim, "', wich goes across files. This feature ", + "is not supported. Either do the request without the ", + "transformation or request it over dimensions that do ", + "not go across files.") + } + } + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> NAME OF THE VARIABLE WITH THE SELECTOR VALUES FOR THE CURRENT INNER DIMENSION:") + print(var_with_selectors_name) + print("-> NAMES OF THE VARIABLES TO BE TRANSFORMED:") + print(transform_vars) + print("-> STRUCTURE OF THE TRANSFORMATION FUNCTION:") + print(str(transform)) + } + } + if (var_with_selectors_name %in% names(picked_vars[[i]])) { + var_with_selectors <- picked_vars[[i]][[var_with_selectors_name]] + var_ordered <- picked_vars_ordered[[i]][[var_with_selectors_name]] + var_unorder_indices <- picked_vars_unorder_indices[[i]][[var_with_selectors_name]] + } + else if (var_with_selectors_name %in% names(picked_common_vars)) { + var_with_selectors <- picked_common_vars[[var_with_selectors_name]] + var_ordered <- picked_common_vars_ordered[[var_with_selectors_name]] + var_unorder_indices <- picked_common_vars_unorder_indices[[var_with_selectors_name]] + } + n <- prod(dim(var_with_selectors)) + if (is.null(var_unorder_indices)) { + var_unorder_indices <- 1:n + } + if (with_transform) { + if (var_with_selectors_name %in% names(transformed_vars[[i]])) { + m <- prod(dim(transformed_vars[[i]][[var_with_selectors_name]])) + if (aiat) { + var_with_selectors <- transformed_vars[[i]][[var_with_selectors_name]] + var_ordered <- transformed_vars_ordered[[i]][[var_with_selectors_name]] + var_unorder_indices <- transformed_vars_unorder_indices[[i]][[var_with_selectors_name]] + } + } + else if (var_with_selectors_name %in% names(transformed_common_vars)) { + m <- prod(dim(transformed_common_vars[[var_with_selectors_name]])) + if (aiat) { + var_with_selectors <- transformed_common_vars[[var_with_selectors_name]] + var_ordered <- transformed_common_vars_ordered[[var_with_selectors_name]] + var_unorder_indices <- transformed_common_vars_unorder_indices[[var_with_selectors_name]] + } + } + if (is.null(var_unorder_indices)) { + var_unorder_indices <- 1:m + } + } + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> SIZE OF ORIGINAL VARIABLE:") + print(n) + print("-> SIZE OF TRANSFORMED VARIABLE:") + if (with_transform) + print(m) + print("-> STRUCTURE OF ORDERED VAR:") + print(str(var_ordered)) + print("-> UNORDER INDICES:") + print(var_unorder_indices) + } + } + var_dims <- dim(var_with_selectors) + var_file_dims <- 1 + if (any(names(var_dims) %in% found_file_dims[[i]])) { + if (with_transform) { + stop("Requested transformation for inner dimension '", + inner_dim, "' but provided selectors for such dimension ", + "over one or more file dimensions. This is not ", + "supported. Either request no transformation for the ", + "dimension '", inner_dim, "' or specify the ", + "selectors for this dimension without the file dimensions.") + } + var_file_dims <- var_dims[which(names(var_dims) %in% + found_file_dims[[i]])] + var_dims <- var_dims[-which(names(var_dims) %in% + found_file_dims[[i]])] + } + if (inner_dim %in% unlist(inner_dims_across_files)) { + if (length(var_dims) > 1) { + stop("Specified a '", inner_dim, "_var' for the dimension '", + inner_dim, "', which goes across files (across '", + file_dim, "'). The specified variable, '", + var_with_selectors_name, "', has more ", + "than one dimension and can not be used as selector variable. ", + "Select another variable or fix it in the files.") + } + } + var_full_dims <- dim(var_with_selectors) + if (!(inner_dim %in% names(var_full_dims))) { + stop("Could not find the dimension '", + inner_dim, "' in ", "the file. Either change the dimension name in ", + "your request, adjust the parameter ", + "'dim_names_in_files' or fix the dimension name in ", + "the file.\n", file_path) + } + } + else if (((is.numeric(selector_array) || is.list(selector_array)) && + selectors_are_indices) || (is.character(selector_array) && + (length(selector_array) == 1) && (selector_array %in% + c("all", "first", "last")) && !is.null(file_dim_reader))) { + if (!(inner_dim %in% names(data_dims))) { + stop("Could not find the dimension '", + inner_dim, "' in ", "the file. Either change the dimension name in ", + "your request, adjust the parameter ", + "'dim_names_in_files' or fix the dimension name in ", + "the file.\n", file_path) + } + } + else { + stop(paste0("Can not translate the provided selectors for '", + inner_dim, "' to numeric indices. Provide numeric indices and a ", + "'file_dim_reader' function, or a '", inner_dim, + "_var' in order to calculate the indices.")) + } + fri <- first_round_indices <- NULL + sri <- second_round_indices <- NULL + tvi <- tranaformed_variable_indices <- NULL + ordered_fri <- NULL + ordered_sri <- NULL + if ((length(selector_array) == 1) && is.character(selector_array) && + (selector_array %in% c("all", "first", "last")) && + (chunks[[inner_dim]]["n_chunks"] == 1)) { + if (is.null(var_with_selectors_name)) { + fri <- vector("list", length = chunk_amount) + dim(fri) <- c(chunk_amount) + sri <- vector("list", length = chunk_amount) + dim(sri) <- c(chunk_amount) + if (selector_array == "all") { + fri[] <- replicate(chunk_amount, list(1:(data_dims[inner_dim]))) + taken_chunks <- rep(TRUE, chunk_amount) + } + else if (selector_array == "first") { + fri[[1]] <- 1 + taken_chunks[1] <- TRUE + } + else if (selector_array == "last") { + fri[[chunk_amount]] <- data_dims[inner_dim] + taken_chunks[length(taken_chunks)] <- TRUE + } + } + else { + if ((!is.null(file_dim)) && !(file_dim %in% + names(var_file_dims))) { + stop("The variable '", var_with_selectors_name, + "' must also be ", "requested for the file dimension '", + file_dim, "' in ", "this configuration.") + } + fri <- vector("list", length = prod(var_file_dims)) + dim(fri) <- var_file_dims + ordered_fri <- fri + sri <- vector("list", length = prod(var_file_dims)) + dim(sri) <- var_file_dims + ordered_sri <- sri + if (selector_array == "all") { + ordered_fri[] <- replicate(prod(var_file_dims), + list(1:n)) + fri[] <- replicate(prod(var_file_dims), + list(var_unorder_indices[1:n])) + taken_chunks <- rep(TRUE, chunk_amount) + if (!with_transform) { + } + else { + ordered_sri[] <- replicate(prod(var_file_dims), + list(1:m)) + sri[] <- replicate(prod(var_file_dims), + list(1:m)) + tvi <- 1:m + } + } + else if (selector_array == "first") { + taken_chunks[1] <- TRUE + if (!with_transform) { + ordered_fri[[1]] <- 1 + fri[[1]] <- var_unorder_indices[1] + } + else { + if (!aiat) { + ordered_fri[[1]] <- 1 + fri[[1]] <- var_unorder_indices[1] + ordered_sri[[1]] <- 1:ceiling(m/n) + sri[[1]] <- 1:ceiling(m/n) + tvi <- 1:ceiling(m/n) + } + else { + ordered_fri[[1]] <- 1:ceiling(m/n) + fri[[1]] <- var_unorder_indices[1:ceiling(m/n)] + ordered_sri[[1]] <- 1 + sri[[1]] <- 1 + tvi <- 1 + } + } + } + else if (selector_array == "last") { + taken_chunks[length(taken_chunks)] <- TRUE + if (!with_transform) { + ordered_fri[[prod(var_file_dims)]] <- n + fri[[prod(var_file_dims)]] <- var_unorder_indices[n] + } + else { + if (!aiat) { + ordered_fri[[prod(var_file_dims)]] <- prod(var_dims) + fri[[prod(var_file_dims)]] <- var_unorder_indices[prod(var_dims)] + ordered_sri[[prod(var_file_dims)]] <- 1:ceiling(m/n) + sri[[prod(var_file_dims)]] <- 1:ceiling(m/n) + tvi <- 1:ceiling(m/n) + } + else { + ordered_fri[[prod(var_file_dims)]] <- (n - + ceiling(m/n) + 1):n + fri[[prod(var_file_dims)]] <- var_unorder_indices[(n - + ceiling(m/n) + 1):n] + ordered_sri[[prod(var_file_dims)]] <- 1 + sri[[prod(var_file_dims)]] <- 1 + tvi <- 1 + } + } + } + } + } + else { + if (!is.null(var_with_selectors_name)) { + unmatching_file_dims <- which(!(names(var_file_dims) %in% + names(selector_file_dims))) + if ((length(unmatching_file_dims) > 0)) { + raise_error <- FALSE + if (is.null(file_dim)) { + raise_error <- TRUE + } + else { + if (!((length(unmatching_file_dims) == + 1) && (names(var_file_dims)[unmatching_file_dims] == + file_dim) && (inner_dim %in% names(selector_inner_dims)))) { + raise_error <- TRUE + } + } + if (raise_error) { + stop("Provided selectors for the dimension '", + inner_dim, "' must have as many ", + "file dimensions as the variable the dimension is defined along, '", + var_with_selectors_name, "', with the exceptions of the file pattern dimension ('", + found_pattern_dim, "') and any depended file dimension (if specified as ", + "depended dimension in parameter 'inner_dims_across_files' and the ", + "depending file dimension is present in the provided selector array).") + } + } + if (any(names(selector_file_dims) %in% + names(dim(var_with_selectors)))) { + if (any(dim(var_with_selectors)[names(selector_file_dims)] != + selector_file_dims)) { + stop("Size of selector file dimensions must mach size of requested ", + "variable dimensions.") + } + } + } + if (is.null(names(selector_file_dims))) { + if (is.null(file_dim)) { + fri_dims <- 1 + } + else { + fri_dims <- chunk_amount + names(fri_dims) <- file_dim + } + } + else { + fri_dim_names <- names(selector_file_dims) + if (!is.null(file_dim)) { + fri_dim_names <- c(fri_dim_names, file_dim) + } + fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% + fri_dim_names)] + fri_dims <- rep(NA, length(fri_dim_names)) + names(fri_dims) <- fri_dim_names + fri_dims[names(selector_file_dims)] <- selector_file_dims + if (!is.null(file_dim)) { + fri_dims[file_dim] <- chunk_amount + } + } + fri <- vector("list", length = prod(fri_dims)) + dim(fri) <- fri_dims + sri <- vector("list", length = prod(fri_dims)) + dim(sri) <- fri_dims + selector_file_dim_array <- array(1:prod(selector_file_dims), + dim = selector_file_dims) + selector_store_position <- fri_dims + for (j in 1:prod(dim(selector_file_dim_array))) { + selector_indices_to_take <- which(selector_file_dim_array == + j, arr.ind = TRUE)[1, ] + names(selector_indices_to_take) <- names(selector_file_dims) + selector_store_position[names(selector_indices_to_take)] <- selector_indices_to_take + sub_array_of_selectors <- Subset(selector_array, + names(selector_indices_to_take), as.list(selector_indices_to_take), + drop = "selected") + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> ITERATING OVER FILE DIMENSIONS OF THE SELECTORS.") + print("-> STRUCTURE OF A SUB ARRAY:") + print(str(sub_array_of_selectors)) + print("-> STRUCTURE OF THE VARIABLE WITH SELECTORS:") + print(str(var_with_selectors)) + print(dim(var_with_selectors)) + } + } + if (selectors_are_indices) { + sub_array_of_values <- NULL + } + else { + if (length(var_file_dims) > 0) { + var_indices_to_take <- selector_indices_to_take[which(names(selector_indices_to_take) %in% + names(var_file_dims))] + sub_array_of_values <- Subset(var_with_selectors, + names(var_indices_to_take), as.list(var_indices_to_take), + drop = "selected") + } + else { + sub_array_of_values <- var_with_selectors + } + } + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> STRUCTURE OF THE SUB ARRAY FROM THE VARIABLE CORRESPONDING TO THE SUB ARRAY OF SELECTORS") + print(str(sub_array_of_values)) + print(dim(sub_array_of_values)) + print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:") + print(file_dim) + } + } + if ((!is.null(file_dim) && (file_dim %in% + names(selector_file_dims))) || is.null(file_dim)) { + if (length(sub_array_of_selectors) > + 0) { + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> THE INNER DIMENSION DOES NOT GO ACROSS ANY FILE DIMENSION OR IT DOES BUT IS IN THE PROVIDED SELECTOR ARRAY.") + } + } + if (selectors_are_indices) { + if (!is.null(var_with_selectors_name)) { + max_allowed <- ifelse(aiat, m, + n) + } + else { + max_allowed <- data_dims[inner_dim] + } + if (any(na.omit(unlist(sub_array_of_selectors)) > + max_allowed) || any(na.omit(unlist(sub_array_of_selectors)) < + 1)) { + stop("Provided indices out of range for dimension '", + inner_dim, "' ", "for dataset '", + dat[[i]][["name"]], "' (accepted range: 1 to ", + max_allowed, ").") + } + } + goes_across_prime_meridian <- FALSE + if (!is.null(var_ordered) && !selectors_are_indices) { + if (!is.null(dim_reorder_params[[inner_dim]])) { + if (is.list(sub_array_of_selectors)) { + sub_array_reordered <- dim_reorder_params[[inner_dim]](unlist(sub_array_of_selectors)) + sub_array_unorder <- sort(sub_array_reordered$ix, + index.return = TRUE)$ix + sub_array_of_selectors <- as.list(sub_array_reordered$x[sub_array_unorder]) + is_circular_dim <- attr(dim_reorder_params[[inner_dim]], + "circular") + if (!is.null(is_circular_dim)) { + if (is_circular_dim) { + goes_across_prime_meridian <- abs(sub_array_of_selectors[[1]]) > + abs(sub_array_of_selectors[[2]]) + } + } + } + else { + sub_array_of_selectors <- dim_reorder_params[[inner_dim]](sub_array_of_selectors)$x + } + } + sub_array_of_indices <- selector_checker(sub_array_of_selectors, + var_ordered, tolerance = if (aiat) { + NULL + } + else { + tolerance_params[[inner_dim]] + }) + } + else { + sub_array_of_indices <- selector_checker(sub_array_of_selectors, + sub_array_of_values, tolerance = if (aiat) { + NULL + } + else { + tolerance_params[[inner_dim]] + }) + } + sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], + inner_dim)] + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> TRANSFORMATION REQUESTED?") + print(with_transform) + print("-> BETA:") + print(beta) + } + } + if (with_transform) { + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> SELECTORS REQUESTED BEFORE TRANSFORM.") + } + } + if (goes_across_prime_meridian) { + sub_array_of_fri <- 1:n + } + else { + if (is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + start_padding <- min(beta, first_index - + 1) + end_padding <- min(beta, n - last_index) + sub_array_of_fri <- (first_index - + start_padding):(last_index + + end_padding) + } + subset_vars_to_transform <- vars_to_transform + if (!is.null(var_ordered)) { + subset_vars_to_transform[[var_with_selectors_name]] <- Subset(var_ordered, + inner_dim, sub_array_of_fri) + } + else { + subset_vars_to_transform[[var_with_selectors_name]] <- Subset(sub_array_of_values, + inner_dim, sub_array_of_fri) + } + transformed_subset_var <- do.call(transform, + c(list(data_array = NULL, variables = subset_vars_to_transform, + file_selectors = selectors_of_first_files_with_data[[i]]), + transform_params))$variables[[var_with_selectors_name]] + if (!is.null(dim_reorder_params[[inner_dim]])) { + transformed_subset_var_reorder <- dim_reorder_params[[inner_dim]](transformed_subset_var) + transformed_subset_var <- transformed_subset_var_reorder$x + transformed_subset_var_unorder <- sort(transformed_subset_var_reorder$ix, + index.return = TRUE)$ix + } + else { + transformed_subset_var_unorder <- 1:length(transformed_subset_var) + } + sub_array_of_sri <- selector_checker(sub_array_of_selectors, + transformed_subset_var, tolerance = if (aiat) { + tolerance_params[[inner_dim]] + } + else { + NULL + }) + if (goes_across_prime_meridian) { + sub_array_of_sri <- c(1:sub_array_of_sri[[2]], + sub_array_of_sri[[1]]:length(transformed_subset_var)) + } + else if (is.list(sub_array_of_sri)) { + sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] + } + ordered_sri <- sub_array_of_sri + sub_array_of_sri <- transformed_subset_var_unorder[sub_array_of_sri] + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> FIRST INDEX:") + print(first_index) + print("-> LAST INDEX:") + print(last_index) + print("-> STRUCTURE OF FIRST ROUND INDICES:") + print(str(sub_array_of_fri)) + print("-> STRUCTURE OF SECOND ROUND INDICES:") + print(str(sub_array_of_sri)) + print("-> STRUCTURE OF TRANSFORMED VARIABLE INDICES:") + print(str(tvi)) + } + } + sri <- do.call("[[<-", c(list(x = sri), + as.list(selector_store_position), + list(value = sub_array_of_sri))) + } + else { + if (goes_across_prime_meridian) { + sub_array_of_fri <- c(1:sub_array_of_indices[[2]], + sub_array_of_indices[[1]]:n) + } + else if (is.list(sub_array_of_indices)) { + sub_array_of_fri <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + else { + sub_array_of_fri <- sub_array_of_indices + } + } + if (!is.null(var_unorder_indices)) { + if (is.null(ordered_fri)) { + ordered_fri <- sub_array_of_fri + } + sub_array_of_fri <- var_unorder_indices[sub_array_of_fri] + } + fri <- do.call("[[<-", c(list(x = fri), + as.list(selector_store_position), + list(value = sub_array_of_fri))) + if (!is.null(file_dim)) { + taken_chunks[selector_store_position[[file_dim]]] <- TRUE + } + else { + taken_chunks <- TRUE + } + } + } + else { + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") + } + } + if (inner_dim %in% names(dim(sub_array_of_selectors))) { + if (is.null(var_with_selectors_name)) { + if (any(na.omit(unlist(sub_array_of_selectors)) < + 1) || any(na.omit(unlist(sub_array_of_selectors)) > + data_dims[inner_dim] * chunk_amount)) { + stop("Provided indices out of range for dimension '", + inner_dim, "' ", "for dataset '", + dat[[i]][["name"]], "' (accepted range: 1 to ", + data_dims[inner_dim] * chunk_amount, + ").") + } + } + else { + if (inner_dim %in% names(dim(sub_array_of_values))) { + inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_values)) == + inner_dim) + if (inner_dim_pos_in_sub_array != + 1) { + new_sub_array_order <- (1:length(dim(sub_array_of_values)))[-inner_dim_pos_in_sub_array] + new_sub_array_order <- c(inner_dim_pos_in_sub_array, + new_sub_array_order) + sub_array_of_values <- .aperm2(sub_array_of_values, + new_sub_array_order) + } + } + } + inner_dim_pos_in_sub_array <- which(names(dim(sub_array_of_selectors)) == + inner_dim) + if (inner_dim_pos_in_sub_array != 1) { + new_sub_array_order <- (1:length(dim(sub_array_of_selectors)))[-inner_dim_pos_in_sub_array] + new_sub_array_order <- c(inner_dim_pos_in_sub_array, + new_sub_array_order) + sub_array_of_selectors <- .aperm2(sub_array_of_selectors, + new_sub_array_order) + } + sub_array_of_indices <- selector_checker(sub_array_of_selectors, + sub_array_of_values, tolerance = tolerance_params[[inner_dim]]) + if (is.list(sub_array_of_indices)) { + sub_array_of_indices <- sub_array_of_indices[[1]]:sub_array_of_indices[[2]] + } + sub_array_of_indices <- sub_array_of_indices[chunk_indices(length(sub_array_of_indices), + chunks[[inner_dim]]["chunk"], chunks[[inner_dim]]["n_chunks"], + inner_dim)] + sub_array_is_list <- FALSE + if (is.list(sub_array_of_indices)) { + sub_array_is_list <- TRUE + sub_array_of_indices <- unlist(sub_array_of_indices) + } + if (is.null(var_with_selectors_name)) { + indices_chunk <- floor((sub_array_of_indices - + 1)/data_dims[inner_dim]) + 1 + transformed_indices <- ((sub_array_of_indices - + 1)%%data_dims[inner_dim]) + 1 + } + else { + indices_chunk <- floor((sub_array_of_indices - + 1)/var_full_dims[inner_dim]) + + 1 + transformed_indices <- ((sub_array_of_indices - + 1)%%var_full_dims[inner_dim]) + + 1 + } + if (sub_array_is_list) { + sub_array_of_indices <- as.list(sub_array_of_indices) + } + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> GOING TO ITERATE ALONG CHUNKS.") + } + } + for (chunk in 1:chunk_amount) { + if (!is.null(names(selector_store_position))) { + selector_store_position[file_dim] <- chunk + } + else { + selector_store_position <- chunk + } + chunk_selectors <- transformed_indices[which(indices_chunk == + chunk)] + sub_array_of_indices <- chunk_selectors + if (with_transform) { + if (!aiat) { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + sub_array_of_fri <- max(c(first_index - + beta, 1)):min(c(last_index + + beta, n)) + sub_array_of_sri <- transform_indices(unlist(sub_array_of_indices) - + first_index + 1, n, m) + if (is.list(sub_array_of_indices)) { + if (length(sub_array_of_sri) > + 1) { + sub_array_of_sri <- sub_array_of_sri[[1]]:sub_array_of_sri[[2]] + } + } + } + else { + first_index <- min(unlist(sub_array_of_indices)) + last_index <- max(unlist(sub_array_of_indices)) + first_index_before_transform <- max(transform_indices(first_index, + m, n) - beta, 1) + last_index_before_transform <- min(transform_indices(last_index, + m, n) + beta, n) + sub_array_of_fri <- first_index_before_transform:last_index_before_transform + if (is.list(sub_array_of_indices) && + (length(sub_array_of_indices) > + 1)) { + sub_array_of_sri <- 1:(last_index - + first_index + 1) + round(beta/n * + m) + } + else { + sub_array_of_sri <- sub_array_of_indices - + first_index + 1 + round(beta/n * + m) + } + } + sri <- do.call("[[<-", c(list(x = sri), + as.list(selector_store_position), + list(value = sub_array_of_sri))) + if (length(sub_array_of_sri) > + 0) { + taken_chunks[chunk] <- TRUE + } + } + else { + sub_array_of_fri <- sub_array_of_indices + if (length(sub_array_of_fri) > + 0) { + taken_chunks[chunk] <- TRUE + } + } + if (!is.null(var_unorder_indices)) { + ordered_fri <- sub_array_of_fri + sub_array_of_fri <- var_unorder_indices[sub_array_of_fri] + } + fri <- do.call("[[<-", c(list(x = fri), + as.list(selector_store_position), + list(value = sub_array_of_fri))) + } + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> FINISHED ITERATING ALONG CHUNKS") + } + } + } + else { + stop("Provided array of indices for dimension '", + inner_dim, "', ", "which goes across the file dimension '", + file_dim, "', but ", "the provided array does not have the dimension '", + inner_dim, "', which is mandatory.") + } + } + } + } + if (debug) { + if (inner_dim %in% dims_to_check) { + print("-> PROCEEDING TO CROP VARIABLES") + } + } + empty_chunks <- which(!taken_chunks) + if (length(empty_chunks) >= length(taken_chunks)) { + stop("Selectors do not match any of the possible values for the dimension '", + inner_dim, "'.") + } + if (length(empty_chunks) > 0) { + chunks_to_keep <- which(taken_chunks) + dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], + list(chunks_to_keep)) + } + dat[[i]][["selectors"]][[inner_dim]] <- list(fri = fri, + sri = sri) + types_of_var_to_crop <- "picked" + if (with_transform) { + types_of_var_to_crop <- c(types_of_var_to_crop, + "transformed") + } + if (!is.null(dim_reorder_params[[inner_dim]])) { + types_of_var_to_crop <- c(types_of_var_to_crop, + "reordered") + } + for (type_of_var_to_crop in types_of_var_to_crop) { + if (type_of_var_to_crop == "transformed") { + if (is.null(tvi)) { + if (!is.null(dim_reorder_params[[inner_dim]])) { + crop_indices <- unique(unlist(ordered_sri)) + } + else { + crop_indices <- unique(unlist(sri)) + } + } + else { + crop_indices <- unique(unlist(tvi)) + } + vars_to_crop <- transformed_vars[[i]] + common_vars_to_crop <- transformed_common_vars + } + else if (type_of_var_to_crop == "reordered") { + crop_indices <- unique(unlist(ordered_fri)) + vars_to_crop <- picked_vars_ordered[[i]] + common_vars_to_crop <- picked_common_vars_ordered + } + else { + crop_indices <- unique(unlist(fri)) + vars_to_crop <- picked_vars[[i]] + common_vars_to_crop <- picked_common_vars + } + for (var_to_crop in names(vars_to_crop)) { + if (inner_dim %in% names(dim(vars_to_crop[[var_to_crop]]))) { + if (!is.null(crop_indices)) { + if (type_of_var_to_crop == "transformed") { + if (!aiat) { + vars_to_crop[[var_to_crop]] <- Subset(transformed_subset_var, + inner_dim, crop_indices) + } + else { + vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], + inner_dim, crop_indices) + } + } + else { + vars_to_crop[[var_to_crop]] <- Subset(vars_to_crop[[var_to_crop]], + inner_dim, crop_indices) + } + } + } + } + if (i == length(dat)) { + for (common_var_to_crop in names(common_vars_to_crop)) { + if (inner_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], + inner_dim, crop_indices) + } + } + } + if (type_of_var_to_crop == "transformed") { + if (!is.null(vars_to_crop)) { + transformed_vars[[i]] <- vars_to_crop + } + if (i == length(dat)) { + transformed_common_vars <- common_vars_to_crop + } + } + else if (type_of_var_to_crop == "reordered") { + if (!is.null(vars_to_crop)) { + picked_vars_ordered[[i]] <- vars_to_crop + } + if (i == length(dat)) { + picked_common_vars_ordered <- common_vars_to_crop + } + } + else { + if (!is.null(vars_to_crop)) { + picked_vars[[i]] <- vars_to_crop + } + if (i == length(dat)) { + picked_common_vars <- common_vars_to_crop + } + } + } + } + } + } + } + for (file_dim in names(dims_to_crop)) { + indices_to_keep <- unique(unlist(dims_to_crop[[file_dim]])) + array_of_files_to_load <- Subset(array_of_files_to_load, + file_dim, indices_to_keep) + array_of_not_found_files <- Subset(array_of_not_found_files, + file_dim, indices_to_keep) + for (i in 1:length(dat)) { + for (selector_dim in names(dat[[i]][["selectors"]])) { + if (selector_dim == file_dim) { + for (j in 1:length(dat[[i]][["selectors"]][[selector_dim]][["fri"]])) { + dat[[i]][["selectors"]][[selector_dim]][["fri"]][[j]] <- dat[[i]][["selectors"]][[selector_dim]][["fri"]][[j]][indices_to_keep] + } + for (j in 1:length(dat[[i]][["selectors"]][[selector_dim]][["sri"]])) { + dat[[i]][["selectors"]][[selector_dim]][["sri"]][[j]] <- dat[[i]][["selectors"]][[selector_dim]][["sri"]][[j]][indices_to_keep] + } + } + if (file_dim %in% names(dim(dat[[i]][["selectors"]][[selector_dim]][["fri"]]))) { + dat[[i]][["selectors"]][[selector_dim]][["fri"]] <- Subset(dat[[i]][["selectors"]][[selector_dim]][["fri"]], + file_dim, indices_to_keep) + dat[[i]][["selectors"]][[selector_dim]][["sri"]] <- Subset(dat[[i]][["selectors"]][[selector_dim]][["sri"]], + file_dim, indices_to_keep) + } + } + for (picked_var in names(picked_vars[[i]])) { + if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { + picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], + file_dim, indices_to_keep) + } + } + for (transformed_var in names(transformed_vars[[i]])) { + if (file_dim %in% names(dim(transformed_vars[[i]][[transformed_var]]))) { + transformed_vars[[i]][[transformed_var]] <- Subset(transformed_vars[[i]][[transformed_var]], + file_dim, indices_to_keep) + } + } + } + for (picked_common_var in names(picked_common_vars)) { + if (file_dim %in% names(dim(picked_common_vars[[picked_common_var]]))) { + picked_common_vars[[picked_common_var]] <- Subset(picked_common_vars[[picked_common_var]], + file_dim, indices_to_keep) + } + } + for (transformed_common_var in names(transformed_common_vars)) { + if (file_dim %in% names(dim(transformed_common_vars[[transformed_common_var]]))) { + transformed_common_vars[[transformed_common_var]] <- Subset(transformed_common_vars[[transformed_common_var]], + file_dim, indices_to_keep) + } + } + } + total_inner_dims <- NULL + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + inner_dims <- expected_inner_dims[[i]] + inner_dims <- sapply(inner_dims, function(x) { + if (!all(sapply(dat[[i]][["selectors"]][[x]][["sri"]], + is.null))) { + max(sapply(dat[[i]][["selectors"]][[x]][["sri"]], + length)) + } + else { + if (length(var_params[[x]]) > 0) { + if (var_params[[x]] %in% names(transformed_vars[[i]])) { + length(transformed_vars[[i]][[var_params[[x]]]]) + } + else if (var_params[[x]] %in% names(transformed_common_vars)) { + length(transformed_common_vars[[var_params[[x]]]]) + } + else { + max(sapply(dat[[i]][["selectors"]][[x]][["fri"]], + length)) + } + } + else { + max(sapply(dat[[i]][["selectors"]][[x]][["fri"]], + length)) + } + } + }) + names(inner_dims) <- expected_inner_dims[[i]] + if (is.null(total_inner_dims)) { + total_inner_dims <- inner_dims + } + else { + new_dims <- .MergeArrayDims(total_inner_dims, + inner_dims) + total_inner_dims <- new_dims[[3]] + } + } + } +print('total_inner_dims') +print(total_inner_dims) + + new_dims <- .MergeArrayDims(dim(array_of_files_to_load), + total_inner_dims) + final_dims <- new_dims[[3]][dim_names] +print('final_dims in the beginning') +print(final_dims) + final_dims_fake <- final_dims + if (merge_across_dims) { + if (!is.null(inner_dims_across_files)) { + for (file_dim_across in names(inner_dims_across_files)) { + inner_dim_pos <- which(names(final_dims_fake) == + inner_dims_across_files[[file_dim_across]]) + new_dims <- c() + if (inner_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(inner_dim_pos - + 1)]) + } + ##Bug: 'prod' time x chunk + new_dims <- c(new_dims, setNames(prod(final_dims_fake[c(inner_dim_pos, + inner_dim_pos + 1)]), inner_dims_across_files[[file_dim_across]])) + if (inner_dim_pos + 1 < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(inner_dim_pos + + 2):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + + all_split_dims <- NULL + if (split_multiselected_dims) { + for (dim_param in 1:length(dim_params)) { + if (!is.null(dim(dim_params[[dim_param]]))) { + if (length(dim(dim_params[[dim_param]])) > 1) { + split_dims <- dim(dim_params[[dim_param]]) + all_split_dims <- c(all_split_dims, setNames(list(split_dims), + names(dim_params)[dim_param])) + if (is.null(names(split_dims))) { + names(split_dims) <- paste0(names(dim_params)[dim_param], + 1:length(split_dims)) + } + old_dim_pos <- which(names(final_dims_fake) == + names(dim_params)[dim_param]) + new_dims <- c() + if (old_dim_pos > 1) { + new_dims <- c(new_dims, final_dims_fake[1:(old_dim_pos - + 1)]) + } + new_dims <- c(new_dims, split_dims) + if (old_dim_pos < length(final_dims_fake)) { + new_dims <- c(new_dims, final_dims_fake[(old_dim_pos + + 1):length(final_dims_fake)]) + } + final_dims_fake <- new_dims + } + } + } + } + if (!silent) { + .message("Detected dimension sizes:") + longest_dim_len <- max(sapply(names(final_dims_fake), nchar)) + longest_size_len <- max(sapply(paste0(final_dims_fake, ''), nchar)) + sapply(names(final_dims_fake), + function(x) { + message(paste0("* ", paste(rep(' ', longest_dim_len - nchar(x)), collapse = ''), + x, ": ", paste(rep(' ', longest_size_len - nchar(paste0(final_dims_fake[x], ''))), collapse = ''), + final_dims_fake[x])) + }) + bytes <- prod(c(final_dims_fake, 8)) + dim_sizes <- paste(final_dims_fake, collapse = ' x ') + if (retrieve) { + .message(paste("Total size of requested data:")) + } else { + .message(paste("Total size of involved data:")) + } + .message(paste(dim_sizes, " x 8 bytes =", + format(structure(bytes, class = "object_size"), units = "auto")), + indent = 2) + } + + # The following several lines will only be run if retrieve = TRUE + if (retrieve) { + + ########## CREATING THE SHARED MATRIX AND DISPATCHING WORK PIECES ########### + # TODO: try performance of storing all in cols instead of rows + # Create the shared memory array, and a pointer to it, to be sent + # to the work pieces. + data_array <- big.matrix(nrow = prod(final_dims), ncol = 1) + shared_matrix_pointer <- describe(data_array) + if (is.null(num_procs)) { + num_procs <- availableCores() + } + # Creating a shared tmp folder to store metadata from each chunk + array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load)) + if (!is.null(metadata_dims)) { + metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load)))) + names(metadata_indices_to_load) <- names(dim(array_of_files_to_load)) + metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims))) + array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load, + list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims]))))) + } + metadata_file_counter <- 0 + metadata_folder <- tempfile('metadata') + dir.create(metadata_folder) + # Build the work pieces, each with: + # - file path + # - total size (dims) of store array + # - start position in store array + # - file selectors (to provide extra info. useful e.g. to select variable) + # - indices to take from file + work_pieces <- list() + for (i in 1:length(dat)) { + if (dataset_has_files[i]) { + selectors <- dat[[i]][['selectors']] + file_dims <- found_file_dims[[i]] + inner_dims <- expected_inner_dims[[i]] + sub_array_dims <- final_dims[file_dims] + sub_array_dims[found_pattern_dim] <- 1 + sub_array_of_files_to_load <- array(1:prod(sub_array_dims), + dim = sub_array_dims) + names(dim(sub_array_of_files_to_load)) <- names(sub_array_dims) + # Detect which of the dimensions of the dataset go across files. + file_dim_across_files <- lapply(inner_dims, + function(x) { + dim_across <- sapply(inner_dims_across_files, function(y) x %in% y) + if (any(dim_across)) { + names(inner_dims_across_files)[which(dim_across)[1]] + } else { + NULL + } + }) + names(file_dim_across_files) <- inner_dims + j <- 1 + while (j <= prod(sub_array_dims)) { + # Work out file path. + file_to_load_sub_indices <- which(sub_array_of_files_to_load == j, arr.ind = TRUE)[1, ] + names(file_to_load_sub_indices) <- names(sub_array_dims) + file_to_load_sub_indices[found_pattern_dim] <- i + big_dims <- rep(1, length(dim(array_of_files_to_load))) + names(big_dims) <- names(dim(array_of_files_to_load)) + file_to_load_indices <- .MergeArrayDims(file_to_load_sub_indices, big_dims)[[1]] + file_to_load <- do.call('[[', c(list(array_of_files_to_load), + as.list(file_to_load_indices))) + not_found_file <- do.call('[[', c(list(array_of_not_found_files), + as.list(file_to_load_indices))) + load_file_metadata <- do.call('[', c(list(array_of_metadata_flags), + as.list(file_to_load_indices))) + if (load_file_metadata) { + metadata_file_counter <- metadata_file_counter + 1 + } + if (!is.na(file_to_load) && !not_found_file) { + # Work out indices to take + first_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + selectors[[x]][['fri']][[1]] + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + selectors[[x]][['fri']][[which_chunk]] + } + }) + names(first_round_indices) <- inner_dims + second_round_indices <- lapply(inner_dims, + function (x) { + if (is.null(file_dim_across_files[[x]])) { + selectors[[x]][['sri']][[1]] + } else { + which_chunk <- file_to_load_sub_indices[file_dim_across_files[[x]]] + selectors[[x]][['sri']][[which_chunk]] + } + }) +if (debug) { +print("-> BUILDING A WORK PIECE") +#print(str(selectors)) +} + names(second_round_indices) <- inner_dims + if (!any(sapply(first_round_indices, length) == 0)) { + work_piece <- list() + work_piece[['first_round_indices']] <- first_round_indices + work_piece[['second_round_indices']] <- second_round_indices + work_piece[['file_indices_in_array_of_files']] <- file_to_load_indices + work_piece[['file_path']] <- file_to_load + work_piece[['store_dims']] <- final_dims + # Work out store position + store_position <- final_dims + store_position[names(file_to_load_indices)] <- file_to_load_indices + store_position[inner_dims] <- rep(1, length(inner_dims)) + work_piece[['store_position']] <- store_position + # Work out file selectors + file_selectors <- sapply(file_dims, + function (x) { + vector_to_pick <- 1 + if (x %in% names(depending_file_dims)) { + vector_to_pick <- file_to_load_indices[depending_file_dims[[x]]] + } + selectors[file_dims][[x]][[vector_to_pick]][file_to_load_indices[x]] + }) + names(file_selectors) <- file_dims + work_piece[['file_selectors']] <- file_selectors + # Send variables for transformation + if (!is.null(transform) && (length(transform_vars) > 0)) { + vars_to_transform <- NULL + picked_vars_to_transform <- which(names(picked_vars[[i]]) %in% transform_vars) + if (length(picked_vars_to_transform) > 0) { + picked_vars_to_transform <- names(picked_vars[[i]])[picked_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_vars[[i]][picked_vars_to_transform]) + if (any(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))) { + picked_vars_ordered_to_transform <- picked_vars_to_transform[which(picked_vars_to_transform %in% names(picked_vars_ordered[[i]]))] + vars_to_transform[picked_vars_ordered_to_transform] <- picked_vars_ordered[[i]][picked_vars_ordered_to_transform] + } + } + picked_common_vars_to_transform <- which(names(picked_common_vars) %in% transform_vars) + if (length(picked_common_vars_to_transform) > 0) { + picked_common_vars_to_transform <- names(picked_common_vars)[picked_common_vars_to_transform] + vars_to_transform <- c(vars_to_transform, picked_common_vars[picked_common_vars_to_transform]) + if (any(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))) { + picked_common_vars_ordered_to_transform <- picked_common_vars_to_transform[which(picked_common_vars_to_transform %in% names(picked_common_vars_ordered))] + vars_to_transform[picked_common_vars_ordered_to_transform] <- picked_common_vars_ordered[picked_common_vars_ordered_to_transform] + } + } + work_piece[['vars_to_transform']] <- vars_to_transform + } + # Send flag to load metadata + if (load_file_metadata) { + work_piece[['save_metadata_in']] <- paste0(metadata_folder, '/', metadata_file_counter) + } + work_pieces <- c(work_pieces, list(work_piece)) + } + } + j <- j + 1 + } + } + } +#print("N") +if (debug) { +print("-> WORK PIECES BUILT") +} + + # Calculate the progress %s that will be displayed and assign them to + # the appropriate work pieces. + if (length(work_pieces) / num_procs >= 2 && !silent) { + if (length(work_pieces) / num_procs < 10) { + amount <- 100 / ceiling(length(work_pieces) / num_procs) + reps <- ceiling(length(work_pieces) / num_procs) + } else { + amount <- 10 + reps <- 10 + } + progress_steps <- rep(amount, reps) + if (length(work_pieces) < (reps + 1)) { + selected_pieces <- length(work_pieces) + progress_steps <- c(sum(head(progress_steps, reps)), + tail(progress_steps, reps)) + } else { + selected_pieces <- round(seq(1, length(work_pieces), + length.out = reps + 1))[-1] + } + progress_steps <- paste0(' + ', round(progress_steps, 2), '%') + progress_message <- 'Progress: 0%' + } else { + progress_message <- '' + selected_pieces <- NULL + } + piece_counter <- 1 + step_counter <- 1 + work_pieces <- lapply(work_pieces, + function (x) { + if (piece_counter %in% selected_pieces) { + wp <- c(x, list(progress_amount = progress_steps[step_counter])) + step_counter <<- step_counter + 1 + } else { + wp <- x + } + piece_counter <<- piece_counter + 1 + wp + }) + if (!silent) { + .message("If the size of the requested data is close to or above the free shared RAM memory, R may crash.") + .message("If the size of the requested data is close to or above the half of the free RAM memory, R may crash.") + .message(paste0("Will now proceed to read and process ", length(work_pieces), " data files:")) + if (length(work_pieces) < 30) { + lapply(work_pieces, function (x) .message(x[['file_path']], indent = 2)) + } else { + .message("The list of files is long. You can check it after Start() finishes in the output '$Files'.", indent = 2, exdent = 5) + } + } + + # Build the cluster of processes that will do the work and dispatch work pieces. + # The function .LoadDataFile is applied to each work piece. This function will + # open the data file, regrid if needed, subset, apply the mask, + # compute and apply the weights if needed, + # disable extreme values and store in the shared memory matrix. +#print("O") + if (!silent) { + .message("Loading... This may take several minutes...") + if (progress_message != '') { + .message(progress_message, appendLF = FALSE) + } + } + if (num_procs == 1) { + found_files <- lapply(work_pieces, .LoadDataFile, + shared_matrix_pointer = shared_matrix_pointer, + file_data_reader = file_data_reader, + synonims = synonims, + transform = transform, + transform_params = transform_params, + silent = silent, debug = debug) + } else { + cluster <- makeCluster(num_procs, outfile = "") + # Send the heavy work to the workers + work_errors <- try({ + found_files <- clusterApplyLB(cluster, work_pieces, .LoadDataFile, + shared_matrix_pointer = shared_matrix_pointer, + file_data_reader = file_data_reader, + synonims = synonims, + transform = transform, + transform_params = transform_params, + silent = silent, debug = debug) + }) + stopCluster(cluster) + } + + if (!silent) { + if (progress_message != '') { + .message("\n", tag = '') + } + } +#print("P") + data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) + gc() + + # Load metadata and remove the metadata folder + if (!is.null(metadata_dims)) { + loaded_metadata_files <- list.files(metadata_folder) + loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS) + unlink(metadata_folder, recursive = TRUE) + return_metadata <- vector('list', length = prod(dim(array_of_metadata_flags)[metadata_dims])) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) + attr(data_array, 'Variables') <- return_metadata + # TODO: Try to infer data type from loaded_metadata + # as.integer(data_array) + } + + failed_pieces <- work_pieces[which(unlist(found_files))] + for (failed_piece in failed_pieces) { + array_of_not_found_files <- do.call('[<-', + c(list(array_of_not_found_files), + as.list(failed_piece[['file_indices_in_array_of_files']]), + list(value = TRUE))) + } + if (any(array_of_not_found_files)) { + for (i in 1:prod(dim(array_of_files_to_load))) { + if (is.na(array_of_not_found_files[i])) { + array_of_files_to_load[i] <- NA + } else { + if (array_of_not_found_files[i]) { + array_of_not_found_files[i] <- array_of_files_to_load[i] + array_of_files_to_load[i] <- NA + } else { + array_of_not_found_files[i] <- NA + } + } + } + } else { + array_of_not_found_files <- NULL + } + + } # End if (retrieve) + + # Replace the vars and common vars by the transformed vars and common vars + for (i in 1:length(dat)) { + if (length(names(transformed_vars[[i]])) > 0) { + picked_vars[[i]][names(transformed_vars[[i]])] <- transformed_vars[[i]] + } else if (length(names(picked_vars_ordered[[i]])) > 0) { + picked_vars[[i]][names(picked_vars_ordered[[i]])] <- picked_vars_ordered[[i]] + } + } + if (length(names(transformed_common_vars)) > 0) { + picked_common_vars[names(transformed_common_vars)] <- transformed_common_vars + } else if (length(names(picked_common_vars_ordered)) > 0) { + picked_common_vars[names(picked_common_vars_ordered)] <- picked_common_vars_ordered + } +if (debug) { +print("-> THE TRANSFORMED VARS:") +print(str(transformed_vars)) +print("-> THE PICKED VARS:") +print(str(picked_vars)) +} + + file_selectors <- NULL + for (i in 1:length(dat)) { + file_selectors[[dat[[i]][['name']]]] <- dat[[i]][['selectors']][which(names(dat[[i]][['selectors']]) %in% found_file_dims[[i]])] + } + if (retrieve) { + if (!silent) { + .message("Successfully retrieved data.") + } + var_backup <- attr(data_array, 'Variables')[[1]] + attr(data_array, 'Variables') <- NULL + attributes(data_array) <- c(attributes(data_array), + list(Variables = c(list(common = c(picked_common_vars, var_backup)), + picked_vars), + Files = array_of_files_to_load, + NotFoundFiles = array_of_not_found_files, + FileSelectors = file_selectors, + PatternDim = found_pattern_dim) + ) + attr(data_array, 'class') <- c('startR_array', attr(data_array, 'class')) + data_array +print('in the end of code, dim(data_array) = ') +print(dim(data_array)) + } else { + if (!silent) { + .message("Successfully discovered data dimensions.") + } + start_call <- match.call() + for (i in 2:length(start_call)) { + if (class(start_call[[i]]) %in% c('name', 'call')) { + start_call[[i]] <- eval.parent(start_call[[i]]) + } + } + start_call[['retrieve']] <- TRUE + attributes(start_call) <- c(attributes(start_call), + list(Dimensions = final_dims_fake, + Variables = c(list(common = picked_common_vars), picked_vars), + ExpectedFiles = array_of_files_to_load, + FileSelectors = file_selectors, + PatternDim = found_pattern_dim, + MergedDims = if (merge_across_dims) { + inner_dims_across_files + } else { + NULL + }, + SplitDims = if (split_multiselected_dims) { + all_split_dims + } else { + NULL + }) + ) + attr(start_call, 'class') <- c('startR_cube', attr(start_call, 'class')) + start_call + } +