From 630dcd79f6656cf578ac297a957e77e261d58bdd Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 27 Jun 2017 18:41:08 +0200 Subject: [PATCH 1/8] Bugfix when retrieving no file dimensions. --- R/Start.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/Start.R b/R/Start.R index b1889f0..5707989 100644 --- a/R/Start.R +++ b/R/Start.R @@ -283,6 +283,10 @@ Start <- function(..., # dim = indices/selectors, if ((length(dat_to_fetch) > 0) && (length(dat_to_fetch) < length(dat))) { .warning("'path' has been provided for some datasets. Any information in the configuration file related to these will be ignored.") } + if (length(dat_to_fetch) > 0) { + stop("Specified only the name for some data sets, but not the path ", + "pattern. This option has not been yet implemented.") + } # Reorder inner_dims_across_files (to make the keys be the file dimensions, # and the values to be the inner dimensions that go across it). @@ -498,7 +502,10 @@ debug <- TRUE dat_selectors <- dim_params dat_selectors[[found_pattern_dim]] <- dat_selectors[[found_pattern_dim]][i] dim_vars <- paste0('$', dim_names, '$') - file_dims <- dim_names[which(sapply(dim_vars, grepl, dat[[i]][['path']], fixed = TRUE))] + file_dims <- which(sapply(dim_vars, grepl, dat[[i]][['path']], fixed = TRUE)) + if (length(file_dims) > 0) { + file_dims <- dim_names[file_dims] + } file_dims <- unique(c(pattern_dims, file_dims)) found_file_dims[[i]] <- file_dims expected_inner_dims[[i]] <- dim_names[which(!(dim_names %in% file_dims))] -- GitLab From 86f6b31409ef6f713eafd99ae556834cfdc6cd0a Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 27 Jun 2017 19:57:42 +0200 Subject: [PATCH 2/8] Some progress. --- R/Start.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/Start.R b/R/Start.R index 5707989..eebcb4c 100644 --- a/R/Start.R +++ b/R/Start.R @@ -209,7 +209,7 @@ Start <- function(..., # dim = indices/selectors, .warning(paste0("A reorder for the selectors of '", pattern_dim, "' has been specified, but it is a pattern dimension and the reorder will be ignored.")) } - if (any(sapply(dat, is.list))) { + if (is.list(dat) || any(sapply(dat, is.list))) { if (is.null(found_pattern_dim)) { found_pattern_dim <- pattern_dim } else { @@ -218,7 +218,7 @@ Start <- function(..., # dim = indices/selectors, } } if (is.null(found_pattern_dim)) { - .warning(paste0("Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications (to be fetched in configuration file).")) + .warning(paste0("Could not find any pattern dim with explicit data set descriptions (in the form of list of lists). Taking the first pattern dim, '", pattern_dims[1], "', as dimension with pattern specifications.")) found_pattern_dim <- pattern_dims[1] } @@ -240,6 +240,10 @@ Start <- function(..., # dim = indices/selectors, dat_names <- c() if (!is.list(dat)) { dat <- as.list(dat) + } else { + if (!any(sapply(dat, is.list))) { + dat <- list(dat) + } } for (i in 1:length(dat)) { if (is.character(dat[[i]]) && length(dat[[i]]) == 1 && nchar(dat[[i]]) > 0) { -- GitLab From d6eb8480660a000879538b10e81f3d266eaf1050 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Tue, 27 Jun 2017 20:05:42 +0200 Subject: [PATCH 3/8] Small error message improvement. --- R/NcDataReader.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/NcDataReader.R b/R/NcDataReader.R index c5ec75e..7b521e5 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -39,7 +39,8 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, position_of_var <- length(inner_indices) } else { stop("A 'var'/'variable' file dimension or inner dimension must be ", - "requested for NcDataReader() to read NetCDF files.") + "requested for NcDataReader() to read NetCDF files with more than ", + "one variable.") } inner_indices[[position_of_var]] <- sapply(inner_indices[[position_of_var]], -- GitLab From 79a7cfed278a02dcc863e81012048bbc4b22b995 Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 14 Jul 2017 18:42:01 +0200 Subject: [PATCH 4/8] Small fix in NcDataReader. --- R/NcDataReader.R | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/R/NcDataReader.R b/R/NcDataReader.R index c5ec75e..f5320aa 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -114,15 +114,17 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, result <- easyNCDF::NcToArray(file_to_read, inner_indices, drop_var_dim = drop_var_dim, expect_all_indices = TRUE, allow_out_of_range = TRUE) } - names(dim(result)) <- sapply(names(dim(result)), - function(x) { - which_entry <- which(sapply(synonims, function(y) x %in% y)) - if (length(which_entry) > 0) { - names(synonims)[which_entry] - } else { - x - } - }) + if (!is.null(dim(result))) { + names(dim(result)) <- sapply(names(dim(result)), + function(x) { + which_entry <- which(sapply(synonims, function(y) x %in% y)) + if (length(which_entry) > 0) { + names(synonims)[which_entry] + } else { + x + } + }) + } names(attr(result, 'variables')) <- sapply(names(attr(result, 'variables')), function(x) { which_entry <- which(sapply(synonims, function(y) x %in% y)) -- GitLab From 7717d305c48bcdcc163346e0b4fde542aa74618f Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 14 Jul 2017 19:52:19 +0200 Subject: [PATCH 5/8] Added a comment. --- R/Start.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/Start.R b/R/Start.R index eebcb4c..0397025 100644 --- a/R/Start.R +++ b/R/Start.R @@ -2518,6 +2518,8 @@ print("-> WORK PIECES BUILT") return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) attr(data_array, 'variables') <- return_metadata + # TODO: Try to infer data type from loaded_metadata + # as.integer(data_array) failed_pieces <- work_pieces[which(unlist(found_files))] for (failed_piece in failed_pieces) { -- GitLab From e89811492c2ddf4f9dbac939c2b9dca55883fa4c Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 14 Jul 2017 20:34:12 +0200 Subject: [PATCH 6/8] Adding option to not load metadata. --- R/Start.R | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/R/Start.R b/R/Start.R index 0397025..2854453 100644 --- a/R/Start.R +++ b/R/Start.R @@ -224,7 +224,9 @@ Start <- function(..., # dim = indices/selectors, # Check metadata_dims if (!is.null(metadata_dims)) { - if (!is.character(metadata_dims) || (length(metadata_dims) < 1)) { + if (is.na(metadata_dims)) { + metadata_dims <- NULL + } else if (!is.character(metadata_dims) || (length(metadata_dims) < 1)) { stop("Parameter 'metadata' dims must be a vector of at least one character string.") } } else { @@ -565,8 +567,10 @@ debug <- TRUE j <- j + 1 } # (Check the metadata_dims). - if (any(!(metadata_dims %in% file_dims))) { - stop("All dimensions in 'metadata_dims' must be file dimensions.") + if (!is.null(metadata_dims)) { + if (any(!(metadata_dims %in% file_dims))) { + stop("All dimensions in 'metadata_dims' must be file dimensions.") + } } # (Check the *_var parameters). if (any(!(unlist(var_params) %in% names(return_vars)))) { @@ -2258,11 +2262,13 @@ print("-> PROCEEDING TO CROP VARIABLES") } # Creating a shared tmp folder to store metadata from each chunk array_of_metadata_flags <- array(FALSE, dim = dim(array_of_files_to_load)) - metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load)))) - names(metadata_indices_to_load) <- names(dim(array_of_files_to_load)) - metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims))) - array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load, - list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims]))))) + if (!is.null(metadata_dims)) { + metadata_indices_to_load <- as.list(rep(1, length(dim(array_of_files_to_load)))) + names(metadata_indices_to_load) <- names(dim(array_of_files_to_load)) + metadata_indices_to_load[metadata_dims] <- as.list(rep(TRUE, length(metadata_dims))) + array_of_metadata_flags <- do.call('[<-', c(list(array_of_metadata_flags), metadata_indices_to_load, + list(value = rep(TRUE, prod(dim(array_of_files_to_load)[metadata_dims]))))) + } metadata_file_counter <- 0 metadata_folder <- tempfile('metadata') dir.create(metadata_folder) @@ -2511,15 +2517,17 @@ print("-> WORK PIECES BUILT") gc() # Load metadata and remove the metadata folder - loaded_metadata_files <- list.files(metadata_folder) - loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS) - unlink(metadata_folder, recursive = TRUE) - return_metadata <- vector('list', length = prod(dim(array_of_metadata_flags)[metadata_dims])) - return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata - dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) - attr(data_array, 'variables') <- return_metadata - # TODO: Try to infer data type from loaded_metadata - # as.integer(data_array) + if (!is.null(metadata_dims)) { + loaded_metadata_files <- list.files(metadata_folder) + loaded_metadata <- lapply(paste0(metadata_folder, '/', loaded_metadata_files), readRDS) + unlink(metadata_folder, recursive = TRUE) + return_metadata <- vector('list', length = prod(dim(array_of_metadata_flags)[metadata_dims])) + return_metadata[as.numeric(loaded_metadata_files)] <- loaded_metadata + dim(return_metadata) <- dim(array_of_metadata_flags[metadata_dims]) + attr(data_array, 'variables') <- return_metadata + # TODO: Try to infer data type from loaded_metadata + # as.integer(data_array) + } failed_pieces <- work_pieces[which(unlist(found_files))] for (failed_piece in failed_pieces) { -- GitLab From 115937c9cfe16112a56f7af40e57917a48f0750f Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Fri, 14 Jul 2017 21:00:28 +0200 Subject: [PATCH 7/8] Small fix in Subset. --- R/Subset.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/Subset.R b/R/Subset.R index a084a1c..c82b0c6 100644 --- a/R/Subset.R +++ b/R/Subset.R @@ -78,11 +78,15 @@ Subset <- function(x, along, indices, drop = FALSE) { metadata[['dim']] <- metadata[['dim']][-dims_to_drop] if (is.character(dim_names)) { names(metadata[['dim']]) <- dim_names[-dims_to_drop] - metadata[['dimensions']] <- dim_names[-dims_to_drop] + if ('dimensions' %in% names(attributes(x))) { + metadata[['dimensions']] <- dim_names[-dims_to_drop] + } } } else if (is.character(dim_names)) { names(metadata[['dim']]) <- dim_names - metadata[['dimensions']] <- dim_names + if ('dimensions' %in% names(attributes(x))) { + metadata[['dimensions']] <- dim_names + } } attributes(subset) <- metadata subset -- GitLab From 8bf0973c8263ed03601ef8d25e8eda9a7f0a669f Mon Sep 17 00:00:00 2001 From: Nicolau Manubens Date: Mon, 18 Sep 2017 18:58:45 +0200 Subject: [PATCH 8/8] Fix in .MergeArrays. --- R/Utils.R | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/R/Utils.R b/R/Utils.R index 24437ac..7464a9d 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -440,33 +440,41 @@ indices <- function(x) { # 'a' 'b' 'c' 'e' 'd' 'f' 'g' # 2 4 3 7 5 9 11 .MergeArrays <- function(array1, array2, along) { - if (!(identical(names(dim(array1)), names(dim(array2))) && - identical(dim(array1)[-which(names(dim(array1)) == along)], - dim(array2)[-which(names(dim(array2)) == along)]))) { - new_dims <- .MergeArrayDims(dim(array1), dim(array2)) - dim(array1) <- new_dims[[1]] - dim(array2) <- new_dims[[2]] - for (j in 1:length(dim(array1))) { - if (names(dim(array1))[j] != along) { - if (dim(array1)[j] != dim(array2)[j]) { - if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { - na_array_dims <- dim(array2) - na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] - na_array <- array(dim = na_array_dims) - array2 <- abind(array2, na_array, along = j) - names(dim(array2)) <- names(na_array_dims) - } else { - na_array_dims <- dim(array1) - na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] - na_array <- array(dim = na_array_dims) - array1 <- abind(array1, na_array, along = j) - names(dim(array1)) <- names(na_array_dims) + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in 1:length(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } } } } } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 } - array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) - names(dim(array1)) <- names(dim(array2)) array1 } -- GitLab