diff --git a/.Rbuildignore b/.Rbuildignore index b320a05571bd3c450b5b1f2e522261ba923fc601..90018c782d3ac1075895bbfa778c1248ae5259dd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,7 +6,7 @@ ^README\.md$ #\..*\.RData$ #^vignettes$ -^tests$ +#^tests$ ^inst/doc$ #^inst/doc/*$ #^inst/doc/figures/$ diff --git a/DESCRIPTION b/DESCRIPTION index f761503614963190702c9aea93cd264bcfba3368..af073b1b4205e4f57a6eee4980afcfd01203cff7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: startR Title: Automatically Retrieve Multidimensional Distributed Data Sets -Version: 2.2.0 +Version: 2.2.0-1 Authors@R: c( person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = c("aut")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), @@ -29,7 +29,8 @@ Imports: easyNCDF, s2dv, ClimProjDiags, - PCICt + PCICt, + methods Suggests: stats, utils, diff --git a/NAMESPACE b/NAMESPACE index 1434a0f7a06ce718c3ea494c6e749829a173585c..c6bca724ce7efa965d4ca5f203d9fceb2228300a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ import(future) import(multiApply) import(parallel) importFrom(ClimProjDiags,Subset) +importFrom(methods,is) importFrom(s2dv,CDORemap) importFrom(stats,na.omit) importFrom(stats,setNames) diff --git a/NEWS.md b/NEWS.md index c542dd1cc14451ce643b361c58583b7744312829..11f5a30f8636c0e1e50d84fdf2d9b0ffc60a4f29 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# startR v2.2.0-1 (Release date: 2022-04-19) +- Bugfix for the case that the variable has units like time, e.g., "days". +- Development of metadata reshaping. The metadata should correspond to data if data are reshaped by parameter "merge_across_dims" and "split_multiselected_dims", as well as if data selectors are not continuous indices. +- Development of multiple dependency by array selector. An inner dimension indices can vary with multiple file dimensions. + # startR v2.2.0 (Release date: 2022-02-11) - License changes to Apache License 2.0 - R version dependency changes to >= 3.6.0 diff --git a/R/AddStep.R b/R/AddStep.R index 037bd58ca9aae1f11be3042dd046dc488d9ac005..00af3ab776f86fe8bac6e72e6dfb5b9c901cd632 100644 --- a/R/AddStep.R +++ b/R/AddStep.R @@ -41,22 +41,22 @@ #' use_attributes = list(data = "Variables")) #' wf <- AddStep(data, step, pi_val = pi_short) #' +#'@importFrom methods is #'@export AddStep <- function(inputs, step_fun, ...) { # Check step_fun - if (!('startR_step_fun' %in% class(step_fun))) { + if (!is(step_fun, 'startR_step_fun')) { stop("Parameter 'step_fun' must be a startR step function as returned by Step.") } # Check inputs - if (any(c('startR_cube', 'startR_workflow') %in% class(inputs))) { + if (is(inputs, 'startR_cube') | is(inputs, 'startR_workflow')) { inputs <- list(inputs) names(inputs) <- 'input1' } else if (is.list(inputs)) { if (any(!sapply(inputs, - function(x) any(c('startR_cube', - 'startR_workflow') %in% class(x))))) { + function(x) is(x, 'startR_cube') | is(x, 'startR_workflow')))) { stop("Parameter 'inputs' must be one or a list of objects of the class ", "'startR_cube' or 'startR_workflow'.") } @@ -90,7 +90,7 @@ AddStep <- function(inputs, step_fun, ...) 
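# Illustrative sketch (not from the patch; object below is hypothetical): the
# methods::is() calls introduced in this file behave like the previous
# "%in% class()" idiom for these S3 classes.
library(methods)
x <- structure(list(), class = c("startR_cube", "list"))
is(x, "startR_cube")         # TRUE, new idiom
"startR_cube" %in% class(x)  # TRUE, replaced idiom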
{ stop("The target dimensions required by 'step_fun' for the input ", input, " are not present in the corresponding provided object in 'inputs'.") } - if ('startR_workflow' %in% class(inputs[[input]])) { + if (is(inputs[[input]], 'startR_workflow')) { if (is.null(previous_target_dims)) { previous_target_dims <- attr(inputs[[input]], 'TargetDims') } else { diff --git a/R/ByChunks.R b/R/ByChunks.R index 8185763bf5c31d388bdb28d5ab3873d8636f46d9..37a554c1e44d8fda4bc60a6a2a0732c54e4009b3 100644 --- a/R/ByChunks.R +++ b/R/ByChunks.R @@ -80,6 +80,7 @@ #' #ByChunks(step, data) #' #'@import multiApply +#'@importFrom methods is #'@noRd ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', threads_load = 2, threads_compute = 1, @@ -109,7 +110,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', MergeArrays <- .MergeArrays # Check input headers - if ('startR_cube' %in% class(cube_headers)) { + if (is(cube_headers, 'startR_cube')) { cube_headers <- list(cube_headers) } if (!all(sapply(lapply(cube_headers, class), @@ -411,7 +412,7 @@ ByChunks <- function(step_fun, cube_headers, ..., chunks = 'auto', timings[['nchunks']] <- prod(unlist(chunks)) # Check step_fun - if (!('startR_step_fun' %in% class(step_fun))) { + if (!is(step_fun, 'startR_step_fun')) { stop("Parameter 'step_fun' must be of the class 'startR_step_fun', as returned ", "by the function Step.") } diff --git a/R/Collect.R b/R/Collect.R index bf387297722c15e61e36d6b768624aec0b73c605..4c80b037d4b91101fc0e90fc6c5c3b1f09b5d624 100644 --- a/R/Collect.R +++ b/R/Collect.R @@ -72,7 +72,7 @@ #' #'@export Collect <- function(startr_exec, wait = TRUE, remove = TRUE) { - if (!('startR_exec' %in% class(startr_exec))) { + if (!is(startr_exec, 'startR_exec')) { stop("Parameter 'startr_exec' must be an object of the class ", "'startR_exec', as returned by Collect(..., wait = FALSE).") } diff --git a/R/Compute.R b/R/Compute.R index 0e8d42cd134a3bbc66fc896a357db1e4f5cd65a5..0aa94245bd5d5eff3c08fc4d61cb911dac628187 100644 --- a/R/Compute.R +++ b/R/Compute.R @@ -82,6 +82,7 @@ #' wf <- AddStep(data, step) #' res <- Compute(wf, chunks = list(longitude = 4, sdate = 2)) #' +#'@importFrom methods is #'@export Compute <- function(workflow, chunks = 'auto', threads_load = 1, threads_compute = 1, @@ -89,13 +90,13 @@ Compute <- function(workflow, chunks = 'auto', ecflow_server = NULL, silent = FALSE, debug = FALSE, wait = TRUE) { # Check workflow - if (!any(c('startR_cube', 'startR_workflow') %in% class(workflow))) { + if (!is(workflow, 'startR_cube') & !is(workflow, 'startR_workflow')) { stop("Parameter 'workflow' must be an object of class 'startR_cube' as ", "returned by Start or of class 'startR_workflow' as returned by ", "AddStep.") } - if ('startR_cube' %in% class(workflow)) { + if (is(workflow, 'startR_cube')) { #machine_free_ram <- 1000000000 #max_ram_ratio <- 0.5 #data_size <- prod(c(attr(workflow, 'Dimensions'), 8)) diff --git a/R/NcDataReader.R b/R/NcDataReader.R index ebc58fc27c6a6c5a379e9773de1ffe27bed2ecc3..9c85e331b04d1ce237f7269b2841c225541cc9e6 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -184,119 +184,130 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, }) if (length(names(attr(result, 'variables'))) == 1) { - var_name <- names(attr(result, 'variables')) - units <- attr(result, 'variables')[[var_name]][['units']] + # The 1st condition is for implicit time dim (if time length = 1, it is + # allowed to not be defined in Start call. 
Therefore, it is not in the list + # of synonims); + # the 2nd condition is for the normal case; the 3rd one is that if return_vars + # has a variable that is not 'time'. The only way to know if it should be time + # is to check calendar. + # All these conditions are to prevent the variables with time-like units but + # actually not a time variable, e.g., drought period [days]. + if (names(attr(result, 'variables')) == 'time' | + 'time' %in% synonims[[names(attr(result, 'variables'))]] | + 'calendar' %in% names(attr(result, 'variables')[[1]])) { + var_name <- names(attr(result, 'variables')) + units <- attr(result, 'variables')[[var_name]][['units']] - if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { - if (units == 'seconds') { -# units <- 'secs' - } else if (units == 'minutes') { -# units <- 'mins' - result <- result * 60 # min to sec - } - result[] <- paste(result[], units) - - } else if (grepl(' since ', units)) { - # Find the calendar - calendar <- attr(result, 'variables')[[var_name]]$calendar - if (calendar == 'standard') calendar <- 'gregorian' + if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { + if (units == 'seconds') { +# units <- 'secs' + } else if (units == 'minutes') { +# units <- 'mins' + result <- result * 60 # min to sec + } + result[] <- paste(result[], units) - parts <- strsplit(units, ' since ')[[1]] - units <- parts[1] + } else if (grepl(' since ', units)) { + # Find the calendar + calendar <- attr(result, 'variables')[[var_name]]$calendar + if (calendar == 'standard') calendar <- 'gregorian' - if (units %in% c('second', 'seconds')) { -# units <- 'secs' - } else if (units %in% c('minute', 'minutes')) { -# units <- 'mins' - result <- result * 60 # min to sec - } else if (units %in% c('hour', 'hours')) { - result <- result * 60 * 60 # hour to sec - } else if (units %in% c('day', 'days')) { -# units <- 'days' - result <- result * 24 * 60 * 60 # day to sec - } else if (units %in% c('month', 'months')) { - # define day in each month - leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - # Origin year and month - ori_year <- as.numeric(substr(parts[2], 1, 4)) - ori_month <- as.numeric(substr(parts[2], 6, 7)) - if (is.na(ori_month)) { - ori_month <- as.numeric(substr(parts[2], 6, 6)) - } - if (!is.numeric(ori_year) | !is.numeric(ori_month)) { - stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. 
", - "Check the file or contact the maintainer.")) - } + parts <- strsplit(units, ' since ')[[1]] + units <- parts[1] - if (calendar == 'gregorian') { - # Find how many years + months - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - # Find the leap years we care - if (ori_month <= 2) { - leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) - } else { - leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + if (units %in% c('second', 'seconds')) { +# units <- 'secs' + } else if (units %in% c('minute', 'minutes')) { +# units <- 'mins' + result <- result * 60 # min to sec + } else if (units %in% c('hour', 'hours')) { + result <- result * 60 * 60 # hour to sec + } else if (units %in% c('day', 'days')) { +# units <- 'days' + result <- result * 24 * 60 * 60 # day to sec + } else if (units %in% c('month', 'months')) { + # define day in each month + leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + # Origin year and month + ori_year <- as.numeric(substr(parts[2], 1, 4)) + ori_month <- as.numeric(substr(parts[2], 6, 7)) + if (is.na(ori_month)) { + ori_month <- as.numeric(substr(parts[2], 6, 6)) + } + if (!is.numeric(ori_year) | !is.numeric(ori_month)) { + stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", + "Check the file or contact the maintainer.")) } - total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet + if (calendar == 'gregorian') { + # Find how many years + months + yr_num <- floor(result / 12) + month_left <- result - yr_num * 12 + # Find the leap years we care + if (ori_month <= 2) { + leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) + } else { + leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + } + total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet - if (month_left != 0) { - if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr - # Is the last year a leap year? - last_leap <- s2dv::LeapYear(ori_year + yr_num) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) - } else { - total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) - } - } else { # the last month ends in the next yr - if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 + if (month_left != 0) { + if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr + # Is the last year a leap year? 
+ last_leap <- s2dv::LeapYear(ori_year + yr_num) if (last_leap) { - total_days <- total_days + sum(leap_month_day[2:12]) + total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) } else { - total_days <- total_days + sum(no_leap_month_day[2:12]) + total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) } - } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) - needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[needed_month]) - } else { - total_days <- total_days + sum(no_leap_month_day[needed_month]) + } else { # the last month ends in the next yr + if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 + last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 + if (last_leap) { + total_days <- total_days + sum(leap_month_day[2:12]) + } else { + total_days <- total_days + sum(no_leap_month_day[2:12]) + } + } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 + last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) + needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) + if (last_leap) { + total_days <- total_days + sum(leap_month_day[needed_month]) + } else { + total_days <- total_days + sum(no_leap_month_day[needed_month]) + } } } } - } - result <- total_days * 24 * 60 * 60 # day to sec - } else if (calendar %in% c('365_day',' 365', 'noleap')) { - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)]) - result <- total_days * 24 * 60 * 60 # day to sec + result <- total_days * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('365_day',' 365', 'noleap')) { + yr_num <- floor(result / 12) + month_left <- result - yr_num * 12 + total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)]) + result <- total_days * 24 * 60 * 60 # day to sec - } else if (calendar %in% c('360_day', '360')) { - result <- result * 30 * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('360_day', '360')) { + result <- result * 30 * 24 * 60 * 60 # day to sec - } else { #old code. The calendar is not in any of the above. - result <- result * 30.5 - result <- result * 24 * 60 * 60 # day to sec + } else { #old code. The calendar is not in any of the above. 
+ result <- result * 30.5 + result <- result * 24 * 60 * 60 # day to sec + } } - } - new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] - new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) + new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] + new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) - #new_array <- seq(as.POSIXct(parts[2]), - # length = max(result, na.rm = TRUE) + 1, - # by = units)[result[] + 1] - dim(new_array) <- dim(result) - attr(new_array, 'variables') <- attr(result, 'variables') - result <- new_array + #new_array <- seq(as.POSIXct(parts[2]), + # length = max(result, na.rm = TRUE) + 1, + # by = units)[result[] + 1] + dim(new_array) <- dim(result) + attr(new_array, 'variables') <- attr(result, 'variables') + result <- new_array + } } - } + } } if (close) { diff --git a/R/SelectorChecker.R b/R/SelectorChecker.R index 81ec4889ff344b808e1b9ab0569a05d893e80aa6..7b69a8b8b30b29100b0e0e02687cfaadd7deb144 100644 --- a/R/SelectorChecker.R +++ b/R/SelectorChecker.R @@ -35,6 +35,7 @@ #'sub_array_of_values <- seq(90, -90, length.out = 258)[2:257] #'SelectorChecker(sub_array_of_selectors, sub_array_of_values) #' +#'@importFrom methods is #'@export SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, tolerance = NULL) { @@ -93,7 +94,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, tol <- 0 if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% "numeric")) { + if (!is(tolerance, "numeric")) { stop("Expected a numeric *_tolerance.") } tol <- tolerance @@ -148,7 +149,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, val <- selectors[[i]] tol <- 0 if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% "difftime")) { + if (!is(tolerance, "difftime")) { stop("Expected a difftime *_tolerance.") } tol <- tolerance @@ -194,7 +195,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, "nearest values.")) } if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% 'numeric')) { + if (!is(tolerance, 'numeric')) { stop("Expected a numeric *_tolerance.") } } @@ -228,7 +229,7 @@ SelectorChecker <- function(selectors, var = NULL, return_indices = TRUE, "nearest values.")) } if (!is.null(tolerance)) { - if (!any(class(tolerance) %in% 'difftime')) { + if (!is(tolerance, 'difftime')) { stop("Expected a difftime *_tolerance.") } } diff --git a/R/Start.R b/R/Start.R index d11c669daeec82be745586fb321c789ec8e03376..db573a86685a3e3cd363c87cc9803ce807c04921 100644 --- a/R/Start.R +++ b/R/Start.R @@ -802,6 +802,7 @@ #'@importFrom utils str #'@importFrom stats na.omit setNames #'@importFrom ClimProjDiags Subset +#'@importFrom methods is #'@export Start <- function(..., # dim = indices/selectors, # dim_var = 'var', @@ -1384,7 +1385,7 @@ Start <- function(..., # dim = indices/selectors, # names as depended dim. for (j in 1:length(dat_selectors[[file_dim]])) { sv <- selector_vector <- dat_selectors[[file_dim]][[j]] - if (!identical(first_class, class(sv)) || + if (!is(sv, first_class) || !identical(first_length, length(sv))) { stop("All provided selectors for depending dimensions must ", "be vectors of the same length and of the same class.") @@ -1731,7 +1732,11 @@ Start <- function(..., # dim = indices/selectors, #//////////////////////////////////////////// # Force return_vars = (time = NULL) to (time = 'sdate') if (1) selector = [sdate = 2, time = 4] or # (2) time_across = 'sdate'. 
- # NOTE: Here is not in for loop of dat[[i]] + # NOTE: Not sure if the loop over dat is needed here. In theory, all the dat + # should have the same dimensions (?) so expected_inner_dims and + # found_file_dims are the same. The selector_array may possible be + # different, but then the attribute will be correct? If it's different, + # it should depend on 'dat' (but here we only consider common_return_vars) for (i in 1:length(dat)) { for (inner_dim in expected_inner_dims[[i]]) { # The selectors for the inner dimension are taken. @@ -1745,11 +1750,29 @@ Start <- function(..., # dim = indices/selectors, if (inner_dim %in% inner_dims_across_files | is.character(file_dim_as_selector_array_dim)) { #(2) or (1) # inner_dim is not in return_vars or is NULL - if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) | - (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) { - common_return_vars[[inner_dim]] <- correct_return_vars( - inner_dim, inner_dims_across_files, - found_pattern_dim, file_dim_as_selector_array_dim) + need_correct <- FALSE + if (((!inner_dim %in% names(common_return_vars)) & + (!inner_dim %in% names(return_vars))) | + (inner_dim %in% names(common_return_vars) & + is.null(common_return_vars[[inner_dim]]))) { + need_correct <- TRUE + } else if (inner_dim %in% names(common_return_vars) & + (inner_dim %in% inner_dims_across_files) & + !is.null(names(inner_dims_across_files))) { #(2) + if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) need_correct <- TRUE + + } else if (inner_dim %in% names(common_return_vars) & + is.character(file_dim_as_selector_array_dim)) { #(1) + if (!all(file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])) { + need_correct <- TRUE + file_dim_as_selector_array_dim <- file_dim_as_selector_array_dim[which(!file_dim_as_selector_array_dim %in% common_return_vars[[inner_dim]])] + } + } + if (need_correct) { + common_return_vars[[inner_dim]] <- + c(common_return_vars[[inner_dim]], + correct_return_vars(inner_dim, inner_dims_across_files, + found_pattern_dim, file_dim_as_selector_array_dim)) } } } @@ -2156,11 +2179,27 @@ Start <- function(..., # dim = indices/selectors, print("-> DEFINING INDICES FOR INNER DIMENSION:") print(inner_dim) } - file_dim <- NULL + crossed_file_dim <- NULL if (inner_dim %in% unlist(inner_dims_across_files)) { - file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] - chunk_amount <- length(dat[[i]][['selectors']][[file_dim]][[1]]) - names(chunk_amount) <- file_dim + crossed_file_dim <- names(inner_dims_across_files)[which(sapply(inner_dims_across_files, function(x) inner_dim %in% x))[1]] + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim + } else if (!is.null(names(dim(dat[[i]][['selectors']][[inner_dim]][[1]]))) & + inner_dim %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])) & + any(found_file_dims[[i]] %in% names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))) { + # inner dim is dependent on file dim in the form of selector array (e.g., time = [sdate = 2, time = 4]) + crossed_file_dim <- found_file_dims[[i]][which(found_file_dims[[i]] %in% + names(dim(dat[[i]][['selectors']][[inner_dim]][[1]])))] + if (length(crossed_file_dim) == 1) { + chunk_amount <- length(dat[[i]][['selectors']][[crossed_file_dim]][[1]]) + names(chunk_amount) <- crossed_file_dim + } else { + # e.g., region = [memb = 2, sdate = 
3, region = 1] + chunk_amount <- prod( + sapply(lapply( + dat[[i]][['selectors']][crossed_file_dim], "[[", 1), length)) + names(chunk_amount) <- paste(crossed_file_dim, collapse = '.') + } } else { chunk_amount <- 1 } @@ -2210,7 +2249,7 @@ Start <- function(..., # dim = indices/selectors, selector_file_dims <- 1 #NOTE: Change 'selector_file_dims' (from 1) if selector is an array with a file_dim dimname. - # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- array(sdate = 2) + # I.e., If time = [sdate = 2, time = 4], selector_file_dims <- c(sdate = 2) if (any(found_file_dims[[i]] %in% names(dim(selector_array)))) { selector_file_dims <- dim(selector_array)[which(names(dim(selector_array)) %in% found_file_dims[[i]])] } @@ -2226,7 +2265,7 @@ Start <- function(..., # dim = indices/selectors, if (!is.null(var_with_selectors_name)) { if ((var_with_selectors_name %in% transform_vars) && (!is.null(transform))) { with_transform <- TRUE - if (!is.null(file_dim)) { + if (!is.null(crossed_file_dim)) { stop("Requested a transformation over the dimension '", inner_dim, "', wich goes across files. This feature ", "is not supported. Either do the request without the ", @@ -2366,7 +2405,7 @@ Start <- function(..., # dim = indices/selectors, #TODO: if (chunk_amount != number of chunks in selector_file_dims), crash if (length(var_dims) > 1) { stop("Specified a '", inner_dim, "_var' for the dimension '", - inner_dim, "', which goes across files (across '", file_dim, + inner_dim, "', which goes across files (across '", crossed_file_dim, "'). The specified variable, '", var_with_selectors_name, "', has more ", "than one dimension and can not be used as selector variable. ", "Select another variable or fix it in the files.") @@ -2400,7 +2439,7 @@ Start <- function(..., # dim = indices/selectors, # need to know each length to create the indices for each file later. # Record 'inner_dim_lengths' here for later usage. inner_dim_lengths <- NULL - if (largest_dims_length & !is.null(file_dim)) { + if (largest_dims_length & !is.null(crossed_file_dim)) { # inner_dim_lengths here includes all the files, but we only want # the files of fyear for certain "sdate". We will categorize it later. inner_dim_lengths <- tryCatch({ @@ -2411,11 +2450,11 @@ Start <- function(..., # dim = indices/selectors, }) # Use other file dims as the factors to categorize. - other_file_dims <- dim(array_of_files_to_load)[which(file_dims != file_dim)] + other_file_dims <- dim(array_of_files_to_load)[which(!found_file_dims[[i]] %in% crossed_file_dim)] other_file_dims <- lapply(lapply(other_file_dims, seq, 1), rev) other_file_dims_factor <- expand.grid(other_file_dims) selector_indices_save_subset <- - lapply(selector_indices_save[[i]], '[', which(file_dims != file_dim)) + lapply(selector_indices_save[[i]], '[', which(!found_file_dims[[i]] %in% crossed_file_dim)) # Put the fyear with the same other file dims (sdate, etc.) 
together, and find the largest length (in theory all of them should be the same) inner_dim_lengths_cat <- vector('list', dim(other_file_dims_factor)[1]) @@ -2466,9 +2505,9 @@ Start <- function(..., # dim = indices/selectors, #sri <- NULL } } else { - if ((!is.null(file_dim)) && !(file_dim %in% names(var_file_dims))) { + if (!is.null(crossed_file_dim) & any(!(crossed_file_dim %in% names(var_file_dims)))) { stop("The variable '", var_with_selectors_name, "' must also be ", - "requested for the file dimension '", file_dim, "' in ", + "requested for the file dimension '", crossed_file_dim, "' in ", "this configuration.") } fri <- vector('list', length = prod(var_file_dims)) @@ -2563,12 +2602,12 @@ Start <- function(..., # dim = indices/selectors, unmatching_file_dims <- which(!(names(var_file_dims) %in% names(selector_file_dims))) if ((length(unmatching_file_dims) > 0)) { raise_error <- FALSE - if (is.null(file_dim)) { + if (is.null(crossed_file_dim)) { raise_error <- TRUE } else { - if (!((length(unmatching_file_dims) == 1) && - (names(var_file_dims)[unmatching_file_dims] == file_dim) && - (inner_dim %in% names(selector_inner_dims)))) { + if (!(length(unmatching_file_dims) == 1 & + names(var_file_dims)[unmatching_file_dims] %in% crossed_file_dim & + inner_dim %in% names(selector_inner_dims))) { raise_error <- TRUE } } @@ -2583,31 +2622,34 @@ Start <- function(..., # dim = indices/selectors, } if (any(names(selector_file_dims) %in% names(dim(var_with_selectors)))) { if (any(dim(var_with_selectors)[names(selector_file_dims)] != selector_file_dims)) { - stop("Size of selector file dimensions must mach size of requested ", + stop("Size of selector file dimensions must match size of the corresponding ", "variable dimensions.") } } } ## TODO: If var dimensions are not in the same order as selector dimensions, reorder if (is.null(names(selector_file_dims))) { - if (is.null(file_dim)) { + if (is.null(crossed_file_dim)) { fri_dims <- 1 } else { fri_dims <- chunk_amount - names(fri_dims) <- file_dim + names(fri_dims) <- crossed_file_dim } } else { fri_dim_names <- names(selector_file_dims) - if (!is.null(file_dim)) { - fri_dim_names <- c(fri_dim_names, file_dim) + if (!is.null(crossed_file_dim)) { + fri_dim_names <- c(fri_dim_names, crossed_file_dim) } fri_dim_names <- found_file_dims[[i]][which(found_file_dims[[i]] %in% fri_dim_names)] fri_dims <- rep(NA, length(fri_dim_names)) names(fri_dims) <- fri_dim_names fri_dims[names(selector_file_dims)] <- selector_file_dims - if (!is.null(file_dim)) { - fri_dims[file_dim] <- chunk_amount - } + #NOTE: Not sure how it works here, but "chunk_amount" is the same as + # "selector_file_dims" above in the cases we've seen so far, + # and it causes problem when crossed_file_dim is more than one. 
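# Illustrative sketch (hypothetical names and sizes, not from the patch): an
# inner-dim selector array that depends on more than one file dimension, which
# the 'crossed_file_dim' logic above now supports,
# e.g., region = [memb = 2, sdate = 3, region = 1].
region_selector <- array(1L, dim = c(memb = 2, sdate = 3, region = 1))
# Passed to Start() as, e.g., region = region_selector, the requested region
# index can vary with both 'memb' and 'sdate'; chunk_amount is then the product
# of the lengths of the two file-dim selectors (here 2 * 3 = 6).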
+# if (!is.null(crossed_file_dim)) { +# fri_dims[crossed_file_dim] <- chunk_amount +# } } fri <- vector('list', length = prod(fri_dims)) dim(fri) <- fri_dims @@ -2651,13 +2693,13 @@ Start <- function(..., # dim = indices/selectors, print(str(sub_array_of_values)) print(dim(sub_array_of_values)) print("-> NAME OF THE FILE DIMENSION THE CURRENT INNER DIMENSION EXTENDS ALONG:") - print(file_dim) + print(crossed_file_dim) } } # The inner dim selector is an array in which have file dim (e.g., time = [sdate = 2, time = 4], # or the inner dim doesn't go across any file dim (e.g., no time_across = 'sdate') - if ((!is.null(file_dim) && (file_dim %in% names(selector_file_dims))) || is.null(file_dim)) { + if ((!is.null(crossed_file_dim) & (any(crossed_file_dim %in% names(selector_file_dims)))) || is.null(crossed_file_dim)) { if (length(sub_array_of_selectors) > 0) { if (debug) { if (inner_dim %in% dims_to_check) { @@ -3173,11 +3215,15 @@ Start <- function(..., # dim = indices/selectors, } fri <- do.call('[[<-', c(list(x = fri), as.list(selector_store_position), list(value = sub_array_of_fri))) - if (!is.null(file_dim)) { - taken_chunks[selector_store_position[[file_dim]]] <- TRUE - } else { - taken_chunks <- TRUE - } + + #NOTE: This part existed always but never was used. taken_chunks + # is related to merge_across_dims, but I don't know how it is + # used (maybe for higher efficiency?) +# if (!is.null(crossed_file_dim)) { +# taken_chunks[selector_store_position[[crossed_file_dim]]] <- TRUE +# } else { + taken_chunks <- TRUE +# } } } else { # The inner dim goes across a file dim (e.g., time_across = 'sdate') @@ -3186,7 +3232,7 @@ Start <- function(..., # dim = indices/selectors, print("-> THE INNER DIMENSION GOES ACROSS A FILE DIMENSION.") } } - # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. + # If "_across = + merge_across_dims = FALSE + chunk over ", return error because this instance is not logically correct. if (chunks[[inner_dim]]["n_chunks"] > 1 & inner_dim %in% inner_dims_across_files) { stop("Chunk over dimension '", inner_dim, "' is not allowed because '", inner_dim, "' is across '", @@ -3282,7 +3328,7 @@ Start <- function(..., # dim = indices/selectors, for (chunk in 1:chunk_amount) { if (!is.null(names(selector_store_position))) { - selector_store_position[file_dim] <- chunk + selector_store_position[crossed_file_dim] <- chunk } else { selector_store_position <- chunk } @@ -3344,7 +3390,7 @@ Start <- function(..., # dim = indices/selectors, } } else { stop("Provided array of indices for dimension '", inner_dim, "', ", - "which goes across the file dimension '", file_dim, "', but ", + "which goes across the file dimension '", crossed_file_dim, "', but ", "the provided array does not have the dimension '", inner_dim, "', which is mandatory.") } @@ -3384,12 +3430,12 @@ Start <- function(..., # dim = indices/selectors, # end_chunks_to_remove <- empty_chunks[first_chunk_to_remove:length(empty_chunks)] # chunks_to_keep <- which(!((1:length(taken_chunks)) %in% c(start_chunks_to_remove, end_chunks_to_remove))) chunks_to_keep <- which(taken_chunks) - dims_to_crop[[file_dim]] <- c(dims_to_crop[[file_dim]], list(chunks_to_keep)) - # found_indices <- Subset(found_indices, file_dim, chunks_to_keep) + dims_to_crop[[crossed_file_dim]] <- c(dims_to_crop[[crossed_file_dim]], list(chunks_to_keep)) + # found_indices <- Subset(found_indices, crossed_file_dim, chunks_to_keep) # # Crop dataset variables file dims. 
# for (picked_var in names(picked_vars[[i]])) { - # if (file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { - # picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], file_dim, chunks_to_keep) + # if (crossed_file_dim %in% names(dim(picked_vars[[i]][[picked_var]]))) { + # picked_vars[[i]][[picked_var]] <- Subset(picked_vars[[i]][[picked_var]], crossed_file_dim, chunks_to_keep) # } # } } @@ -3462,8 +3508,61 @@ Start <- function(..., # dim = indices/selectors, common_vars_to_crop[[common_var_to_crop]] <- Subset(transformed_var_with_selectors, inner_dim, crop_indices) } - } else { #old code - common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } else { + if (!is.null(crossed_file_dim)) { #merge_across_dims, crossed_file_dim is the depended file dim + #NOTE: When is not this case??? Maybe this condition is not needed + if (any(crossed_file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]])))) { + tmp <- common_vars_to_crop[[common_var_to_crop]] + tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) + dim_extra_ind <- which(!names(dim(tmp)) %in% c(crossed_file_dim, inner_dim)) + if (!identical(dim_extra_ind, integer(0))) { + tmp_list <- asplit(tmp, dim_extra_ind) + dim_file_ind <- which(names(dim(tmp_list[[1]])) %in% crossed_file_dim) + tmp_list <- lapply(tmp_list, asplit, dim_file_ind) + } else { # only crossed_file_dim and inner_dim + dim_file_ind <- which(names(dim(tmp)) %in% crossed_file_dim) + tmp_list <- asplit(tmp, dim_file_ind) + # Add another layer to be consistent with the first case above + tmp_list <- list(tmp_list) + } + max_fri_length <- max(sapply(fri, length)) + for (i_extra_dim in 1:length(tmp_list)) { + for (i_fri in 1:length(fri)) { + tmp_list[[i_extra_dim]][[i_fri]] <- + tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] + + if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + tmp_list[[i_extra_dim]][[i_fri]] <- + c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + } + } + } + # Change list back to array + tmp_new_dim <- c(max_fri_length, dim(tmp)[crossed_file_dim], dim(tmp)[dim_extra_ind]) + names(tmp_new_dim) <- c(inner_dim, crossed_file_dim, names(dim(tmp))[dim_extra_ind]) + common_vars_to_crop[[common_var_to_crop]] <- + array(unlist(tmp_list), dim = tmp_new_dim) + # Reorder back + common_vars_to_crop[[common_var_to_crop]] <- + aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) + # Put attributes back + tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) + attributes(common_vars_to_crop[[common_var_to_crop]]) <- + c(attributes(common_vars_to_crop[[common_var_to_crop]]), + tmp_attributes[tmp]) + + if ('time' %in% synonims[[common_var_to_crop]]) { + # Convert number back to time + common_vars_to_crop[[common_var_to_crop]] <- + as.POSIXct(common_vars_to_crop[[common_var_to_crop]], + origin = "1970-01-01", tz = 'UTC') + } + } + } else { # old code + + common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } + } } @@ -3602,6 +3701,7 @@ Start <- function(..., # dim = indices/selectors, # Find the dimension to split if split_multiselected_dims = TRUE. # If there is no dimension able to be split, change split_multiselected_dims to FALSE. 
all_split_dims <- NULL + inner_dim_has_split_dim <- NULL if (split_multiselected_dims) { tmp <- dims_split(dim_params, final_dims_fake) final_dims_fake <- tmp[[1]] @@ -3612,6 +3712,16 @@ Start <- function(..., # dim = indices/selectors, split_multiselected_dims <- FALSE .warning(paste0("Not found any dimensions able to be split. The parameter ", "'split_multiselected_dims' is changed to FALSE.")) + } else { + tmp_fun <- function (x, y) { + any(names(dim(x)) %in% y) + } + inner_dim_has_split_dim <- names(which(unlist(lapply( + picked_common_vars, tmp_fun, names(all_split_dims))))) + if (!identical(inner_dim_has_split_dim, character(0))) { + # If merge_across_dims also, it will be replaced later + saved_reshaped_attr <- attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') + } } } #====================================================================== @@ -3623,6 +3733,9 @@ Start <- function(..., # dim = indices/selectors, across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of each inner_dim ('time') along each file_dim ('file_date') length_inner_across_dim <- lapply(dat[[i]][['selectors']][[across_inner_dim]][['fri']], length) + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + # Save attributes for later use. If split_multiselected_dims, this variable has been created above but is replaced here + saved_reshaped_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') if (merge_across_dims_narm & !split_multiselected_dims) { final_dims_fake <- merge_narm_dims(final_dims_fake, across_inner_dim, length_inner_across_dim) @@ -3668,6 +3781,28 @@ Start <- function(..., # dim = indices/selectors, all_split_dims[[1]] <- tmp[[2]] } } + if (merge_across_dims | split_multiselected_dims) { + if (!merge_across_dims & split_multiselected_dims & identical(inner_dim_has_split_dim, character(0))) { + final_dims_fake_metadata <- NULL + } else { + if (!merge_across_dims & split_multiselected_dims) { + if (any(names(all_split_dims[[1]]) %in% names(dim(picked_common_vars[[inner_dim_has_split_dim]]))) & + names(all_split_dims)[1] != inner_dim_has_split_dim) { + if (inner_dim_has_split_dim %in% names(final_dims)) { + stop("Detect inner dimension in the split array, but merge_across_dims is not used. The output dimensions will be repeated. Check if the dimensions and parameters are correctly defined.") + } else { + # Only split no merge, time dim is not explicitly defined because the + # length is 1, the sdate dim to be split having 'time' as one dimension. + # --> Take 'time' dim off from picked_common_vars. + dim(picked_common_vars[[inner_dim_has_split_dim]]) <- dim(picked_common_vars[[inner_dim_has_split_dim]])[-which(names(dim(picked_common_vars[[inner_dim_has_split_dim]])) == inner_dim_has_split_dim)] + } + } + } + final_dims_fake_metadata <- find_final_dims_fake_metadata( + merge_across_dims, split_multiselected_dims, picked_common_vars = picked_common_vars[[inner_dim_has_split_dim]], across_inner_dim, + final_dims_fake, dims_of_merge_dim, all_split_dims) + } + } # The following several lines will only run if retrieve = TRUE if (retrieve) { @@ -3781,48 +3916,103 @@ Start <- function(..., # dim = indices/selectors, } } #print("P") - - # NOTE: If merge_across_dims = TRUE, there might be additional NAs due to - # unequal inner_dim ('time') length across file_dim ('file_date'). - # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. 
+ + # If merge_across_dims = TRUE, there might be additional NAs due to unequal + # inner_dim ('time') length across file_dim ('file_date'). + # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. # TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case. if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { if (!merge_across_dims_narm) { data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) + tmp <- match(names(final_dims), names(dims_of_merge_dim)) + if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) + } + metadata_tmp <- picked_common_vars[[across_inner_dim]] } else { - data_array_tmp <- remove_additional_na_from_merge( - inner_dims_across_files, final_dims, across_inner_dim, - length_inner_across_dim, data_array) + tmp <- remove_additional_na_from_merge( + data_array = bigmemory::as.matrix(data_array), + merge_dim_metadata = picked_common_vars[[across_inner_dim]], + inner_dims_across_files, final_dims, + length_inner_across_dim) + data_array_tmp <- tmp$data_array + metadata_tmp <- tmp$merge_dim_metadata } if (length(data_array_tmp) != prod(final_dims_fake)) { stop(paste0("After reshaping, the data do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly.")) } - + if (length(metadata_tmp) != prod(final_dims_fake_metadata)) { + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", + "Check if the reshaping parameters are used correctly or contact support.")) + } + #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { - data_array_tmp <- rebuild_array_merge_split( - data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim) + tmp <- rebuild_array_merge_split( + data_array = data_array_tmp, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + data_array_tmp <- tmp$data_array + metadata_tmp <- tmp$metadata } - + data_array <- array(data_array_tmp, dim = final_dims_fake) - + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_metadata) + + # If split_multiselected_dims + merge_across_dims, the dimension order may change above. + # To get the user-required dim order, we need to reorder the array again. + if (split_multiselected_dims) { + if (inner_dim_pos_in_split_dims != 1) { + correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) + data_array <- .aperm2(data_array, correct_order) + correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) + metadata_tmp <- .aperm2(metadata_tmp, correct_order_metadata[!is.na(correct_order_metadata)]) + } + } + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr + } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) - } - - # NOTE: If split_multiselected_dims + merge_across_dims, the dimension order may change above. 
- # To get the user-required dim order, we need to reorder the array again. - if (split_multiselected_dims & merge_across_dims) { - if (inner_dim_pos_in_split_dims != 1) { - correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) - data_array <- .aperm2(data_array, correct_order) + if (merge_across_dims) { + # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) + + inner_dim_pos <- which(names(dims_of_merge_dim) == inner_dims_across_files) + file_dim_pos <- which(names(dims_of_merge_dim) == names(inner_dims_across_files)) + if (file_dim_pos < inner_dim_pos) { #need to reorder + tmp <- seq(1, length(dims_of_merge_dim)) + tmp[inner_dim_pos] <- file_dim_pos + tmp[file_dim_pos] <- inner_dim_pos + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) + } + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_metadata) + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr + } + if (split_multiselected_dims) { + if (!identical(inner_dim_has_split_dim, character(0))) { + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) + # Convert numeric back to dates + if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- saved_reshaped_attr + } } } - + gc() # Load metadata and remove the metadata folder @@ -3870,7 +4060,90 @@ Start <- function(..., # dim = indices/selectors, } } # End if (retrieve) - + else { # if retrieve = FALSE, metadata still needs to reshape + + if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { + if (!merge_across_dims_narm) { + tmp <- match(names(final_dims), names(dims_of_merge_dim)) + if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) + } + metadata_tmp <- picked_common_vars[[across_inner_dim]] + } else { + tmp <- remove_additional_na_from_merge( + data_array = NULL, + merge_dim_metadata = picked_common_vars[[across_inner_dim]], + inner_dims_across_files, final_dims, + length_inner_across_dim) + metadata_tmp <- tmp$merge_dim_metadata + } + + if (length(metadata_tmp) != prod(final_dims_fake_metadata)) { + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", + "Check if the reshaping parameters are used correctly or contact support.")) + } + + #NOTE: When one file contains values for dicrete dimensions, rearrange the + # chunks (i.e., work_piece) is necessary. + if (split_multiselected_dims) { + tmp <- rebuild_array_merge_split( + data_array = NULL, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + metadata_tmp <- tmp$metadata + } + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_metadata) + + # If split_multiselected_dims + merge_across_dims, the dimension order may change above. + # To get the user-required dim order, we need to reorder the array again. 
+ if (split_multiselected_dims) { + if (inner_dim_pos_in_split_dims != 1) { + correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) +# data_array <- .aperm2(data_array, correct_order) + correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) + metadata_tmp <- .aperm2(metadata_tmp, correct_order_metadata[!is.na(correct_order_metadata)]) + } + } + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr + } else { # ! (merge_across_dims + split_multiselected_dims) (old version) + if (merge_across_dims) { + # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) + + inner_dim_pos <- which(names(dims_of_merge_dim) == inner_dims_across_files) + file_dim_pos <- which(names(dims_of_merge_dim) == names(inner_dims_across_files)) + if (file_dim_pos < inner_dim_pos) { #need to reorder + tmp <- seq(1, length(dims_of_merge_dim)) + tmp[inner_dim_pos] <- file_dim_pos + tmp[file_dim_pos] <- inner_dim_pos + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) + } + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_metadata) + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- saved_reshaped_attr + } + if (split_multiselected_dims) { + if (!identical(inner_dim_has_split_dim, character(0))) { + metadata_tmp <- array(picked_common_vars[[inner_dim_has_split_dim]], dim = final_dims_fake_metadata) + # Convert numeric back to dates + if (is(picked_common_vars[[inner_dim_has_split_dim]], 'POSIXct')) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[inner_dim_has_split_dim]] <- metadata_tmp + attr(picked_common_vars[[inner_dim_has_split_dim]], 'variables') <- saved_reshaped_attr + } + } + } + + } + # Change final_dims_fake back because retrieve = FALSE will use it for attributes later if (exists("final_dims_fake_output")) { final_dims_fake <- final_dims_fake_output diff --git a/R/Utils.R b/R/Utils.R index d0e850e7f8c01180ef043ca5d4b5886ea3abca61..9bf23e995b4f2c75e262ebc675547b9960feb132 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,4 +1,5 @@ #'@import abind +#'@importFrom methods is #'@importFrom ClimProjDiags Subset .chunk <- function(chunk, n_chunks, selectors) { if (any(chunk > n_chunks)) { @@ -806,7 +807,7 @@ readRDS(paste0(shared_dir, '/', chunk_files_original[found_chunk])) }) - if (('try-error' %in% class(array_of_chunks[[i]]))) { + if (is(array_of_chunks[[i]], 'try-error')) { message("Waiting for an incomplete file transfer...") Sys.sleep(5) } else { @@ -844,3 +845,17 @@ .KnownLatNames <- function() { known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') } + +.ReplaceElementInVector <- function(x, target, new_val) { + # x is a vector with name + # target is a string + # new_val is a vector with name + # E.g., Change [a = 2, b = 3] to [c = 1, d = 2, b = 3], then: + # x = c(a = 2, b = 3), target = 'a', new_val = c(c = 1, d = 2) + new_names <- unlist(lapply(as.list(names(x)), function(x) if (x == 
target) names(new_val) else x)) + new_list <- vector('list', length = length(new_names)) + for (i in 1:length(new_list)) { + new_list[[i]] <- c(new_val, x)[which(c(names(new_val), names(x)) == new_names[i])] + } + return(unlist(new_list)) +} diff --git a/R/zzz.R b/R/zzz.R index 0130724599137903e4c47d9fea52504dcd36d2fa..6b6189b3c0697892ba35a3f78eecb0db42832343 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -118,6 +118,9 @@ rebuild_dim_params <- function(dim_params, merge_across_dims, # Reallocating pairs of across file and inner dimensions if they have # to be merged. They are put one next to the other to ease merge later. if (merge_across_dims) { + if (any(!names(inner_dims_across_files) %in% names(dim_params)) | + any(!unlist(inner_dims_across_files) %in% names(dim_params))) + stop("All *_across parameters must have value as a file dimension name.") for (inner_dim_across in names(inner_dims_across_files)) { inner_dim_pos <- which(names(dim_params) == inner_dim_across) file_dim_pos <- which(names(dim_params) == inner_dims_across_files[[inner_dim_across]]) @@ -429,7 +432,7 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter file_dim_as_selector_array_dim) { # inner_dim is not in return_vars or is NULL if (is.character(file_dim_as_selector_array_dim)) { #(1) - if (file_dim_as_selector_array_dim %in% found_pattern_dim) { + if (any(file_dim_as_selector_array_dim %in% found_pattern_dim)) { stop(paste0("Found '", inner_dim, "' selector has dimension of the pattern dim '", found_pattern_dim, "', which is not allowed. To assign the dependency on the pattern dim, ", @@ -446,10 +449,10 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter corrected_value <- file_dim_name } } - .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", corrected_value, - "', but '", inner_dim, "' is not in return_vars list or is NULL. ", - "To provide the correct metadata, the value of ", inner_dim, - " in 'return_vars' is specified as '", corrected_value, "'.")) + .warning(paste0("Found '", inner_dim, "' dependency on file dimension '", corrected_value, + "', but '", inner_dim, "' is not in return_vars list or does not include '", corrected_value, + "'. 
To provide the correct metadata, '", corrected_value, "' is included under '", inner_dim, + "' in 'return_vars.")) return(corrected_value) } @@ -822,6 +825,28 @@ reorder_split_dims <- function(all_split_dims, inner_dim_pos_in_split_dims, fina return(list(final_dims_fake, all_split_dims)) } +# Find the final_dims_fake for metadata if it needs to be reshaped +find_final_dims_fake_metadata <- function(merge_across_dims, split_multiselected_dims, + picked_common_vars, across_inner_dim, final_dims_fake, + dims_of_merge_dim, all_split_dims) { + if (merge_across_dims) { + if (!split_multiselected_dims) { + final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_metadata <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + } else if (split_multiselected_dims) { + target_split_dim_ind <- which(names(dim(picked_common_vars)) == names(all_split_dims)) + margin_dim_ind <- c(1:length(dim(picked_common_vars)))[-target_split_dim_ind] + if (identical(margin_dim_ind, numeric(0)) | identical(margin_dim_ind, integer(0))) { + final_dims_fake_metadata <- all_split_dims[[1]] + } else { + final_dims_fake_metadata <- .ReplaceElementInVector(dim(picked_common_vars), target = names(all_split_dims), new_val = all_split_dims[[1]]) + } + } + + return(final_dims_fake_metadata) +} # Build the work pieces. build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, final_dims, @@ -873,7 +898,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, x_dim_name <- attr(attr(selectors[[x]][['fri']], "dim"), "names") if (!is.null(x_dim_name)) { which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['fri']][[which_chunk]] + if (length(which_chunk) > 1) { + tmp_dim <- attr(selectors[[x]][['fri']], "dim") + vec_ind <- which_chunk[1] + for (i_dim in length(tmp_dim):2) { + vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)]) + } + selectors[[x]][['fri']][[vec_ind]] + } else { #old code + selectors[[x]][['fri']][[which_chunk]] + } } else { selectors[[x]][['fri']][[1]] } @@ -889,7 +923,16 @@ build_work_pieces <- function(work_pieces, i, selectors, file_dims, inner_dims, x_dim_name <- attr(attr(selectors[[x]][['sri']], "dim"), "names") if (!is.null(x_dim_name)) { which_chunk <- file_to_load_sub_indices[x_dim_name] - selectors[[x]][['sri']][[which_chunk]] + if (length(which_chunk) > 1) { + tmp_dim <- attr(selectors[[x]][['sri']], "dim") + vec_ind <- which_chunk[1] + for (i_dim in length(tmp_dim):2) { + vec_ind <- vec_ind + (which_chunk[i_dim] - 1) * prod(tmp_dim[1:(i_dim - 1)]) + } + selectors[[x]][['sri']][[vec_ind]] + } else { #old code + selectors[[x]][['sri']][[which_chunk]] + } } else { selectors[[x]][['sri']][[1]] } @@ -1028,9 +1071,13 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) { # If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs # due to unequal inner_dim ('time') length across file_dim ('sdate'). -remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim, - length_inner_across_dim, data_array) { +remove_additional_na_from_merge <- function(data_array = NULL, merge_dim_metadata = NULL, + inner_dims_across_files, final_dims, length_inner_across_dim) { + # data_array is a vector from bigmemory::as.matrix + # merge_dim_metadata is an array + across_file_dim <- names(inner_dims_across_files) #TODO: more than one? 
+ across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of these two dimensions in final_dims length_inner_across_store_dims <- final_dims[across_inner_dim] length_file_across_store_dims <- final_dims[across_file_dim] @@ -1042,37 +1089,74 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, for (i in 1:length_file_across_store_dims) { #1:4 logi_array[1:length_inner_across_dim[[i]], i] <- TRUE } - - # First, get the data array with final_dims dimension - data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims) - + + if (!is.null(data_array)) { + # First, turn the data vector into array with final_dims + data_array_final_dims <- array(data_array, dim = final_dims) + } + # Change the NA derived from additional spaces to -9999, then remove these -9999 func_remove_blank <- function(data_array, logi_array) { # dim(data_array) = [time, file_date] # dim(logi_array) = [time, file_date] - # Change the blank spaces from NA to -9999 - data_array[which(!logi_array)] <- -9999 + # data_array can be data or metadata; if data, change the blank spaces from + # NA to -9999; if metadata (supposed to be 'time'), change the corresponding + # spaces to -12^10. + if (is(data_array, "POSIXct")) { + # change to numeric first + data_array <- array(as.vector(data_array), dim = dim(data_array)) + data_array[which(!logi_array)] <- -12^10 + } else { + data_array[which(!logi_array)] <- -9999 + } return(data_array) } - data_array_final_dims <- multiApply::Apply(data_array_final_dims, - target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') - output_dims = c(across_inner_dim, across_file_dim), - fun = func_remove_blank, - logi_array = logi_array)$output1 - ## reorder back to the correct dim - tmp <- match(names(final_dims), names(dim(data_array_final_dims))) - data_array_final_dims <- .aperm2(data_array_final_dims, tmp) - data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector - - return(data_array_tmp) -} + if (!is.null(data_array)) { + data_array_final_dims <- multiApply::Apply(data_array_final_dims, + target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + } + if (!is.null(merge_dim_metadata)) { + tmp_attr <- attributes(merge_dim_metadata)$variables + merge_dim_metadata <- multiApply::Apply(merge_dim_metadata, + target_dims = c(across_inner_dim, across_file_dim), + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + } + + if (!is.null(data_array)) { + ## reorder back to the correct dim + tmp <- match(names(final_dims), names(dim(data_array_final_dims))) + data_array_final_dims <- .aperm2(data_array_final_dims, tmp) + data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector + } else { + data_array_tmp <- NULL + } + if (!is.null(merge_dim_metadata)) { + # Reorder metadata dim as final dim + tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) + merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) + merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != -12^10] + attr(merge_dim_metadata, 'variables') <- tmp_attr + } + + #NOTE: both outputs are vectors. If 'merge_dim_metadata' is actually time, it is just numeric here. 
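# Illustrative sketch (not from the patch; the date value is hypothetical): when
# 'merge_dim_metadata' is a time variable it is numeric at this point, and
# Start() later restores the dates with the same conversion shown below.
num_time <- as.numeric(as.POSIXct("2005-02-16", tz = "UTC"))
as.POSIXct(num_time, origin = "1970-01-01", tz = "UTC")  # back to POSIXct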
+ return(list(data_array = data_array_tmp, merge_dim_metadata = merge_dim_metadata)) +} # When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks # (i.e., work_piece) is necessary if one file contains values for discrete dimensions -rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims, - final_dims_fake, across_inner_dim, length_inner_across_dim) { +rebuild_array_merge_split <- function(data_array = NULL, metadata = NULL, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) { + + rebuild_data <- ifelse(is.null(data_array), FALSE, TRUE) + rebuild_metadata <- ifelse(is.null(metadata), FALSE, TRUE) + # generate the correct order list from indices_chunk final_order_list <- list() i <- 1 @@ -1103,43 +1187,101 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) } - data_array_no_split <- array(data_array_tmp, dim = new_dims) - # seperate 'time' dim into each work_piece length - data_array_seperate <- list() + + if (rebuild_data) { + data_array <- array(data_array, dim = new_dims) + # seperate 'time' dim into each work_piece length + data_array_seperate <- vector('list', length = length(length_inner_across_dim)) + array_piece <- vector('list', length = length(final_order_list)) + } + if (rebuild_metadata) { + metadata <- array(metadata, dim = length(metadata)) #metadata_no_split + names(dim(metadata)) <- across_inner_dim + metadata_seperate <- vector('list', length = length(length_inner_across_dim)) + metadata_piece <- vector('list', length = length(final_order_list)) + } + tmp <- cumsum(unlist(length_inner_across_dim)) tmp <- c(0, tmp) for (i in 1:length(length_inner_across_dim)) { - data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + if (rebuild_data) { + data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array, + across_inner_dim, + (tmp[i] + 1):tmp[i + 1]) + } + if (rebuild_metadata) { + metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata, across_inner_dim, (tmp[i] + 1):tmp[i + 1]) + } } + # re-build the array: chunk which_chunk <- as.numeric(names(final_order_list)) sort_which_chunk <- sort(unique(which_chunk)) which_chunk <- sapply(lapply(which_chunk, '==', sort_which_chunk), which) - how_many_indices <- unlist(final_order_list) - array_piece <- list() - ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) + + if (rebuild_data) { + ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) + } else if (rebuild_metadata) { + ind_in_array_seperate <- as.list(rep(1, length(metadata_seperate))) + } + for (i in 1:length(final_order_list)) { - array_piece[[i]] <- ClimProjDiags::Subset( - data_array_seperate[[which_chunk[i]]], across_inner_dim, - ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + if (rebuild_data) { + array_piece[[i]] <- ClimProjDiags::Subset( + data_array_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + } + if (rebuild_metadata) { + metadata_piece[[i]] <- ClimProjDiags::Subset( + metadata_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + 
    }
      ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i]
   }
-  
+
   # re-build the array: paste
-  data_array_tmp <- array_piece[[1]]
-  along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim)
-  if (length(array_piece) > 1) {
-    for (i in 2:length(array_piece)) {
-      data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]],
-                                     along = along_pos)
+  if (rebuild_data) {
+    data_array_tmp <- array_piece[[1]]
+  } else {
+    data_array_tmp <- NULL
+  }
+  if (rebuild_metadata) {
+    metadata_tmp <- metadata_piece[[1]]
+  } else {
+    metadata_tmp <- NULL
+  }
+
+  if (rebuild_data) {
+    along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim)
+    length_piece <- length(array_piece)
+  }
+  if (rebuild_metadata) {
+    along_pos_metadata <- which(names(dim(metadata_tmp)) == across_inner_dim)
+    if (!rebuild_data)
+      length_piece <- length(metadata_piece)
+  }
+
+  if (length_piece > 1) {
+    for (i in 2:length_piece) {
+      if (rebuild_data) {
+        data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]],
+                                       along = along_pos)
+      }
+      if (rebuild_metadata) {
+        metadata_tmp <- abind::abind(metadata_tmp, metadata_piece[[i]],
+                                     along = along_pos_metadata)
+      }
     }
   }
-  }
-  
-  return(data_array_tmp)
+  } else {
+    data_array_tmp <- data_array
+    metadata_tmp <- metadata
+  }
+
+  return(list(data_array = data_array_tmp, metadata = metadata_tmp))
 }
 
@@ -1288,7 +1430,7 @@ generate_picked_var_of_read <- function(var_to_read, var_to_check, array_of_file
     if (any(names(array_var_dims) %in% names(var_file_dims))) {
       array_var_dims <- array_var_dims[-which(names(array_var_dims) %in% names(var_file_dims))]
     }
-    if (names(array_var_dims) != names(var_dims)) {
+    if (any(names(array_var_dims) != names(var_dims))) {
       stop("Error while reading the variable '", var_to_read, "' from ",
            "the file. Dimensions do not match.\nExpected ",
            paste(paste0("'", names(array_var_dims), "'"), collapse = ', '),
@@ -1316,8 +1458,11 @@
       } else {
         padding <- array(dim = padding_dims)
       }
+      tmp_attr <- attributes(either_picked_vars)$variables
       either_picked_vars <- .abind2(either_picked_vars, padding,
                                     names(full_array_var_dims)[longer_dims_in_full_array])
+      attr(either_picked_vars, 'variables') <- tmp_attr
+
     } else {
       stop("Error while reading the variable '", var_to_read, "' from ",
            "the file. Found size (", paste(var_dims, collapse = ' x '),
diff --git a/inst/doc/faq.md b/inst/doc/faq.md
index 689e13a2f468cc1ccf27ffb146af46047794a2c4..83065d47e3ecef79af49e958188888d7eb45f4c7 100644
--- a/inst/doc/faq.md
+++ b/inst/doc/faq.md
@@ -927,6 +927,10 @@ data <- Start(dat = path,
                            region = 'sdate'),
               retrieve = T)
 ```
+The dependency can be on more than one file dimension. You only need to create an array
+whose dimension names are the file dimensions it depends on. See more examples
+in [use case ex1_13](inst/doc/usecase/ex1_13_implicit_dependency.R).
+
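+For illustration, a minimal sketch (the path and dimension names below are hypothetical;
+see the use case linked above for a real call): the selector array carries every file
+dimension it depends on as a named array dimension, and the same file dimensions are
+listed for it in 'return_vars':
+
+```r
+# 'region' varies with the two file dimensions 'sdate' and 'memb'
+region <- array('Nino3.4', dim = c(region = 1, sdate = 2, memb = 2))
+data <- Start(dat = '/hypothetical/path/$var$_s$sdate$-$memb$.nc',
+              var = 'tosmean',
+              memb = c('r1i1p1f1', 'r2i1p1f1'),
+              sdate = c('2000', '2001'),
+              region = region,                 # [region = 1, sdate = 2, memb = 2]
+              region_var = 'region',
+              return_vars = list(region = c('sdate', 'memb')),
+              retrieve = TRUE)
+```
+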
 ### 23. The best practice of using vector and list for selectors
 
 There are three ways to define the selectors in Start(): `indices()`, `values()`, and character string
diff --git a/inst/doc/usecase/ex1_13_implicit_dependency.R b/inst/doc/usecase/ex1_13_implicit_dependency.R
index 1ea9381e53a225612e295b526656910702456503..6740a21dc31c8916dacdfc48750a0bafabf9b285 100644
--- a/inst/doc/usecase/ex1_13_implicit_dependency.R
+++ b/inst/doc/usecase/ex1_13_implicit_dependency.R
@@ -1,5 +1,7 @@
 # Author: An-Chi Ho
 # Date: 13th July 2021
+# Case 3 implemented: 6th April 2022
+
 #---------------------------------------------------------------------
 # This script shows how to use a value array as the inner dimension selector to express
 # dependency on a file dimension. By this means, we don't need to specify the *_across
@@ -11,6 +13,8 @@
 # index for Nino3. sdate 1993 has 'Nino3' at index 9 while sdate 2013 has 'Nino3' at
 # index 11. Create a value array for region selector so Start() can look for 'Nino3' in
 # each file.
+# In the third case, 'region' is defined as an array that has dimensions 'sdate', 'member',
+# and 'region'. It works when the region indices depend on both sdate and member.
 #---------------------------------------------------------------------
 
 library(startR)
@@ -80,9 +84,59 @@ data[1, 1, , 1, ]
 #[2,] 24.47482 24.75953
 # --> region index 11 in orginal file
+#=============================================================================
+
+# Case 3: 'region' depends on 'sdate' and 'member'
+
+#NOTE: Actually, the region indices are not dependent on sdate in this case, but
+# it should work if they were. If you have a better example, please let me know.
+region <- array(c('Nino3.4', 'Nino3'), dim = c(region = 2, sdate = 3, memb = 5))
+# check the array
+region[, 1, 1]
+#[1] "Nino3.4" "Nino3"
+region[, 1, 2]
+#[1] "Nino3.4" "Nino3"
+region[, 2, 2]
+#[1] "Nino3.4" "Nino3"
+#--> For each sdate-memb combination, the desired regions are "Nino3.4" and "Nino3".
+path <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/',
+               'EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/',
+               '$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc')
+data <- Start(dat = path,
+              var = 'tosmean',
+              memb = paste0('r', c(24:28), 'i1p1f1'),
+              region = region,
+              region_var = 'region',
+              sdate = paste0(2000:2002),
+              time = 'all',
+              chunk = 'all',
+              chunk_depends = 'sdate',
+              time_across = 'chunk',
+              merge_across_dims = TRUE,
+              return_vars = list(time = c('sdate','chunk'),
+                                 region = c('sdate', 'memb')),
+              retrieve = T)
+# Check output
+## Nino3.4
+drop(data)[ , 1, , 1]
+#         [,1]     [,2]     [,3]
+#[1,] 26.87246 27.28198 27.65627
+#[2,] 26.87331 27.31887 27.63275
+#[3,] 26.89038 27.31446 27.58801
+#[4,] 26.90285 27.26750 27.66004
+#[5,] 26.88851 27.28953 27.68499
+
+## Nino3
+drop(data)[ , 2, , 1]
+#         [,1]     [,2]     [,3]
+#[1,] 26.58774 26.38932 26.80643
+#[2,] 26.58879 26.43760 26.68655
+#[3,] 26.59319 26.41373 26.64150
+#[4,] 26.69607 26.40465 26.69096
+#[5,] 26.59114 26.40454 26.71252
diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R
index 4f58a8744cfd69504eceed8883c5b7f3b6a1758e..53122c8368db45c9248beb5e4fc9eb66ed903fe7 100644
--- a/tests/testthat/test-Start-calendar.R
+++ b/tests/testthat/test-Start-calendar.R
@@ -48,7 +48,7 @@ expect_equal(
 })
 
 test_that("2. 
365_day, daily, unit = 'days since 1984-01-01'", { -path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20191219/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' +path_bcc_csm2 <- '/esarchive/exp/CMIP6/dcppA-hindcast/bcc-csm2-mr/cmip6-dcppA-hindcast_i1p1/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r1i1p1f1/day/$var$/gn/v20200114/$var$_day_BCC-CSM2-MR_dcppA-hindcast_s$sdate$-r1i1p1f1_gn_19800101-19891231.nc' suppressWarnings( data <- Start(dat = path_bcc_csm2, @@ -212,9 +212,13 @@ suppressWarnings( ) expect_equal( - attr(obs, 'Variables')$common$time[1, 1], + attr(obs, 'Variables')$common$time[1], as.POSIXct('2005-05-16 12:00:00', tz = 'UTC') ) +expect_equal( + dim(attr(obs, 'Variables')$common$time), + c(time = 1) +) }) diff --git a/tests/testthat/test-Start-implicit_dependency_by_selector.R b/tests/testthat/test-Start-implicit_dependency_by_selector.R index 5c2f050ed90bbf23199a6e13f1ae40adae9375b3..10e5545fcb80f3af7eefed32a8f00dadd65feacc 100644 --- a/tests/testthat/test-Start-implicit_dependency_by_selector.R +++ b/tests/testthat/test-Start-implicit_dependency_by_selector.R @@ -115,4 +115,43 @@ tolerance = 0.0001 ) +}) + +test_that("3. region depends on member and sdate", { +#NOTE: This case, region indices are not dependent on sdate. But it should work if it is +reg <- array('Nino3.4', dim = c(sdate = 3, memb = 2, region = 1)) + +path_SR <- paste0('/esarchive/exp/ecearth/a42y/diags/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$memb$/Omon/$var$/gn/v*/$var$_Omon_EC-Earth3_dcppA-hindcast_s$sdate$-$memb$_gn_$chunk$.nc') +suppressWarnings( +data <- Start(dat = path_SR, + var = 'tosmean', + memb = paste0('r', c(24, 28), 'i1p1f1'), + region = reg, + region_var = 'region', + sdate = paste0(2000:2002), + time = c(1:4), + chunk = 'all', + chunk_depends = 'sdate', + time_across = 'chunk', + merge_across_dims = TRUE, + return_vars = list(time=c('sdate','chunk'), region=c('sdate', 'memb')), + retrieve = T) +) + +expect_equal( +dim(data), +c(dat = 1, var = 1, memb = 2, region = 1, sdate = 3, time = 4) +) +expect_equal( +as.vector(drop(data)[,,1]), +c(26.87246, 26.88851, 27.28198, 27.28953, 27.65627, 27.68499), +tolerance = 0.0001 +) + +expect_equal( +dim(attr(data, 'Variables')$common$region), +c(memb = 2, sdate = 3, region = 1) +) + + }) diff --git a/tests/testthat/test-Start-implicit_inner_dim.R b/tests/testthat/test-Start-implicit_inner_dim.R index 6a3262a6fe17373fb0d7df8469f593b545e76744..3788af0f48c41729cf56bcc2a00049bda3561c7e 100644 --- a/tests/testthat/test-Start-implicit_inner_dim.R +++ b/tests/testthat/test-Start-implicit_inner_dim.R @@ -3,6 +3,7 @@ context("Start() implicit inner dimension") # startR allows it not to be specified in the call. Users can still define it in # 'return_vars'. #--------------------------------------------------------------- +#NOTE: Also useful for test-Start-time_unit.R test3 test_that("1. 
time = 1", { diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index a5fa558cb8a76ad5ff6542c5678c2c7187e17226..cfd7dfb1d3559a7b67ed0afdfe8633998412eae2 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -65,7 +65,7 @@ expect_equal( return_vars = list(time = NULL), retrieve = TRUE) ) - time6 <- attr(test4, 'Variables')$common$time + time6 <- attr(test6, 'Variables')$common$time expect_equal( dim(time6), @@ -101,7 +101,7 @@ expect_equal( return_vars = list(time = NULL), retrieve = TRUE) ) - time6a <- attr(test4, 'Variables')$common$time + time6a <- attr(test6a, 'Variables')$common$time expect_equal( dim(time6a), diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R new file mode 100644 index 0000000000000000000000000000000000000000..d97cd18f4f7654e41f741d76fb1411cd58d5bbaa --- /dev/null +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -0,0 +1,752 @@ +context("Start() metadata reshaping") +# When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. + +test_that("1. time across fyear, fyear depends on sdate", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 365) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) +expect_equal( +as.vector(dates[2, ]), +as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) + +# retrieve = FALSE +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + + +test_that("2. 
time across fyear, only one sdate", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', +# fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 365) +) +expect_equal( +length(data), +length(dates) +) +expect_equal( +as.vector(dates), +as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) + +#retrieve = FALSE +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', +# fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("3. 
time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + day = 700:860, ## initialised in Nov + day_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude'), + day = c('day', 'time')), + return_vars = list(lat = NULL, lon = NULL, + day = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['day']] + +expect_equal( +dim(dates), +c(sdate = 2, day = 161) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +as.vector(seq(as.POSIXct('1962-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1963-03-10 12:00:00', tz = 'UTC'), by = 'day')) +) +expect_equal( +as.vector(dates[2, ]), +as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03-09 12:00:00', tz = 'UTC'), by = 'day')) +) + + +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + day = 700:860, ## initialised in Nov + day_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude'), + day = c('day', 'time')), + return_vars = list(lat = NULL, lon = NULL, + day = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['day']] +expect_equal( +datesF, +dates +) + +}) + + +test_that("4. 
merge and split time dim", { +datess <- seq(as.POSIXct('1994-05-01', tz = 'UTC'), as.POSIXct('1994-12-31', tz = 'UTC'), by = 'days') +datess <- c(datess[c(1:31, 32:62, 62:92, 93:123, 124:154, 154:184, 185:215, 215:245)]) +datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), + origin = '1970-01-01', tz = 'UTC') +dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), + substr, 1, 7)))) + +suppressWarnings( + data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = dates_file, + time = values(datess), #[time = 31, sdate = 8] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + #because time is assigned by 'values', set the tolerance to avoid too distinct match + time_tolerance = as.difftime(1, units = 'hours'), + #time values are across all the files + time_across = 'file_date', + #combine time and file_date dims + merge_across_dims = TRUE, + #exclude the additional NAs generated by merge_across_dims + merge_across_dims_narm = TRUE, + #split time dim, because it is two-dimensional + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) + +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(time = 31, sdate = 8) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates), +as.vector(datess) +) + +suppressWarnings( + dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = dates_file, + time = values(datess), #[time = 31, sdate = 8] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + #because time is assigned by 'values', set the tolerance to avoid too distinct match + time_tolerance = as.difftime(1, units = 'hours'), + #time values are across all the files + time_across = 'file_date', + #combine time and file_date dims + merge_across_dims = TRUE, + #exclude the additional NAs generated by merge_across_dims + merge_across_dims_narm = TRUE, + #split time dim, because it is two-dimensional + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("5. 
test 1 but merge_across_dims_narm = F", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 608) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +c(as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) +) +expect_equal( +as.vector(dates[2, ]), +c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) +) + +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("6. 
split time dim only", { + +datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') +datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), + origin = '1970-01-01', tz = 'UTC') +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199407.nc'), + var = 'tas', +# file_date = '199407', + time = values(datess), #[time = 7, week = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), +# time_across = 'file_date', +# merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), #'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 7, week = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[, ]), +as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'day')) +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199407.nc'), + var = 'tas', +# file_date = '199407', + time = values(datess), #[time = 7, week = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), +# time_across = 'file_date', +# merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), #'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("7. 
split dim + merge + merge_narm = F", { + +datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'days') +datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), + origin = '1970-01-01', tz = 'UTC') + +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = c('199407', '199408'), + time = values(datess), #[time = 31, month = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 31, month = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[, ]), +as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'day')) +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = c('199407', '199408'), + time = values(datess), #[time = 31, month = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + + +test_that("8. 
split sdate dim", { + +file_date <- array(c(paste0(1993:1995, '07'), paste0(1993:1995, '08')), + dim = c(syear = 3, smonth = 2)) +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(syear = 3, smonth = 2, time = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +dates[, 1, 1], +seq(as.POSIXct('1993-07-01', tz = 'UTC'), as.POSIXct('1995-07-01', tz = 'UTC'), by = 'year') +) +expect_equal( +dates[, 2, 2], +seq(as.POSIXct('1993-08-01 06:00:00', tz = 'UTC'), as.POSIXct('1995-08-01 06:00:00', tz = 'UTC'), by = 'year') +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = file_date, #[syear = 3, smonth = 2] + time = indices(1:2), + latitude = indices(1), + longitude = indices(1), + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("9. split file dim that contains 'time', and 'time' inner dim is implicit", { + +dates_arr <- array(c(paste0(1961, '0', 1:5), paste0(1962, '0', 1:5)), dim = c(time = 5, syear = 2)) + +suppressWarnings( +data <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', + var = 'tas', + file_date = dates_arr, # [syear, time] + split_multiselected_dims = TRUE, + latitude = indices(1), + longitude = indices(1), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) + +dates <- attr(data, 'Variables')$common$time + + +expect_equal( +dim(dates), +c(time = 5, syear = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +format(dates, '%Y%m'), +as.vector(dates_arr) +) + + +suppressWarnings( +dataF <- Start(dat = '/esarchive/recon/jma/jra55/monthly_mean/$var$_f6h/$var$_$file_date$.nc', + var = 'tas', + file_date = dates_arr, # [syear, time] + split_multiselected_dims = TRUE, + latitude = indices(1), + longitude = indices(1), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = NULL, longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("10. 
implicit dependency, leap year", { + +a <- as.POSIXct('1995-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1995-03-01 18:00:00', tz = 'UTC') +y1 <- seq(a, b, by = 'days') +a <- as.POSIXct('1996-02-27 18:00:00', tz = 'UTC') +b <- as.POSIXct('1996-03-01 18:00:00', tz = 'UTC') +y2 <- seq(a, b, by = 'days') +y2 <- y2[-3] # remove 28 Feb +time_array <- array(c(y1, y2), dim = c(time = 3, file_date = 2)) +time_array <- as.POSIXct(time_array, origin = '1970-01-01', tz = 'UTC') +suppressWarnings( +data <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data, 'Variables')$common$time + +expect_equal( +dim(dates), +c(file_date = 2, time = 3) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(aperm(dates, 2:1)), +as.vector(time_array) +) + +suppressWarnings( +dataF <- Start(dat = "/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$file_date$.nc", + var = "tas", + file_date = paste0(1994:1995, '1101'), #1996 is leap year + time = time_array, #[time = 3, file_date = 2] + latitude = indices(1), longitude = indices(1), ensemble = indices(1), + return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + + +}) + diff --git a/tests/testthat/test-Start-split-merge.R b/tests/testthat/test-Start-split-merge.R index fe686dc619c7aae495a6bd2815a35aefe1a4fd1a..8793296fd06001424a32a0a8369803a0a1831243 100644 --- a/tests/testthat/test-Start-split-merge.R +++ b/tests/testthat/test-Start-split-merge.R @@ -176,10 +176,10 @@ c(dat = 1, var = 1, smonth = 2, syear = 2, time = 1, latitude = 18, longitude = ) expect_equal( dim(attr(obs, 'Variables')$common$time), -c(file_date = 4, time = 1) +c(smonth = 2, syear = 2, time = 1) ) expect_equal( -attr(obs, 'Variables')$common$time[1, 1], +attr(obs, 'Variables')$common$time[1, 1, 1], as.POSIXct('2013-11-15', tz = 'UTC') ) diff --git a/tests/testthat/test-Start-time_unit.R b/tests/testthat/test-Start-time_unit.R new file mode 100644 index 0000000000000000000000000000000000000000..3aa193042caec24d0065c1eb60d987a3fb96aa0a --- /dev/null +++ b/tests/testthat/test-Start-time_unit.R @@ -0,0 +1,91 @@ +context("To detect the variable with time format and adjust the units") + +test_that("1. 
The data has units like time", { + + +suppressWarnings( +FD <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', + var = 'FD', # units: days + time = indices(1), + longitude = indices(1), + latitude = indices(1), + num_procs = 1, + return_vars = list(time = NULL), + retrieve = TRUE) +) +suppressWarnings( +FD2 <- Start(dat = '/esarchive/obs/ukmo/hadex3/original_files/1961-90/HadEX3_$var$_MON.nc', + var = 'FD', # units: days + time = indices(1), + longitude = indices(1), + latitude = indices(1), + num_procs = 1, +# return_vars = list(time = NULL), + retrieve = TRUE) +) +expect_equal( +attr(FD, 'Variables')$common$FD$units, +'days' +) +expect_equal( +attr(FD2, 'Variables')$common$FD$units, +'days' +) + + +}) + +test_that("2. The metadata variable name is not time", { +# VITIGEOOS + +vari <- "rsds" +anlgs <- paste0("/esarchive/oper/VITIGEOSS","/output/cfsv2/weekly_mean/", + "$var$/$var$-vitigeoss-cat","_1999-2018_", "$file_date$.nc") + +file_date_array <- array(dim = c(sweek = 2, sday = 3)) +file_date_array[, 1] <- c(paste0('04', c('04', '07'))) +file_date_array[, 2] <- c(paste0('04', c('07', '11'))) +file_date_array[, 3] <- c(paste0('04', c('11', '14'))) + +suppressWarnings( + hcst <- Start(dat = anlgs, + var = vari, + latitude = indices(1), #'all', + longitude= indices(1), #'all', + member= indices(1), #'all', + time = 'all', # inner dim!! + syear = 'all', #inner dim!! + file_date = file_date_array, + split_multiselected_dims = TRUE, + retrieve = T, + return_vars = list(leadtimes = 'file_date'), + synonims = list(longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude'), + syear = c('sdate','syear'), + member = c('ensemble','member'))) +) +time_attr <- attr(hcst, 'Variables')$common$leadtimes + +expect_equal( +dim(time_attr), +c(sweek = 2, sday = 3, syear = 20, time = 4) +) +expect_equal( +time_attr[,1,1,1], +as.POSIXct(c("1999-04-08","1999-04-11"), tz = "UTC") +) +expect_equal( +time_attr[2,,1,1], +as.POSIXct(c("1999-04-11","1999-04-15", "1999-04-18"), tz = "UTC") +) +expect_equal( +time_attr[1,1,20,], +as.POSIXct(c("2018-04-08", "2018-04-15", "2018-04-22 UTC", "2018-04-29 UTC"), tz = "UTC") +) + +}) + + +#test_that("3. Time dimension is implicit", { +# See test-Start-implicit_inner_dim.R +#}) diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 4fa8642e6eb238363912ad0c593a285e6bab7559..ff83441a05299f9db2b804a0d9f8234ae1ecaa12 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -19,7 +19,7 @@ data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), lon = values(list(150, 170)), lon_reorder = CircularSort(0, 360), fyear = 'all', - member = indices(1), + member = 'r10i1p1f1', #indices(1), fyear_depends = 'sdate', time_across = 'fyear', merge_across_dims = TRUE,