diff --git a/R/NcDataReader.R b/R/NcDataReader.R index fb61ef8e8171f04b3b893d0136a12843cbbed2a5..95fd1f1f46d5e069006cc2308ddeef7992ffecba 100644 --- a/R/NcDataReader.R +++ b/R/NcDataReader.R @@ -183,120 +183,122 @@ NcDataReader <- function(file_path = NULL, file_object = NULL, } }) - if (length(names(attr(result, 'variables'))) == 1 & all(names(attr(result, 'variables')) == 'time')) { - var_name <- names(attr(result, 'variables')) - units <- attr(result, 'variables')[[var_name]][['units']] + if (length(names(attr(result, 'variables'))) == 1) { + # The 1st condition is for implicit time dim (if time length = 1, it is allowed to not be defined in Start call. Therefore, it is not in the list of synonims) + if (names(attr(result, 'variables')) == 'time' | 'time' %in% synonims[[names(attr(result, 'variables'))]]) { + var_name <- names(attr(result, 'variables')) + units <- attr(result, 'variables')[[var_name]][['units']] - if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { - if (units == 'seconds') { -# units <- 'secs' - } else if (units == 'minutes') { -# units <- 'mins' - result <- result * 60 # min to sec - } - result[] <- paste(result[], units) - - } else if (grepl(' since ', units)) { - # Find the calendar - calendar <- attr(result, 'variables')[[var_name]]$calendar - if (calendar == 'standard') calendar <- 'gregorian' + if (units %in% c('seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years')) { + if (units == 'seconds') { +# units <- 'secs' + } else if (units == 'minutes') { +# units <- 'mins' + result <- result * 60 # min to sec + } + result[] <- paste(result[], units) - parts <- strsplit(units, ' since ')[[1]] - units <- parts[1] + } else if (grepl(' since ', units)) { + # Find the calendar + calendar <- attr(result, 'variables')[[var_name]]$calendar + if (calendar == 'standard') calendar <- 'gregorian' - if (units %in% c('second', 'seconds')) { -# units <- 'secs' - } else if (units %in% c('minute', 'minutes')) { -# units <- 'mins' - result <- result * 60 # min to sec - } else if (units %in% c('hour', 'hours')) { - result <- result * 60 * 60 # hour to sec - } else if (units %in% c('day', 'days')) { -# units <- 'days' - result <- result * 24 * 60 * 60 # day to sec - } else if (units %in% c('month', 'months')) { - # define day in each month - leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - # Origin year and month - ori_year <- as.numeric(substr(parts[2], 1, 4)) - ori_month <- as.numeric(substr(parts[2], 6, 7)) - if (is.na(ori_month)) { - ori_month <- as.numeric(substr(parts[2], 6, 6)) - } - if (!is.numeric(ori_year) | !is.numeric(ori_month)) { - stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", - "Check the file or contact the maintainer.")) - } + parts <- strsplit(units, ' since ')[[1]] + units <- parts[1] - if (calendar == 'gregorian') { - # Find how many years + months - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - # Find the leap years we care - if (ori_month <= 2) { - leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) - } else { - leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + if (units %in% c('second', 'seconds')) { +# units <- 'secs' + } else if (units %in% c('minute', 'minutes')) { +# units <- 'mins' + result <- result * 60 # min to sec + } else if (units %in% c('hour', 'hours')) { + result <- result * 60 * 60 # hour to sec + } else if (units %in% c('day', 'days')) { +# units <- 'days' + result <- result * 24 * 60 * 60 # day to sec + } else if (units %in% c('month', 'months')) { + # define day in each month + leap_month_day <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + no_leap_month_day <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + # Origin year and month + ori_year <- as.numeric(substr(parts[2], 1, 4)) + ori_month <- as.numeric(substr(parts[2], 6, 7)) + if (is.na(ori_month)) { + ori_month <- as.numeric(substr(parts[2], 6, 6)) + } + if (!is.numeric(ori_year) | !is.numeric(ori_month)) { + stop(paste0("The time unit attribute format is not 'YYYY-MM-DD' or 'YYYY-M-D'. ", + "Check the file or contact the maintainer.")) } - total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet + if (calendar == 'gregorian') { + # Find how many years + months + yr_num <- floor(result / 12) + month_left <- result - yr_num * 12 + # Find the leap years we care + if (ori_month <= 2) { + leap_num <- length(which(sapply(ori_year:(ori_year + yr_num - 1), s2dv::LeapYear))) + } else { + leap_num <- length(which(sapply((ori_year + 1):(ori_year + yr_num), s2dv::LeapYear))) + } + total_days <- leap_num * 366 + (yr_num - leap_num) * 365 # not include month_left yet - if (month_left != 0) { - if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr - # Is the last year a leap year? - last_leap <- s2dv::LeapYear(ori_year + yr_num) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) - } else { - total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) - } - } else { # the last month ends in the next yr - if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 + if (month_left != 0) { + if ((ori_month + month_left) <= 12) { # the last month is still in the same last yr + # Is the last year a leap year? + last_leap <- s2dv::LeapYear(ori_year + yr_num) if (last_leap) { - total_days <- total_days + sum(leap_month_day[2:12]) + total_days <- total_days + sum(leap_month_day[ori_month:(ori_month + month_left - 1)]) } else { - total_days <- total_days + sum(no_leap_month_day[2:12]) + total_days <- total_days + sum(no_leap_month_day[ori_month:(ori_month + month_left - 1)]) } - } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 - last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) - needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) - if (last_leap) { - total_days <- total_days + sum(leap_month_day[needed_month]) - } else { - total_days <- total_days + sum(no_leap_month_day[needed_month]) + } else { # the last month ends in the next yr + if (ori_month == 2) { # e.g., 2005-02-16 + 11mth = 2006-01-16 + last_leap <- s2dv::LeapYear(ori_year + yr_num) # still consider 2005 + if (last_leap) { + total_days <- total_days + sum(leap_month_day[2:12]) + } else { + total_days <- total_days + sum(no_leap_month_day[2:12]) + } + } else { # e.g., 2005-04-16 + 11mth = 2006-03-16 + last_leap <- s2dv::LeapYear(ori_year + yr_num + 1) + needed_month <- c(ori_month:12, 1:(ori_month + month_left - 12 - 1)) + if (last_leap) { + total_days <- total_days + sum(leap_month_day[needed_month]) + } else { + total_days <- total_days + sum(no_leap_month_day[needed_month]) + } } } } - } - result <- total_days * 24 * 60 * 60 # day to sec - } else if (calendar %in% c('365_day',' 365', 'noleap')) { - yr_num <- floor(result / 12) - month_left <- result - yr_num * 12 - total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)]) - result <- total_days * 24 * 60 * 60 # day to sec + result <- total_days * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('365_day',' 365', 'noleap')) { + yr_num <- floor(result / 12) + month_left <- result - yr_num * 12 + total_days <- 365 * yr_num + sum(no_leap_month_day[ori_month:(month_left - 1)]) + result <- total_days * 24 * 60 * 60 # day to sec - } else if (calendar %in% c('360_day', '360')) { - result <- result * 30 * 24 * 60 * 60 # day to sec + } else if (calendar %in% c('360_day', '360')) { + result <- result * 30 * 24 * 60 * 60 # day to sec - } else { #old code. The calendar is not in any of the above. - result <- result * 30.5 - result <- result * 24 * 60 * 60 # day to sec + } else { #old code. The calendar is not in any of the above. + result <- result * 30.5 + result <- result * 24 * 60 * 60 # day to sec + } } - } - new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] - new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) + new_array <- PCICt::as.PCICt(result, cal = calendar, origin = parts[2])[] + new_array <- suppressWarnings(PCICt::as.POSIXct.PCICt(new_array, tz = "UTC")) - #new_array <- seq(as.POSIXct(parts[2]), - # length = max(result, na.rm = TRUE) + 1, - # by = units)[result[] + 1] - dim(new_array) <- dim(result) - attr(new_array, 'variables') <- attr(result, 'variables') - result <- new_array + #new_array <- seq(as.POSIXct(parts[2]), + # length = max(result, na.rm = TRUE) + 1, + # by = units)[result[] + 1] + dim(new_array) <- dim(result) + attr(new_array, 'variables') <- attr(result, 'variables') + result <- new_array + } } - } + } } if (close) { diff --git a/R/Start.R b/R/Start.R index d11c669daeec82be745586fb321c789ec8e03376..6fc05ea2d3d8d8c46437adf229040123db1e0f24 100644 --- a/R/Start.R +++ b/R/Start.R @@ -1745,11 +1745,23 @@ Start <- function(..., # dim = indices/selectors, if (inner_dim %in% inner_dims_across_files | is.character(file_dim_as_selector_array_dim)) { #(2) or (1) # inner_dim is not in return_vars or is NULL - if (((!inner_dim %in% names(common_return_vars)) & (!inner_dim %in% names(return_vars))) | - (inner_dim %in% names(common_return_vars) & is.null(common_return_vars[[inner_dim]]))) { - common_return_vars[[inner_dim]] <- correct_return_vars( - inner_dim, inner_dims_across_files, - found_pattern_dim, file_dim_as_selector_array_dim) + need_correct <- FALSE + if (((!inner_dim %in% names(common_return_vars)) & + (!inner_dim %in% names(return_vars))) | + (inner_dim %in% names(common_return_vars) & + is.null(common_return_vars[[inner_dim]]))) { + need_correct <- TRUE + } else if (inner_dim %in% names(common_return_vars) & + !is.null(names(inner_dims_across_files))) { + if (!names(inner_dims_across_files) %in% common_return_vars[[inner_dim]]) { + need_correct <- TRUE + } + } + if (need_correct) { + common_return_vars[[inner_dim]] <- + c(common_return_vars[[inner_dim]], + correct_return_vars(inner_dim, inner_dims_across_files, + found_pattern_dim, file_dim_as_selector_array_dim)) } } } @@ -3462,8 +3474,60 @@ Start <- function(..., # dim = indices/selectors, common_vars_to_crop[[common_var_to_crop]] <- Subset(transformed_var_with_selectors, inner_dim, crop_indices) } - } else { #old code - common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } else { + if (!is.null(file_dim)) { #merge_across_dims, file_dims is the depended file dim + if (file_dim %in% names(dim(common_vars_to_crop[[common_var_to_crop]]))) { + tmp <- common_vars_to_crop[[common_var_to_crop]] + tmp_attributes <- attributes(common_vars_to_crop[[common_var_to_crop]]) + dim_extra_ind <- which(!names(dim(tmp)) %in% c(file_dim, inner_dim)) + if (!identical(dim_extra_ind, integer(0))) { + tmp_list <- asplit(tmp, dim_extra_ind) + dim_file_ind <- which(names(dim(tmp_list[[1]])) == file_dim) + tmp_list <- lapply(tmp_list, asplit, dim_file_ind) + } else { # only file_dim and inner_dim + dim_file_ind <- which(names(dim(tmp)) == file_dim) + tmp_list <- asplit(tmp, dim_file_ind) + # Add another layer to be consistent with the first case above + tmp_list <- list(tmp_list) + } + max_fri_length <- max(sapply(fri, length)) + for (i_extra_dim in 1:length(tmp_list)) { + for (i_fri in 1:length(fri)) { + tmp_list[[i_extra_dim]][[i_fri]] <- + tmp_list[[i_extra_dim]][[i_fri]][fri[[i_fri]]] + + if (length(tmp_list[[i_extra_dim]][[i_fri]]) != max_fri_length) { + tmp_list[[i_extra_dim]][[i_fri]] <- + c(tmp_list[[i_extra_dim]][[i_fri]], rep(NA, max_fri_length - length(tmp_list[[i_extra_dim]][[i_fri]]))) + } + } + } + # Change list back to array + tmp_new_dim <- c(max_fri_length, dim(tmp)[file_dim], dim(tmp)[dim_extra_ind]) + names(tmp_new_dim) <- c(inner_dim, file_dim, names(dim(tmp))[dim_extra_ind]) + common_vars_to_crop[[common_var_to_crop]] <- + array(unlist(tmp_list), dim = tmp_new_dim) + # Reorder back + common_vars_to_crop[[common_var_to_crop]] <- + aperm(common_vars_to_crop[[common_var_to_crop]], match(names(dim(tmp)), names(tmp_new_dim))) + # Put attributes back + tmp <- which(!names(tmp_attributes) %in% names(attributes(common_vars_to_crop[[common_var_to_crop]]))) + attributes(common_vars_to_crop[[common_var_to_crop]]) <- + c(attributes(common_vars_to_crop[[common_var_to_crop]]), + tmp_attributes[tmp]) + + if ('time' %in% synonims[[common_var_to_crop]]) { + # Convert number back to time + common_vars_to_crop[[common_var_to_crop]] <- + as.POSIXct(common_vars_to_crop[[common_var_to_crop]], + origin = "1970-01-01", tz = 'UTC') + } + } + } else { # old code + + common_vars_to_crop[[common_var_to_crop]] <- Subset(common_vars_to_crop[[common_var_to_crop]], inner_dim, crop_indices) + } + } } @@ -3668,6 +3732,21 @@ Start <- function(..., # dim = indices/selectors, all_split_dims[[1]] <- tmp[[2]] } } + + if (merge_across_dims) { + # Save dim and attributes for later use + dims_of_merge_dim <- dim(picked_common_vars[[across_inner_dim]]) + tmp_attr <- attr(picked_common_vars[[across_inner_dim]], 'variables') + # Find final_dim_fake for metadata and put it in an array + if (!split_multiselected_dims) { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(dims_of_merge_dim)] + } else { + final_dims_fake_merge_dim <- final_dims_fake[names(final_dims_fake) %in% names(all_split_dims[[across_inner_dim]])] + } + } else if (split_multiselected_dims) { + tmp_attr <- attr(picked_common_vars[[names(all_split_dims)]], 'variables') + final_dims_fake_merge_dim <- all_split_dims[[1]] + } # The following several lines will only run if retrieve = TRUE if (retrieve) { @@ -3782,45 +3861,101 @@ Start <- function(..., # dim = indices/selectors, } #print("P") - # NOTE: If merge_across_dims = TRUE, there might be additional NAs due to - # unequal inner_dim ('time') length across file_dim ('file_date'). - # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. + # If merge_across_dims = TRUE, there might be additional NAs due to unequal + # inner_dim ('time') length across file_dim ('file_date'). + # If merge_across_dims_narm = TRUE, add additional lines to remove these NAs. # TODO: Now it assumes that only one '_across'. Add a for loop for more-than-one case. if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { if (!merge_across_dims_narm) { data_array_tmp <- array(bigmemory::as.matrix(data_array), dim = final_dims) + tmp <- match(names(final_dims), names(dims_of_merge_dim)) + if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) + } + metadata_tmp <- picked_common_vars[[across_inner_dim]] } else { - data_array_tmp <- remove_additional_na_from_merge( - inner_dims_across_files, final_dims, across_inner_dim, - length_inner_across_dim, data_array) + tmp <- remove_additional_na_from_merge( + data_array = bigmemory::as.matrix(data_array), + merge_dim_metadata = picked_common_vars[[across_inner_dim]], + inner_dims_across_files, final_dims, + length_inner_across_dim) + data_array_tmp <- tmp$data_array + metadata_tmp <- tmp$merge_dim_metadata } if (length(data_array_tmp) != prod(final_dims_fake)) { stop(paste0("After reshaping, the data do not fit into the expected output dimension. ", "Check if the reshaping parameters are used correctly.")) } - + if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", + "Check if the reshaping parameters are used correctly or contact support.")) + } + #NOTE: When one file contains values for dicrete dimensions, rearrange the # chunks (i.e., work_piece) is necessary. if (split_multiselected_dims) { - data_array_tmp <- rebuild_array_merge_split( - data_array_tmp, indices_chunk, all_split_dims, final_dims_fake, - across_inner_dim, length_inner_across_dim) + tmp <- rebuild_array_merge_split( + data_array = data_array_tmp, metadata = metadata_tmp, indices_chunk, + all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + data_array_tmp <- tmp$data_array + metadata_tmp <- tmp$metadata } - + data_array <- array(data_array_tmp, dim = final_dims_fake) - + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + + # If split_multiselected_dims + merge_across_dims, the dimension order may change above. + # To get the user-required dim order, we need to reorder the array again. + if (split_multiselected_dims) { + if (inner_dim_pos_in_split_dims != 1) { + correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) + data_array <- .aperm2(data_array, correct_order) + correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) + metadata_tmp <- .aperm2(metadata_tmp, correct_order_metadata[!is.na(correct_order_metadata)]) + } + } + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } else { # ! (merge_across_dims + split_multiselected_dims) (old version) data_array <- array(bigmemory::as.matrix(data_array), dim = final_dims_fake) - } - - # NOTE: If split_multiselected_dims + merge_across_dims, the dimension order may change above. - # To get the user-required dim order, we need to reorder the array again. - if (split_multiselected_dims & merge_across_dims) { - if (inner_dim_pos_in_split_dims != 1) { - correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) - data_array <- .aperm2(data_array, correct_order) + if (merge_across_dims) { + # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) + + inner_dim_pos <- which(names(dims_of_merge_dim) == inner_dims_across_files) + file_dim_pos <- which(names(dims_of_merge_dim) == names(inner_dims_across_files)) + if (file_dim_pos < inner_dim_pos) { #need to reorder + tmp <- seq(1, length(dims_of_merge_dim)) + tmp[inner_dim_pos] <- file_dim_pos + tmp[file_dim_pos] <- inner_dim_pos + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) + } + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } + if (split_multiselected_dims) { + if (names(all_split_dims) %in% names(picked_common_vars)) { + metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[names(all_split_dims)]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[names(all_split_dims)]] <- metadata_tmp + attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + } } + } gc() @@ -3870,7 +4005,89 @@ Start <- function(..., # dim = indices/selectors, } } # End if (retrieve) - + else { # if retrieve = FALSE, metadata still needs to reshape + + if (merge_across_dims & (split_multiselected_dims | merge_across_dims_narm)) { + if (!merge_across_dims_narm) { + tmp <- match(names(final_dims), names(dims_of_merge_dim)) + if (any(diff(tmp[!is.na(tmp)]) < 0)) { #need to reorder + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp[!is.na(tmp)]) + } + metadata_tmp <- picked_common_vars[[across_inner_dim]] + } else { + tmp <- remove_additional_na_from_merge( + data_array = NULL, + merge_dim_metadata = picked_common_vars[[across_inner_dim]], + inner_dims_across_files, final_dims, + length_inner_across_dim) + metadata_tmp <- tmp$merge_dim_metadata + } + + if (length(metadata_tmp) != prod(final_dims_fake_merge_dim)) { + stop(paste0("After reshaping, the metadata do not fit into the expected output dimension. ", + "Check if the reshaping parameters are used correctly or contact support.")) + } + + #NOTE: When one file contains values for dicrete dimensions, rearrange the + # chunks (i.e., work_piece) is necessary. + if (split_multiselected_dims) { + tmp <- rebuild_array_merge_split( + data_array = NULL, metadata = metadata_tmp, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) + metadata_tmp <- tmp$metadata + } + metadata_tmp <- array(metadata_tmp, dim = final_dims_fake_merge_dim) + + # If split_multiselected_dims + merge_across_dims, the dimension order may change above. + # To get the user-required dim order, we need to reorder the array again. + if (split_multiselected_dims) { + if (inner_dim_pos_in_split_dims != 1) { + correct_order <- match(names(final_dims_fake_output), names(final_dims_fake)) +# data_array <- .aperm2(data_array, correct_order) + correct_order_metadata <- match(names(final_dims_fake_output), names(all_split_dims[[across_inner_dim]])) + metadata_tmp <- .aperm2(metadata_tmp, correct_order_metadata[!is.na(correct_order_metadata)]) + } + } + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } else { # ! (merge_across_dims + split_multiselected_dims) (old version) + if (merge_across_dims) { + # merge_across_dims = TRUE but (merge_across_dims_narm = F & split_multiselected_dims = F) + + inner_dim_pos <- which(names(dims_of_merge_dim) == inner_dims_across_files) + file_dim_pos <- which(names(dims_of_merge_dim) == names(inner_dims_across_files)) + if (file_dim_pos < inner_dim_pos) { #need to reorder + tmp <- seq(1, length(dims_of_merge_dim)) + tmp[inner_dim_pos] <- file_dim_pos + tmp[file_dim_pos] <- inner_dim_pos + picked_common_vars[[across_inner_dim]] <- .aperm2(picked_common_vars[[across_inner_dim]], tmp) + } + metadata_tmp <- array(picked_common_vars[[across_inner_dim]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[across_inner_dim]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[across_inner_dim]] <- metadata_tmp + attr(picked_common_vars[[across_inner_dim]], 'variables') <- tmp_attr + } + if (split_multiselected_dims) { + if (names(all_split_dims) %in% names(picked_common_vars)) { + metadata_tmp <- array(picked_common_vars[[names(all_split_dims)]], dim = final_dims_fake_merge_dim) + # Convert numeric back to dates + if ('time' %in% synonims[[names(all_split_dims)]]) { + metadata_tmp <- as.POSIXct(metadata_tmp, origin = "1970-01-01", tz = 'UTC') + } + picked_common_vars[[names(all_split_dims)]] <- metadata_tmp + attr(picked_common_vars[[names(all_split_dims)]], 'variables') <- tmp_attr + } + } + } + + } + # Change final_dims_fake back because retrieve = FALSE will use it for attributes later if (exists("final_dims_fake_output")) { final_dims_fake <- final_dims_fake_output diff --git a/R/zzz.R b/R/zzz.R index 0130724599137903e4c47d9fea52504dcd36d2fa..0067d6d88022615a6c8583fe7d22e79e367aeade 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -446,10 +446,10 @@ correct_return_vars <- function(inner_dim, inner_dims_across_files, found_patter corrected_value <- file_dim_name } } - .warning(paste0("Found ", inner_dim, " dependency on file diemnsion '", corrected_value, - "', but '", inner_dim, "' is not in return_vars list or is NULL. ", - "To provide the correct metadata, the value of ", inner_dim, - " in 'return_vars' is specified as '", corrected_value, "'.")) + .warning(paste0("Found '", inner_dim, "' dependency on file dimension '", corrected_value, + "', but '", inner_dim, "' is not in return_vars list or does not include '", corrected_value, + "'. To provide the correct metadata, '", corrected_value, "' is included under '", inner_dim, + "' in 'return_vars.")) return(corrected_value) } @@ -1028,9 +1028,13 @@ retrieve_progress_message <- function(work_pieces, num_procs, silent) { # If merge_across_dims = TRUE and merge_across_dims_narm = TRUE, remove the additional NAs # due to unequal inner_dim ('time') length across file_dim ('sdate'). -remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, across_inner_dim, - length_inner_across_dim, data_array) { +remove_additional_na_from_merge <- function(data_array = NULL, merge_dim_metadata = NULL, + inner_dims_across_files, final_dims, length_inner_across_dim) { + # data_array is a vector from bigmemory::as.matrix + # merge_dim_metadata is an array + across_file_dim <- names(inner_dims_across_files) #TODO: more than one? + across_inner_dim <- inner_dims_across_files[[1]] #TODO: more than one? # Get the length of these two dimensions in final_dims length_inner_across_store_dims <- final_dims[across_inner_dim] length_file_across_store_dims <- final_dims[across_file_dim] @@ -1042,37 +1046,69 @@ remove_additional_na_from_merge <- function(inner_dims_across_files, final_dims, for (i in 1:length_file_across_store_dims) { #1:4 logi_array[1:length_inner_across_dim[[i]], i] <- TRUE } - - # First, get the data array with final_dims dimension - data_array_final_dims <- array(bigmemory::as.matrix(data_array), dim = final_dims) - + + if (!is.null(data_array)) { + # First, turn the data vector into array with final_dims + data_array_final_dims <- array(data_array, dim = final_dims) + } + # Change the NA derived from additional spaces to -9999, then remove these -9999 func_remove_blank <- function(data_array, logi_array) { # dim(data_array) = [time, file_date] # dim(logi_array) = [time, file_date] - # Change the blank spaces from NA to -9999 - data_array[which(!logi_array)] <- -9999 + # data_array can be data or metadata; if data, change the blank spaces from + # NA to -9999; if metadata (supposed to be 'time'), change the corresponding + # spaces to -12^10. + if (any(class(data_array) %in% c("POSIXct", "POSIXt"))) { + # change to numeric first + data_array <- array(as.vector(data_array), dim = dim(data_array)) + data_array[which(!logi_array)] <- -12^10 + } else { + data_array[which(!logi_array)] <- -9999 + } return(data_array) } - data_array_final_dims <- multiApply::Apply(data_array_final_dims, - target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') - output_dims = c(across_inner_dim, across_file_dim), - fun = func_remove_blank, - logi_array = logi_array)$output1 - ## reorder back to the correct dim - tmp <- match(names(final_dims), names(dim(data_array_final_dims))) - data_array_final_dims <- .aperm2(data_array_final_dims, tmp) - data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector - - return(data_array_tmp) -} + if (!is.null(data_array)) { + data_array_final_dims <- multiApply::Apply(data_array_final_dims, + target_dims = c(across_inner_dim, across_file_dim), #c('time', 'file_date') + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + } + if (!is.null(merge_dim_metadata)) { + tmp_attr <- attributes(merge_dim_metadata)$variables + merge_dim_metadata <- multiApply::Apply(merge_dim_metadata, + target_dims = c(across_inner_dim, across_file_dim), + output_dims = c(across_inner_dim, across_file_dim), + fun = func_remove_blank, + logi_array = logi_array)$output1 + } + + if (!is.null(data_array)) { + ## reorder back to the correct dim + tmp <- match(names(final_dims), names(dim(data_array_final_dims))) + data_array_final_dims <- .aperm2(data_array_final_dims, tmp) + data_array_tmp <- data_array_final_dims[data_array_final_dims != -9999] # become a vector + } else { + data_array_tmp <- NULL + } + if (!is.null(merge_dim_metadata)) { + # Reorder metadata dim as final dim + tmp <- match(names(final_dims), names(dim(merge_dim_metadata))) + merge_dim_metadata <- aperm(merge_dim_metadata, tmp[!is.na(tmp)]) + merge_dim_metadata <- merge_dim_metadata[merge_dim_metadata != -12^10] + attr(merge_dim_metadata, 'variables') <- tmp_attr + } + + #NOTE: both outputs are vectors. If 'merge_dim_metadata' is actually time, it is just numeric here. + return(list(data_array = data_array_tmp, merge_dim_metadata = merge_dim_metadata)) +} # When merge_across_dims = TRUE and split_multiselected_dims = TRUE, rearrange the chunks # (i.e., work_piece) is necessary if one file contains values for discrete dimensions -rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_dims, - final_dims_fake, across_inner_dim, length_inner_across_dim) { +rebuild_array_merge_split <- function(data_array = NULL, metadata = NULL, indices_chunk, all_split_dims, final_dims_fake, across_inner_dim, length_inner_across_dim) { # generate the correct order list from indices_chunk final_order_list <- list() i <- 1 @@ -1103,15 +1139,32 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d if (split_dims_pos[length(split_dims_pos)] < length(final_dims_fake)) { new_dims <- c(new_dims, final_dims_fake[(split_dims_pos[length(split_dims_pos)] + 1):length(final_dims_fake)]) } - data_array_no_split <- array(data_array_tmp, dim = new_dims) - # seperate 'time' dim into each work_piece length - data_array_seperate <- list() + + if (!is.null(data_array)) { + data_array_no_split <- array(data_array, dim = new_dims) + # seperate 'time' dim into each work_piece length + data_array_seperate <- list() + array_piece <- list() + } + if (!is.null(metadata)) { + metadata_no_split <- array(metadata, dim = new_dims) + metadata_seperate <- list() + metadata_piece <- list() + } + tmp <- cumsum(unlist(length_inner_across_dim)) tmp <- c(0, tmp) for (i in 1:length(length_inner_across_dim)) { - data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + if (!is.null(data_array)) { + data_array_seperate[[i]] <- ClimProjDiags::Subset(data_array_no_split, + across_inner_dim, + (tmp[i] + 1):tmp[i + 1]) + } + if (!is.null(metadata)) { + metadata_seperate[[i]] <- ClimProjDiags::Subset(metadata_no_split, across_inner_dim, (tmp[i] + 1):tmp[i + 1]) + } } # re-build the array: chunk which_chunk <- as.numeric(names(final_order_list)) @@ -1119,27 +1172,65 @@ rebuild_array_merge_split <- function(data_array_tmp, indices_chunk, all_split_d which_chunk <- sapply(lapply(which_chunk, '==', sort_which_chunk), which) how_many_indices <- unlist(final_order_list) - array_piece <- list() - ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) + + if (!is.null(data_array)) { + ind_in_array_seperate <- as.list(rep(1, length(data_array_seperate))) + } else if (!is.null(metadata)) { + ind_in_array_seperate <- as.list(rep(1, length(metadata_seperate))) + } + for (i in 1:length(final_order_list)) { - array_piece[[i]] <- ClimProjDiags::Subset( - data_array_seperate[[which_chunk[i]]], across_inner_dim, - ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + if (!is.null(data_array)) { + array_piece[[i]] <- ClimProjDiags::Subset( + data_array_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + } + if (!is.null(metadata)) { + metadata_piece[[i]] <- ClimProjDiags::Subset( + metadata_seperate[[which_chunk[i]]], across_inner_dim, + ind_in_array_seperate[[which_chunk[i]]]:(ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] - 1)) + } ind_in_array_seperate[[which_chunk[i]]] <- ind_in_array_seperate[[which_chunk[i]]] + how_many_indices[i] } # re-build the array: paste - data_array_tmp <- array_piece[[1]] - along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) - if (length(array_piece) > 1) { - for (i in 2:length(array_piece)) { - data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], + if (!is.null(data_array)) { + data_array_tmp <- array_piece[[1]] + } else { + data_array_tmp <- NULL + } + if (!is.null(metadata)) { + metadata_tmp <- metadata_piece[[1]] + } else { + metadata_tmp <- NULL + } + + if (!is.null(data_array)) { + along_pos <- which(names(dim(data_array_tmp)) == across_inner_dim) + length_piece <- length(array_piece) + } else if (!is.null(metadata)) { + along_pos <- which(names(dim(metadata_tmp)) == across_inner_dim) + length_piece <- length(metadata_piece) + } + + if (length_piece > 1) { + for (i in 2:length_piece) { + if (!is.null(data_array)) { + data_array_tmp <- abind::abind(data_array_tmp, array_piece[[i]], + along = along_pos) + } + if (!is.null(metadata)) { + metadata_tmp <- abind::abind(metadata_tmp, metadata_piece[[i]], along = along_pos) + } } } - } - - return(data_array_tmp) + } else { + data_array_tmp <- data_array + metadata_tmp <- metadata + } + + return(list(data_array = data_array_tmp, metadata = metadata_tmp)) } @@ -1316,8 +1407,11 @@ generate_picked_var_of_read <- function(var_to_read, var_to_check, array_of_file } else { padding <- array(dim = padding_dims) } + tmp_attr <- attributes(either_picked_vars)$variables either_picked_vars <- .abind2(either_picked_vars, padding, names(full_array_var_dims)[longer_dims_in_full_array]) + attr(either_picked_vars, 'variables') <- tmp_attr + } else { stop("Error while reading the variable '", var_to_read, "' from ", "the file. Found size (", paste(var_dims, collapse = ' x '), diff --git a/tests/testthat/test-Start-calendar.R b/tests/testthat/test-Start-calendar.R index 4f58a8744cfd69504eceed8883c5b7f3b6a1758e..10cf7bf4dc0a99d0292103feb0c65112bd358ac4 100644 --- a/tests/testthat/test-Start-calendar.R +++ b/tests/testthat/test-Start-calendar.R @@ -212,9 +212,13 @@ suppressWarnings( ) expect_equal( - attr(obs, 'Variables')$common$time[1, 1], + attr(obs, 'Variables')$common$time[1], as.POSIXct('2005-05-16 12:00:00', tz = 'UTC') ) +expect_equal( + dim(attr(obs, 'Variables')$common$time), + c(time = 1) +) }) diff --git a/tests/testthat/test-Start-metadata_filedim_dependency.R b/tests/testthat/test-Start-metadata_filedim_dependency.R index a5fa558cb8a76ad5ff6542c5678c2c7187e17226..cfd7dfb1d3559a7b67ed0afdfe8633998412eae2 100644 --- a/tests/testthat/test-Start-metadata_filedim_dependency.R +++ b/tests/testthat/test-Start-metadata_filedim_dependency.R @@ -65,7 +65,7 @@ expect_equal( return_vars = list(time = NULL), retrieve = TRUE) ) - time6 <- attr(test4, 'Variables')$common$time + time6 <- attr(test6, 'Variables')$common$time expect_equal( dim(time6), @@ -101,7 +101,7 @@ expect_equal( return_vars = list(time = NULL), retrieve = TRUE) ) - time6a <- attr(test4, 'Variables')$common$time + time6a <- attr(test6a, 'Variables')$common$time expect_equal( dim(time6a), diff --git a/tests/testthat/test-Start-metadata_reshaping.R b/tests/testthat/test-Start-metadata_reshaping.R new file mode 100644 index 0000000000000000000000000000000000000000..91a793d53572f502442a6e89d519b849ebd0c825 --- /dev/null +++ b/tests/testthat/test-Start-metadata_reshaping.R @@ -0,0 +1,546 @@ +context("Start() metadata reshaping") +# When data is reshaping (e.g., time_across = 'sdate'), the corresponding attribute should be reshaped too. + +test_that("1. time across fyear, fyear depends on sdate", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 365) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) +expect_equal( +as.vector(dates[2, ]), +as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) + +# retrieve = FALSE +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + + +test_that("2. time across fyear, only one sdate", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', +# fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 365) +) +expect_equal( +length(data), +length(dates) +) +expect_equal( +as.vector(dates), +as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')) +) + +#retrieve = FALSE +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', +# fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("3. time across fyear, fyear depends on sdate, 1st fyear is empty, 3rd fyear has more indices than 2nd one, 1964 is leap year", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + day = 700:860, ## initialised in Nov + day_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude'), + day = c('day', 'time')), + return_vars = list(lat = NULL, lon = NULL, + day = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['day']] + +expect_equal( +dim(dates), +c(sdate = 2, day = 161) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +as.vector(seq(as.POSIXct('1962-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1963-03-10 12:00:00', tz = 'UTC'), by = 'day')) +) +expect_equal( +as.vector(dates[2, ]), +as.vector(seq(as.POSIXct('1963-10-01 12:00:00', tz = 'UTC'), as.POSIXct('1964-03-09 12:00:00', tz = 'UTC'), by = 'day')) +) + + +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + day = 700:860, ## initialised in Nov + day_across = 'fyear', + merge_across_dims = TRUE, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude'), + day = c('day', 'time')), + return_vars = list(lat = NULL, lon = NULL, + day = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['day']] +expect_equal( +datesF, +dates +) + +}) + + +test_that("4. merge and split time dim", { +datess <- seq(as.POSIXct('1994-05-01', tz = 'UTC'), as.POSIXct('1994-12-31', tz = 'UTC'), by = 'days') +datess <- c(datess[c(1:31, 32:62, 62:92, 93:123, 124:154, 154:184, 185:215, 215:245)]) +datess <- as.POSIXct(array(datess, dim = c(time = 31, sdate = 8)), + origin = '1970-01-01', tz = 'UTC') +dates_file <- sort(unique(gsub('-', '', sapply(as.character(datess), + substr, 1, 7)))) + +suppressWarnings( + data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = dates_file, + time = values(datess), #[time = 31, sdate = 8] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + #because time is assigned by 'values', set the tolerance to avoid too distinct match + time_tolerance = as.difftime(1, units = 'hours'), + #time values are across all the files + time_across = 'file_date', + #combine time and file_date dims + merge_across_dims = TRUE, + #exclude the additional NAs generated by merge_across_dims + merge_across_dims_narm = TRUE, + #split time dim, because it is two-dimensional + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) + +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(time = 31, sdate = 8) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates), +as.vector(datess) +) + +suppressWarnings( + dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = dates_file, + time = values(datess), #[time = 31, sdate = 8] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + #because time is assigned by 'values', set the tolerance to avoid too distinct match + time_tolerance = as.difftime(1, units = 'hours'), + #time values are across all the files + time_across = 'file_date', + #combine time and file_date dims + merge_across_dims = TRUE, + #exclude the additional NAs generated by merge_across_dims + merge_across_dims_narm = TRUE, + #split time dim, because it is two-dimensional + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("5. test 1 but merge_across_dims_narm = F", { + +suppressWarnings( +data <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + +expect_equal( +dim(dates), +c(sdate = 2, time = 608) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[1, ]), +c(as.vector(seq(as.POSIXct('1961-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1961-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) +) +expect_equal( +as.vector(dates[2, ]), +c(as.vector(seq(as.POSIXct('1962-01-01 12:00:00', tz = 'UTC'), as.POSIXct('1962-12-31 12:00:00', tz = 'UTC'), by = 'day')), rep(NA, 243)) +) + +suppressWarnings( +dataF <- Start(dat = '/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/$member$/day/$var$/gr/v20210910/$var$_day_EC-Earth3_dcppA-hindcast_s$sdate$-$member$_gr_$fyear$.nc', + var = 'tasmin', + lat = indices(1), + lon = indices(1), + sdate = paste0(1960:1961), + time = 62:426, ## Jan to Dec (initialised in Nov) + time_across = 'fyear', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + fyear = 'all', + fyear_depends = 'sdate', + member = 'r1i4p1f1', + synonims = list(lat = c('lat','latitude'), + lon = c('lon','longitude')), + return_vars = list(lat = NULL, lon = NULL, + time = c('sdate', 'fyear')), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("6. split dim only", { + +datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'days') +datess <- as.POSIXct(array(datess, dim = c(time = 7, week = 2)), + origin = '1970-01-01', tz = 'UTC') +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199407.nc'), + var = 'tas', +# file_date = '199407', + time = values(datess), #[time = 7, week = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), +# time_across = 'file_date', +# merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), #'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 7, week = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[, ]), +as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-07-14', tz = 'UTC'), by = 'day')) +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_199407.nc'), + var = 'tas', +# file_date = '199407', + time = values(datess), #[time = 7, week = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), +# time_across = 'file_date', +# merge_across_dims = TRUE, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = NULL), #'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + +test_that("7. split dim + merge + merge_narm = F", { + +datess <- seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'days') +datess <- as.POSIXct(array(datess, dim = c(time = 31, month = 2)), + origin = '1970-01-01', tz = 'UTC') + +suppressWarnings( +data <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = c('199407', '199408'), + time = values(datess), #[time = 31, month = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = TRUE) +) +dates <- attr(data,'Variables')$common[['time']] + + +expect_equal( +dim(dates), +c(time = 31, month = 2) +) +expect_equal( +dim(drop(data)), +dim(dates) +) +expect_equal( +length(attributes(dates)), +4 +) +expect_equal( +all(names(attributes(dates)) %in% c('variables', 'dim', 'class', 'tzone')), +TRUE +) +expect_equal( +class(dates), +c("POSIXct", "POSIXt") +) +expect_equal( +as.vector(dates[, ]), +as.vector(seq(as.POSIXct('1994-07-01', tz = 'UTC'), as.POSIXct('1994-08-31', tz = 'UTC'), by = 'day')) +) + +suppressWarnings( +dataF <- Start(dat = paste0('/esarchive/recon/ecmwf/erainterim/6hourly/', + '$var$/$var$_$file_date$.nc'), + var = 'tas', + file_date = c('199407', '199408'), + time = values(datess), #[time = 31, month = 2] + latitude = indices(1), + longitude = indices(1), + time_var = 'time', + time_tolerance = as.difftime(1, units = 'hours'), + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = F, + split_multiselected_dims = TRUE, + return_vars = list(latitude = NULL, + longitude = NULL, + time = 'file_date'), + retrieve = FALSE) +) +datesF <- attr(dataF,'Variables')$common[['time']] +expect_equal( +datesF, +dates +) + +}) + diff --git a/tests/testthat/test-Start-two_dats.R b/tests/testthat/test-Start-two_dats.R index 4fa8642e6eb238363912ad0c593a285e6bab7559..ff83441a05299f9db2b804a0d9f8234ae1ecaa12 100644 --- a/tests/testthat/test-Start-two_dats.R +++ b/tests/testthat/test-Start-two_dats.R @@ -19,7 +19,7 @@ data <- Start(dataset = list(list(path = path_tas), list(path = path_tos)), lon = values(list(150, 170)), lon_reorder = CircularSort(0, 360), fyear = 'all', - member = indices(1), + member = 'r10i1p1f1', #indices(1), fyear_depends = 'sdate', time_across = 'fyear', merge_across_dims = TRUE,