diff --git a/R/CST_SplitDim.R b/R/CST_SplitDim.R index 71f51bab4bdc27e7567241e9758c6a0e08d07937..bac378be1202675fd1fb08c080023e717d0d4184 100644 --- a/R/CST_SplitDim.R +++ b/R/CST_SplitDim.R @@ -10,7 +10,7 @@ #' #'@param data A 's2dv_cube' object #'@param split_dim A character string indicating the name of the dimension to -#' split. +#' split. It is set as 'time' by default. #'@param indices A vector of numeric indices or dates. If left at NULL, the #' dates provided in the s2dv_cube object (element Dates) will be used. #'@param freq A character string indicating the frequency: by 'day', 'month' and @@ -21,6 +21,12 @@ #' dimension. #'@param insert_ftime An integer indicating the number of time steps to add at #' the begining of the time series. +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension. It is set as 'time' by default. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. It is set as 'sdate' by default. +#'@param return_indices A logical value that if it is TRUE, the indices +#' used in splitting the dimension will be returned. It is FALSE by default. #' #'@details Parameter 'insert_ftime' has been included for the case of using #'daily data, requiring split the temporal dimensions by months (or similar) and @@ -51,10 +57,12 @@ #'new_data <- CST_SplitDim(data, indices = time, freq = 'year') #'@import abind #'@importFrom ClimProjDiags Subset +#'@importFrom s2dv Reorder #'@export CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, freq = 'monthly', new_dim_name = NULL, - insert_ftime = NULL) { + insert_ftime = NULL, ftime_dim = 'time', + sdate_dim = 'sdate', return_indices = FALSE) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -62,51 +70,84 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, if (!is.null(insert_ftime)) { if (!is.numeric(insert_ftime)) { stop("Parameter 'insert_ftime' should be an integer.") + } + if (length(insert_ftime) > 1) { + warning("Parameter 'insert_ftime' must be of length 1, and only the", + " first element will be used.") + insert_ftime <- insert_ftime[1] + } + # Check Dates + if (is.null(dim(data$attrs$Dates))) { + warning("Parameter 'Dates' must have dimensions, 'insert_ftime' won't ", + "be used.") + insert_ftime <- NULL + } + } + if (!is.null(insert_ftime)) { + # adding NAs at the begining of the data in ftime dim + ftimedim <- which(names(dim(data$data)) == ftime_dim) + dims <- dim(data$data) + dims[ftimedim] <- insert_ftime + empty_array <- array(NA, dims) + data$data <- abind(empty_array, data$data, along = ftimedim) + names(dim(data$data)) <- names(dims) + # Reorder dates + data$attrs$Dates <- Reorder(data$attrs$Dates, c(ftime_dim, sdate_dim)) + dates <- data$attrs$Dates + dates_subset <- Subset(dates, sdate_dim, 1) + # adding dates to Dates for the new NAs introduced + if ((dates_subset[2] - dates_subset[1]) == 1) { + timefreq <- 'days' } else { - if (length(insert_ftime) > 1) { - warning("Parameter 'insert_ftime' must be of length 1, and only the", - " first element will be used.") - insert_ftime <- insert_ftime[1] - } - # adding NAs at the begining of the data in ftime dim - ftimedim <- which(names(dim(data$data)) == 'ftime') - dims <- dim(data$data) - dims[ftimedim] <- insert_ftime - empty_array <- array(NA, dims) - data$data <- abind(empty_array, data$data, along = ftimedim) - names(dim(data$data)) <- names(dims) - # adding dates to Dates for the new NAs introduced - if ((data$attrs$Dates[2] - data$attrs$Dates[1]) == 1) { - timefreq <- 'days' - } else { - timefreq <- 'months' - warning("Time frequency of forecast time is considered monthly.") - } - start <- data$attrs$Dates - dim(start) <- c(ftime = length(start)/dims['sdate'], sdate = dims['sdate']) - # new <- array(NA, prod(dim(data$data)[c('ftime', 'sdate')])) - # Pending fix transform to UTC when concatenaiting - data$attrs$Dates <- do.call(c, lapply(1:dim(start)[2], function(x) { - seq(start[1,x] - as.difftime(insert_ftime, - units = timefreq), - start[dim(start)[1],x], by = timefreq, tz = "UTC")})) + timefreq <- 'months' + warning("Time frequency of forecast time is considered monthly.") } + + dim(dates) <- c(length(dates)/dims[sdate_dim], dims[sdate_dim]) + names(dim(dates)) <- c(ftime_dim, sdate_dim) + # new <- array(NA, prod(dim(data$data)[c('ftime', 'sdate')])) + # Pending fix transform to UTC when concatenaiting + data$attrs$Dates <- do.call(c, lapply(1:dim(dates)[2], function(x) { + seq(dates[1,x] - as.difftime(insert_ftime, + units = timefreq), + dates[dim(dates)[1],x], by = timefreq, tz = "UTC")})) } if (is.null(indices)) { - if (any(split_dim %in% c('ftime', 'time', 'sdate'))) { + if (any(split_dim %in% c(ftime_dim, sdate_dim))) { indices <- data$attrs$Dates - if (any(names(dim(data$data)) %in% 'sdate')) { + if (any(names(dim(data$data)) %in% sdate_dim)) { if (!any(names(dim(data$data)) %in% split_dim)) { stop("Parameter 'split_dims' must be one of the dimension ", "names in parameter 'data'.") } - indices <- indices[1 : dim(data$data)[which(names(dim(data$data)) == split_dim)]] + indices <- indices[1:dim(data$data)[which(names(dim(data$data)) == split_dim)]] } } } - data$data <- SplitDim(data$data, split_dim = split_dim, indices = indices, - freq = freq, new_dim_name = new_dim_name) - return(data) + # Call the function + res <- SplitDim(data = data$data, split_dim = split_dim, + indices = indices, freq = freq, + new_dim_name = new_dim_name, + dates = data$attrs$Dates, + return_indices = return_indices) + if (inherits(res, 'list')) { + data$data <- res$data + # Split dim on Dates + if (!is.null(res$dates)) { + data$attrs$Dates <- res$dates + } + } else { + data$data <- res + } + data$dims <- dim(data$data) + + # Coordinates + # TO DO: Subset splitted coordinate and add the new dimension coordinate. + if (return_indices) { + return(list(data = data, indices = res$indices)) + } else { + return(data) + } } #'Function to Split Dimension #' @@ -128,6 +169,11 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, #' the length in which to subset the dimension. #'@param new_dim_name A character string indicating the name of the new #' dimension. +#'@param dates An optional parameter containing an array of dates of class +#' 'POSIXct' with the corresponding time dimensions of 'data'. It is NULL +#' by default. +#'@param return_indices A logical value that if it is TRUE, the indices +#' used in splitting the dimension will be returned. It is FALSE by default. #'@examples #'data <- 1 : 20 #'dim(data) <- c(time = 10, lat = 2) @@ -144,7 +190,8 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, #'@importFrom ClimProjDiags Subset #'@export SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly', - new_dim_name = NULL) { + new_dim_name = NULL, dates = NULL, + return_indices = FALSE) { # check data if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") @@ -166,7 +213,7 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly', "one and only the first element will be used.") } if (!any(names(dims) %in% split_dim)) { - stop("Parameter 'split_dims' must be one of the dimension ", + stop("Parameter 'split_dim' must be one of the dimension ", "names in parameter 'data'.") } pos_split <- which(names(dims) == split_dim) @@ -209,8 +256,8 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly', }) if ('try-error' %in% class(indices) | sum(is.na(indices)) == length(indices)) { - stop("Dates provided in parameter 'indices' must be of class", - " 'POSIXct' or convertable to 'POSIXct'.") + stop("Dates provided in parameter 'indices' must be of class ", + "'POSIXct' or convertable to 'POSIXct'.") } } } @@ -229,7 +276,7 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly', } else if (freq == 'year') { indices <- as.numeric(strftime(indices, format = "%Y")) repited <- unique(indices) - } else if (freq == 'monthly' ) { + } else if (freq == 'monthly') { indices <- as.numeric(strftime(indices, format = "%m%Y")) repited <- unique(indices) } else { @@ -254,15 +301,41 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly', data <- lapply(repited, function(x) {rebuild(x, data, along = split_dim, indices = indices, max_times)}) data <- abind(data, along = length(dims) + 1) - if (is.character(freq)) { - names(dim(data)) <- c(names(dims), freq) - } else { - names(dim(data)) <- c(names(dims), 'index') + + # Add new dim name + if (is.null(new_dim_name)) { + if (is.character(freq)) { + new_dim_name <- freq + } else { + new_dim_name <- 'index' + } } - if (!is.null(new_dim_name)) { - names(dim(data)) <- c(names(dims), new_dim_name) + names(dim(data)) <- c(names(dims), new_dim_name) + + # Split also Dates + dates_exist <- FALSE + if (!is.null(dates)) { + if (any(split_dim %in% names(dim(dates)))) { + datesdims <- dim(dates) + dates <- lapply(repited, function(x) {rebuild(x, dates, along = split_dim, + indices = indices, max_times)}) + dates <- abind(dates, along = length(datesdims) + 1) + dates <- as.POSIXct(dates, origin = '1970-01-01', tz = "UTC") + names(dim(dates)) <- c(names(datesdims), new_dim_name) + } + dates_exist <- TRUE + } + + # Return objects + if (all(dates_exist, return_indices)) { + return(list(data = data, dates = dates, indices = indices)) + } else if (all(dates_exist, !return_indices)) { + return(list(data = data, dates = dates)) + } else if (all(!dates_exist, return_indices)) { + return(list(data = data, indices = indices)) + } else { + return(data) } - return(data) } rebuild <- function(x, data, along, indices, max_times) { diff --git a/inst/doc/usecase/ex3_modify_dims.R b/inst/doc/usecase/ex3_modify_dims.R index 6d1fa5b87b2300231bb954cf43a242b9362aeeea..1cf2984bed9cfd8598b1a283f980769d0646e589 100644 --- a/inst/doc/usecase/ex3_modify_dims.R +++ b/inst/doc/usecase/ex3_modify_dims.R @@ -116,6 +116,7 @@ dim(dates) <- c(time = 2192) # (2) Now, we will split the array in a new 'year' dimension: dates_year <- SplitDim(dates, indices = dates, split_dim = 'time', freq = 'year') +dim(dates_year) # time year # 366 6 @@ -140,7 +141,7 @@ dates_day <- as.POSIXct(dates_day * 24 * 3600, origin = '1970-01-01', tz = 'UTC' # of the 's2dv_cube' # (1) Call the function CST_SplitDim with adding 'day' dimension: -data_day <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[1,], +data_day <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[1, ], split_dim = 'ftime', freq = 'day') # (2) Explore the dimensions of the data array dim(data_day$data) diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 1520eb08c50f80d46082b024bda8522ea7e06024..1ac3e7aba3ef41169d9ec5c9161e8e13e36cd154 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -17,7 +17,7 @@ CST_SaveExp( single_file = FALSE, extra_string = NULL, global_attrs = NULL, - units_hours_since = TRUE + units_hours_since = FALSE ) } \arguments{ @@ -83,11 +83,11 @@ default.} attributes to be saved in the NetCDF.} \item{units_hours_since}{(Optional) A logical value only available for the -case: Dates have forecast time and start date dimension, single_file is +case: 'Dates' have forecast time and start date dimension, 'single_file' is TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast time with units of 'hours since'; if it is FALSE, the time units will be a number of time steps with its corresponding frequency (e.g. n days, n months -or n hours). It is TRUE by default.} +or n hours). It is FALSE by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd index b07d9897ceac08db2b876ffa67d30322c87d34b5..4b55d6da3b2a761be6deee2519c26bff146cc785 100644 --- a/man/CST_SplitDim.Rd +++ b/man/CST_SplitDim.Rd @@ -10,14 +10,17 @@ CST_SplitDim( indices = NULL, freq = "monthly", new_dim_name = NULL, - insert_ftime = NULL + insert_ftime = NULL, + ftime_dim = "time", + sdate_dim = "sdate", + return_indices = FALSE ) } \arguments{ \item{data}{A 's2dv_cube' object} \item{split_dim}{A character string indicating the name of the dimension to -split.} +split. It is set as 'time' by default.} \item{indices}{A vector of numeric indices or dates. If left at NULL, the dates provided in the s2dv_cube object (element Dates) will be used.} @@ -32,6 +35,15 @@ dimension.} \item{insert_ftime}{An integer indicating the number of time steps to add at the begining of the time series.} + +\item{ftime_dim}{A character string indicating the name of the forecast time +dimension. It is set as 'time' by default.} + +\item{sdate_dim}{A character string indicating the name of the start date +dimension. It is set as 'sdate' by default.} + +\item{return_indices}{A logical value that if it is TRUE, the indices +used in splitting the dimension will be returned. It is FALSE by default.} } \description{ This function split a dimension in two. The user can select the diff --git a/man/SaveExp.Rd b/man/SaveExp.Rd index 53c791f76d19be05730fc1f519fbcf84f7790426..d7f0b30d84b8aadb722ff2733516819c909a8400 100644 --- a/man/SaveExp.Rd +++ b/man/SaveExp.Rd @@ -23,7 +23,7 @@ SaveExp( single_file = FALSE, extra_string = NULL, global_attrs = NULL, - units_hours_since = TRUE + units_hours_since = FALSE ) } \arguments{ @@ -116,7 +116,7 @@ case: Dates have forecast time and start date dimension, single_file is TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time with units of 'hours since'; if it is FALSE, the time units will be a number of time steps with its corresponding frequency (e.g. n days, n months or n -hours). It is TRUE by default.} +hours). It is FALSE by default.} } \value{ Multiple or single NetCDF files containing the data array.\cr diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd index a0dc8bc69bc65b0e729491a0e27c132f2854ee76..b0785b58a584cacfa67b9ca5ab4388c1fad0d92e 100644 --- a/man/SplitDim.Rd +++ b/man/SplitDim.Rd @@ -9,7 +9,9 @@ SplitDim( split_dim = "time", indices, freq = "monthly", - new_dim_name = NULL + new_dim_name = NULL, + dates = NULL, + return_indices = FALSE ) } \arguments{ @@ -28,6 +30,13 @@ the length in which to subset the dimension.} \item{new_dim_name}{A character string indicating the name of the new dimension.} + +\item{dates}{An optional parameter containing an array of dates of class +'POSIXct' with the corresponding time dimensions of 'data'. It is NULL +by default.} + +\item{return_indices}{A logical value that if it is TRUE, the indices +used in splitting the dimension will be returned. It is FALSE by default.} } \description{ This function split a dimension in two. The user can select the diff --git a/tests/testthat/test-CST_SplitDim.R b/tests/testthat/test-CST_SplitDim.R index 45e2b1a89cf9895241f2181ea3ea324a2700fc23..f8ad88ee09329e6106b630359643641d79b65f63 100644 --- a/tests/testthat/test-CST_SplitDim.R +++ b/tests/testthat/test-CST_SplitDim.R @@ -10,6 +10,7 @@ indices1 <- c(rep(1,5), rep(2,5), rep (3, 5), rep(4, 5)) output1 <- matrix(data1$data, nrow = 5, ncol = 4) names(dim(output1)) <- c('time', 'monthly') output1 <- list(data = output1) +output1$dims <- dim(output1$data) class(output1) <- 's2dv_cube' exp_cor <- 1 : 20 @@ -21,6 +22,7 @@ class(exp_cor) <- 's2dv_cube' output2 <- matrix(data1$data, nrow = 5, ncol = 4) names(dim(output2)) <- c('time', 'index') output2 <- list(data = output2) +output2$dims <- dim(output2$data) class(output2) <- 's2dv_cube' time2 <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), @@ -41,6 +43,7 @@ output3 <- c(data3$data, rep(NA, 4)) dim(output3) <- c(time = 8, monthly = 3) result3 <- data3 result3$data <- output3 +result3$dims <- dim(result3$data) # dat4 data4 <- list(data = array(rnorm(10), dim = c(sdate = 2, lon = 5))) @@ -92,38 +95,62 @@ test_that("2. Output checks", { ############################################## -# test_that("3. Output checks: sample data", { -# output <- lonlat_temp$exp$data -# output <- abind(output[, , , 1, ,], output[, , , 2, ,], output[, , , 3, ,], along = 5) -# dim(output) <- c(dataset = 1, member = 15, sdate = 6, ftime = 1, -# lat = 22, lon = 53, monthly = 3) -# result <- lonlat_temp$exp -# result$data <- output -# expect_equal( -# CST_SplitDim(data = lonlat_temp$exp, split_dim = 'ftime'), -# result -# ) -# expect_equal( -# dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', -# freq = 5)$data), -# c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22, -# lon = 53, index = 3) -# ) -# expect_warning( -# CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5, -# new_dim_name = c('a', 'b')), -# paste0("Parameter 'new_dim_name' has length greater than 1 ", -# "and only the first elemenst is used.") -# ) -# expect_error( -# CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5, -# new_dim_name = 3), -# "Parameter 'new_dim_name' must be character string" -# ) -# expect_equal( -# dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', -# freq = 5, new_dim_name = 'wt')$data), -# c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22, -# lon = 53, wt = 3) -# ) -# }) +test_that("3. Output checks: sample data", { + output <- lonlat_temp$exp$data + output <- abind(output[, , , 1, ,], output[, , , 2, ,], output[, , , 3, ,], along = 5) + dim(output) <- c(dataset = 1, member = 15, sdate = 6, ftime = 1, + lat = 22, lon = 53, monthly = 3) + result <- lonlat_temp$exp + result$data <- output + result$attrs$Dates <- s2dv::Reorder(result$attrs$Dates, c('sdate', 'ftime')) + dim(result$attrs$Dates) <- c(ftime = 1, sdate = 6, monthly = 3) + result$dims <- c(dataset = 1, member = 15, sdate = 6, ftime = 1, lat = 22, + lon = 53, monthly = 3) + attributes(result$attrs$Dates)$end <- NULL + expect_equal( + CST_SplitDim(data = lonlat_temp$exp, split_dim = 'ftime', ftime_dim = 'ftime'), + result + ) + expect_equal( + dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', + freq = 5)$data), + c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22, + lon = 53, index = 3) + ) + expect_warning( + CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5, + new_dim_name = c('a', 'b')), + paste0("Parameter 'new_dim_name' has length greater than 1 ", + "and only the first elemenst is used.") + ) + expect_error( + CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5, + new_dim_name = 3), + "Parameter 'new_dim_name' must be character string" + ) + expect_equal( + dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', + freq = 5, new_dim_name = 'wt')$data), + c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22, + lon = 53, wt = 3) + ) +}) + +############################################## + +test_that("4. Output checks II", { + res <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[,1], + split_dim = 'sdate', freq = 'year', return_indices = T) + expect_equal( + names(res), + c('data', 'indices') + ) + expect_equal( + res$dims, + dim(res$data) + ) + expect_equal( + all(names(dim(res$data$attrs$Dates)) %in% names(res$data$dims)), + TRUE + ) +})