From 7f05e11cd2fb606a394043212bcea3d5cf9f0977 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 7 Feb 2023 14:40:53 +0100 Subject: [PATCH 01/13] Add new function CST_Subset() --- R/CST_Subset.R | 145 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 R/CST_Subset.R diff --git a/R/CST_Subset.R b/R/CST_Subset.R new file mode 100644 index 00000000..54830ddf --- /dev/null +++ b/R/CST_Subset.R @@ -0,0 +1,145 @@ +#'Subset an object of class s2dv_cube +#' +#'@author Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} +#'This function allows to subset (i.e. slice, take a chunk of) the data inside +#'an object of class \code{s2dv_cube} and modify the dimensions, coordinates and +#'attributes accordingly, removing any variables, time steps and spatial +#'coordinates that are dropped when subsetting. It ensures that the information +#'inside the s2dv_cube remains coherent with the data it contains.\cr\cr +#'As in the function \code{Subset} from the ClimProjDiags package, the +#'dimensions to subset along can be specified via the parameter \code{along} +#'either with integer indices or by their name.\cr\cr +#'There are additional ways to adjust which dimensions are dropped in the +#'resulting object: either to drop all, to drop none, to drop only the ones that +#'have been sliced or to drop only the ones that have not been sliced.\cr\cr +#'The \code{load_parameters} and \code{when} attributes of the original cube +#'are preserved. The \code{source_files} attribute is subset along the +#'\code{var_dim} and \code{dat_dim} dimensions. +#' +#'@param x An object of class \code{s2dv_cube} to be sliced. +#'@param along A vector with references to the dimensions to take the subset +#' from: either integers or dimension names. +#'@param indices A list of indices to take from each dimension specified in +#' 'along'. If a single dimension is specified in 'along', it can be directly +#' provided as an integer or a vector. +#'@param drop Whether to drop all the dimensions of length 1 in the resulting +#' array, none, only those that are specified in 'along', or only those that +#' are not specified in 'along'. The possible values are: 'all' or TRUE, 'none' +#' or FALSE, 'selected', and 'non-selected'. The default value is FALSE. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The default value is NULL. +#'@param var_dim A chatacter string indicating the name of the variable +#' dimension. The default value is NULL. +#' +#'@return An object of class \code{s2dv_cube} with similar data, coordinates and +#' attributes as the \code{x} input, but with trimmed or dropped dimensions. +#' +#'@examples +#'#Example synthetic data: +#'# Dimension has name already +#'data <- 1:(2 * 3 * 372 * 1) +#'dim(data) <- c(time = 372, lon = 2, lat = 3, model = 1) +#'data_subset <- Subset(data, c('time', 'model'), +#' list(1:10, TRUE), drop = 'selected') +#'dim(data_subset) +#'# Use attributes 'dimensions' +#'data <- array(1:(2 * 3 * 372 * 1), dim = c(2, 3, 372, 1)) +#'attributes(data)[['dimensions']] <- c('lat', 'lon', 'time', 'model') +#'data_subset <- Subset(data, c('lon', 'lat'), list(1, 1), drop = TRUE) +#'dim(data_subset) +#' + +CST_Subset <- function(x, along, indices, drop = FALSE, + var_dim = NULL, + dat_dim = NULL) { + # Check that x is s2dv_cube + if (!inherits(x, 's2dv_cube')) { + stop("Parameter 'x' must be of the class 's2dv_cube'") + } + # Check var_dim + if (!is.null(var_dim)) { + if ((!is.character(var_dim)) || (length(var_dim) > 1)) { + stop("Parameter 'var_dim' must be a character string.") + } + } + # Check dat_dim + if (!is.null(dat_dim)) { + if ((!is.character(dat_dim)) || (length(dat_dim) > 1)) { + stop("Parameter 'dat_dim' must be a character string.") + } + } + + # Subset data + ## TODO: Test other "drop" options + x$data <- ClimProjDiags::Subset(x$data, + along = along, + indices = indices, + drop = drop) + # Adjust dimensions + x$dims <- dim(x$data) + # Adjust coordinates + for (dimension in 1:length(along)) { + dim_name <- along[dimension] + index <- indices[[dimension]] + if (dim_name %in% names(x$dims)) { + # Make coordinate 'sticky' to preserve attributes upon subsetting + x$coords[[dim_name]] <- sticky(x$coords[[dim_name]]) + # Subset coordinate by indices + x$coords[[dim_name]] <- x$coords[[dim_name]][index] + # Remove 'sticky' class + x$coords[[dim_name]] <- unstick(x$coords[[dim_name]]) + } else { + # Remove coordinates that have been dropped + x$coords <- within(x$coords, rm(dim_name)) + } + + } + # Adjust attributes + ## TODO: Correctly subset $Variable$metadata$time + ## TODO: Change 'len' + # Variable + for (dimension in 1:length(along)) { + dim_name <- along[dimension] + index <- indices[[dimension]] + if ((!is.null(var_dim)) && (dim_name == var_dim)) { + x$attrs$Variable$varName <- x$coords[[dim_name]][1] + } + if ((!is.null(dat_dim)) && (dim_name == dat_dim)) { + x$attrs$Datasets <- x$coords[[dim_name]][1] + } + if ((!is.null(x$attrs$source_files)) && + (dimension %in% dim(x$attrs$source_files))) { + x$attrs$source_files <- ClimProjDiags::Subset(x$attrs$source_files, + along = dim_name, + indices = index, + drop = drop) + } + if ((dim_name %in% names(x$dims)) && + (dim_name %in% names(x$attrs$Variable$metadata))) { + # Make sticky + x$attrs$Variable$metadata[[dim_name]] <- + sticky(x$attrs$Variable$metadata[[dim_name]]) + # Subset coords by indices + x$attrs$Variable$metadata[[dim_name]] <- + x$attrs$Variable$metadata[[dim_name]][index] + # Remove 'sticky' class + x$attrs$Variable$metadata[[dim_name]] <- + unstick(x$attrs$Variable$metadata[[dim_name]]) + } + } + vars_to_keep <- na.omit(match(c(names(x$dims), (x$attrs$Variable$varName)), + names(x$attrs$Variable$metadata))) + x$attrs$Variable$metadata <- x$attrs$Variable$metadata[vars_to_keep] + # Subset Dates + time_along <- intersect(along, names(dim(x$attrs$Dates))) + if (!(length(time_along) == 0)) { + time_indices <- indices[match(time_along, along)] + original_dates <- x$attrs$Dates + x$attrs$Dates <- ClimProjDiags::Subset(x$attrs$Dates, + along = time_along, + indices = time_indices, + drop = drop) + } + return(x) +} + -- GitLab From 89ece398f4299295426bfb3a1bae5b05152445f1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 7 Feb 2023 16:06:48 +0100 Subject: [PATCH 02/13] Refine reassingment of --- R/CST_Subset.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index 54830ddf..87b9ed90 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -92,7 +92,6 @@ CST_Subset <- function(x, along, indices, drop = FALSE, # Remove coordinates that have been dropped x$coords <- within(x$coords, rm(dim_name)) } - } # Adjust attributes ## TODO: Correctly subset $Variable$metadata$time @@ -105,7 +104,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, x$attrs$Variable$varName <- x$coords[[dim_name]][1] } if ((!is.null(dat_dim)) && (dim_name == dat_dim)) { - x$attrs$Datasets <- x$coords[[dim_name]][1] + x$attrs$Datasets <- x$attrs$Datasets[x$coords[[dim_name]][1]] } if ((!is.null(x$attrs$source_files)) && (dimension %in% dim(x$attrs$source_files))) { -- GitLab From 15683879bff1a2b747c64aa461db40b4a2e44d79 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 17:15:45 +0100 Subject: [PATCH 03/13] Adapt coordinate handling to all 'drop' options --- R/CST_Subset.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index 87b9ed90..efa76b1b 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -81,6 +81,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, for (dimension in 1:length(along)) { dim_name <- along[dimension] index <- indices[[dimension]] + # Only rename coordinates that have not been dropped if (dim_name %in% names(x$dims)) { # Make coordinate 'sticky' to preserve attributes upon subsetting x$coords[[dim_name]] <- sticky(x$coords[[dim_name]]) @@ -88,9 +89,12 @@ CST_Subset <- function(x, along, indices, drop = FALSE, x$coords[[dim_name]] <- x$coords[[dim_name]][index] # Remove 'sticky' class x$coords[[dim_name]] <- unstick(x$coords[[dim_name]]) - } else { - # Remove coordinates that have been dropped - x$coords <- within(x$coords, rm(dim_name)) + } + } + # Remove dropped coordinates + for (coordinate in names(x$coords)) { + if (!(coordinate %in% names(x$dims))) { + x$coords[[coordinate]] <- NULL } } # Adjust attributes -- GitLab From 2b942ce30d3e2b2db4fe26d960d62f82e9010b83 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Feb 2023 14:38:56 +0100 Subject: [PATCH 04/13] Add imports --- R/CST_Subset.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index efa76b1b..6d3f3b09 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -34,6 +34,9 @@ #'@return An object of class \code{s2dv_cube} with similar data, coordinates and #' attributes as the \code{x} input, but with trimmed or dropped dimensions. #' +#'@importFrom sticky sticky unstick +#'@importFrom ClimProjDiags Subset +#' #'@examples #'#Example synthetic data: #'# Dimension has name already -- GitLab From f5e03df11afe8e596e25e9839899b710114f3b57 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Feb 2023 14:29:36 +0100 Subject: [PATCH 05/13] Remove package sticky and replace with dot function --- R/CST_Subset.R | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index 6d3f3b09..2d627e31 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -34,7 +34,6 @@ #'@return An object of class \code{s2dv_cube} with similar data, coordinates and #' attributes as the \code{x} input, but with trimmed or dropped dimensions. #' -#'@importFrom sticky sticky unstick #'@importFrom ClimProjDiags Subset #' #'@examples @@ -86,12 +85,8 @@ CST_Subset <- function(x, along, indices, drop = FALSE, index <- indices[[dimension]] # Only rename coordinates that have not been dropped if (dim_name %in% names(x$dims)) { - # Make coordinate 'sticky' to preserve attributes upon subsetting - x$coords[[dim_name]] <- sticky(x$coords[[dim_name]]) # Subset coordinate by indices - x$coords[[dim_name]] <- x$coords[[dim_name]][index] - # Remove 'sticky' class - x$coords[[dim_name]] <- unstick(x$coords[[dim_name]]) + x$coords[[dim_name]] <- .subset_with_attrs(x$coords[[dim_name]], index) } } # Remove dropped coordinates @@ -122,17 +117,12 @@ CST_Subset <- function(x, along, indices, drop = FALSE, } if ((dim_name %in% names(x$dims)) && (dim_name %in% names(x$attrs$Variable$metadata))) { - # Make sticky - x$attrs$Variable$metadata[[dim_name]] <- - sticky(x$attrs$Variable$metadata[[dim_name]]) # Subset coords by indices x$attrs$Variable$metadata[[dim_name]] <- - x$attrs$Variable$metadata[[dim_name]][index] - # Remove 'sticky' class - x$attrs$Variable$metadata[[dim_name]] <- - unstick(x$attrs$Variable$metadata[[dim_name]]) + subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) } } + # Remove metadata from variables that were dropped vars_to_keep <- na.omit(match(c(names(x$dims), (x$attrs$Variable$varName)), names(x$attrs$Variable$metadata))) x$attrs$Variable$metadata <- x$attrs$Variable$metadata[vars_to_keep] @@ -149,3 +139,10 @@ CST_Subset <- function(x, along, indices, drop = FALSE, return(x) } +.subset_with_attrs <- function(x, ...) { + l <- x[...] + attr.names <- names(attributes(x)) + attr.names <- attr.names[attr.names != 'names'] + attributes(l)[attr.names] <- attributes(x)[attr.names] + return(l) +} -- GitLab From fc7bac15b3eb8b71efac0c2dc81885de52c3c20c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Feb 2023 15:44:24 +0100 Subject: [PATCH 06/13] Improve coordinate dimension handling --- R/CST_Subset.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index 2d627e31..674ba8bc 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -119,7 +119,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, (dim_name %in% names(x$attrs$Variable$metadata))) { # Subset coords by indices x$attrs$Variable$metadata[[dim_name]] <- - subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) + subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) } } # Remove metadata from variables that were dropped @@ -141,8 +141,11 @@ CST_Subset <- function(x, along, indices, drop = FALSE, .subset_with_attrs <- function(x, ...) { l <- x[...] + x.dims <- names(dim(x)) attr.names <- names(attributes(x)) attr.names <- attr.names[attr.names != 'names'] + attr.names <- attr.names[attr.names != 'dim'] attributes(l)[attr.names] <- attributes(x)[attr.names] + names(dim(l)) <- x.dims return(l) } -- GitLab From 63c6f6882d8442a4d58f713baddafbd01c8dd162 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 24 Feb 2023 16:16:41 +0100 Subject: [PATCH 07/13] Fix bug in detecting dimension names to subset source_files --- R/CST_Subset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index 674ba8bc..e0511c46 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -109,7 +109,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, x$attrs$Datasets <- x$attrs$Datasets[x$coords[[dim_name]][1]] } if ((!is.null(x$attrs$source_files)) && - (dimension %in% dim(x$attrs$source_files))) { + (dim_name %in% dim(x$attrs$source_files))) { x$attrs$source_files <- ClimProjDiags::Subset(x$attrs$source_files, along = dim_name, indices = index, -- GitLab From 4a86ae24611edf1cb3404924f11e1476880f56d2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 12:18:38 +0100 Subject: [PATCH 08/13] Fix bug (call to atomic function) --- R/CST_Subset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index e0511c46..cfea8606 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -119,7 +119,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, (dim_name %in% names(x$attrs$Variable$metadata))) { # Subset coords by indices x$attrs$Variable$metadata[[dim_name]] <- - subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) + .subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) } } # Remove metadata from variables that were dropped -- GitLab From 4af98b72526b2d0d9ec98400d1b1d2a949d28f2e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 12:29:06 +0100 Subject: [PATCH 09/13] Create example --- R/CST_Subset.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index cfea8606..9138d55e 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -37,18 +37,18 @@ #'@importFrom ClimProjDiags Subset #' #'@examples -#'#Example synthetic data: -#'# Dimension has name already -#'data <- 1:(2 * 3 * 372 * 1) -#'dim(data) <- c(time = 372, lon = 2, lat = 3, model = 1) -#'data_subset <- Subset(data, c('time', 'model'), -#' list(1:10, TRUE), drop = 'selected') -#'dim(data_subset) -#'# Use attributes 'dimensions' -#'data <- array(1:(2 * 3 * 372 * 1), dim = c(2, 3, 372, 1)) -#'attributes(data)[['dimensions']] <- c('lat', 'lon', 'time', 'model') -#'data_subset <- Subset(data, c('lon', 'lat'), list(1, 1), drop = TRUE) -#'dim(data_subset) +#'#Example with sample data: +#'# Check original dimensions and coordinates +#'lonlat_temp$exp$dims +#'names(lonlat_temp$exp$coords) +#'# Subset the s2dv_cube +#'exp_subset <- CST_Subset(lonlat_temp$exp, +#' along = c("lat", "lon"), +#' indices = list(1:10, 1:10), +#' drop = 'non-selected') +#'# Check new dimensions and coordinates +#'exp_subset$dims +#'names(exp_subset$coords) #' CST_Subset <- function(x, along, indices, drop = FALSE, -- GitLab From 84b1945f5a924714b239091557801d745af5d134 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Mar 2023 12:04:12 +0100 Subject: [PATCH 10/13] Remove TODOs --- R/CST_Subset.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index 9138d55e..d74c2157 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -96,8 +96,6 @@ CST_Subset <- function(x, along, indices, drop = FALSE, } } # Adjust attributes - ## TODO: Correctly subset $Variable$metadata$time - ## TODO: Change 'len' # Variable for (dimension in 1:length(along)) { dim_name <- along[dimension] -- GitLab From efd9b622e5e4b95e8f2ea0d57976ec4456b5e265 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 15 Mar 2023 10:12:30 +0100 Subject: [PATCH 11/13] Correct format code, documentation, minor error and add unit test --- NAMESPACE | 1 + R/CST_Subset.R | 44 +++--- man/CST_Subset.Rd | 70 ++++++++++ tests/testthat/test-CST_Subset.R | 222 +++++++++++++++++++++++++++++++ 4 files changed, 316 insertions(+), 21 deletions(-) create mode 100644 man/CST_Subset.Rd create mode 100644 tests/testthat/test-CST_Subset.R diff --git a/NAMESPACE b/NAMESPACE index 1896857d..d28fa03b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(CST_RainFARM) export(CST_RegimesAssign) export(CST_SaveExp) export(CST_SplitDim) +export(CST_Subset) export(CST_WeatherRegimes) export(Calibration) export(CategoricalEnsCombination) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index d74c2157..8c3a5a5f 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -1,6 +1,5 @@ #'Subset an object of class s2dv_cube #' -#'@author Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} #'This function allows to subset (i.e. slice, take a chunk of) the data inside #'an object of class \code{s2dv_cube} and modify the dimensions, coordinates and #'attributes accordingly, removing any variables, time steps and spatial @@ -15,6 +14,8 @@ #'The \code{load_parameters} and \code{when} attributes of the original cube #'are preserved. The \code{source_files} attribute is subset along the #'\code{var_dim} and \code{dat_dim} dimensions. +#' +#'@author Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} #' #'@param x An object of class \code{s2dv_cube} to be sliced. #'@param along A vector with references to the dimensions to take the subset @@ -34,8 +35,6 @@ #'@return An object of class \code{s2dv_cube} with similar data, coordinates and #' attributes as the \code{x} input, but with trimmed or dropped dimensions. #' -#'@importFrom ClimProjDiags Subset -#' #'@examples #'#Example with sample data: #'# Check original dimensions and coordinates @@ -49,14 +48,16 @@ #'# Check new dimensions and coordinates #'exp_subset$dims #'names(exp_subset$coords) +#' +#'@seealso \link[ClimProjDiags]{Subset} #' - -CST_Subset <- function(x, along, indices, drop = FALSE, - var_dim = NULL, - dat_dim = NULL) { +#'@importFrom ClimProjDiags Subset +#'@export +CST_Subset <- function(x, along, indices, drop = FALSE, var_dim = NULL, + dat_dim = NULL) { # Check that x is s2dv_cube if (!inherits(x, 's2dv_cube')) { - stop("Parameter 'x' must be of the class 's2dv_cube'") + stop("Parameter 'x' must be of the class 's2dv_cube'.") } # Check var_dim if (!is.null(var_dim)) { @@ -72,11 +73,9 @@ CST_Subset <- function(x, along, indices, drop = FALSE, } # Subset data - ## TODO: Test other "drop" options - x$data <- ClimProjDiags::Subset(x$data, - along = along, - indices = indices, - drop = drop) + x$data <- ClimProjDiags::Subset(x$data, along = along, + indices = indices, + drop = drop) # Adjust dimensions x$dims <- dim(x$data) # Adjust coordinates @@ -107,22 +106,22 @@ CST_Subset <- function(x, along, indices, drop = FALSE, x$attrs$Datasets <- x$attrs$Datasets[x$coords[[dim_name]][1]] } if ((!is.null(x$attrs$source_files)) && - (dim_name %in% dim(x$attrs$source_files))) { + (dim_name %in% dim(x$attrs$source_files))) { x$attrs$source_files <- ClimProjDiags::Subset(x$attrs$source_files, - along = dim_name, - indices = index, - drop = drop) + along = dim_name, + indices = index, + drop = drop) } if ((dim_name %in% names(x$dims)) && - (dim_name %in% names(x$attrs$Variable$metadata))) { + (dim_name %in% names(x$attrs$Variable$metadata))) { # Subset coords by indices x$attrs$Variable$metadata[[dim_name]] <- - .subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) + .subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) } } # Remove metadata from variables that were dropped vars_to_keep <- na.omit(match(c(names(x$dims), (x$attrs$Variable$varName)), - names(x$attrs$Variable$metadata))) + names(x$attrs$Variable$metadata))) x$attrs$Variable$metadata <- x$attrs$Variable$metadata[vars_to_keep] # Subset Dates time_along <- intersect(along, names(dim(x$attrs$Dates))) @@ -132,7 +131,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, x$attrs$Dates <- ClimProjDiags::Subset(x$attrs$Dates, along = time_along, indices = time_indices, - drop = drop) + drop = drop) } return(x) } @@ -144,6 +143,9 @@ CST_Subset <- function(x, along, indices, drop = FALSE, attr.names <- attr.names[attr.names != 'names'] attr.names <- attr.names[attr.names != 'dim'] attributes(l)[attr.names] <- attributes(x)[attr.names] + if (is.null(dim(l))) { + dim(l) <- length(l) + } names(dim(l)) <- x.dims return(l) } diff --git a/man/CST_Subset.Rd b/man/CST_Subset.Rd new file mode 100644 index 00000000..a9f480aa --- /dev/null +++ b/man/CST_Subset.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_Subset.R +\name{CST_Subset} +\alias{CST_Subset} +\title{Subset an object of class s2dv_cube} +\usage{ +CST_Subset(x, along, indices, drop = FALSE, var_dim = NULL, dat_dim = NULL) +} +\arguments{ +\item{x}{An object of class \code{s2dv_cube} to be sliced.} + +\item{along}{A vector with references to the dimensions to take the subset +from: either integers or dimension names.} + +\item{indices}{A list of indices to take from each dimension specified in +'along'. If a single dimension is specified in 'along', it can be directly +provided as an integer or a vector.} + +\item{drop}{Whether to drop all the dimensions of length 1 in the resulting +array, none, only those that are specified in 'along', or only those that +are not specified in 'along'. The possible values are: 'all' or TRUE, 'none' +or FALSE, 'selected', and 'non-selected'. The default value is FALSE.} + +\item{var_dim}{A chatacter string indicating the name of the variable +dimension. The default value is NULL.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The default value is NULL.} +} +\value{ +An object of class \code{s2dv_cube} with similar data, coordinates and + attributes as the \code{x} input, but with trimmed or dropped dimensions. +} +\description{ +This function allows to subset (i.e. slice, take a chunk of) the data inside +an object of class \code{s2dv_cube} and modify the dimensions, coordinates and +attributes accordingly, removing any variables, time steps and spatial +coordinates that are dropped when subsetting. It ensures that the information +inside the s2dv_cube remains coherent with the data it contains.\cr\cr +As in the function \code{Subset} from the ClimProjDiags package, the +dimensions to subset along can be specified via the parameter \code{along} +either with integer indices or by their name.\cr\cr +There are additional ways to adjust which dimensions are dropped in the +resulting object: either to drop all, to drop none, to drop only the ones that +have been sliced or to drop only the ones that have not been sliced.\cr\cr +The \code{load_parameters} and \code{when} attributes of the original cube +are preserved. The \code{source_files} attribute is subset along the +\code{var_dim} and \code{dat_dim} dimensions. +} +\examples{ +#Example with sample data: +# Check original dimensions and coordinates +lonlat_temp$exp$dims +names(lonlat_temp$exp$coords) +# Subset the s2dv_cube +exp_subset <- CST_Subset(lonlat_temp$exp, + along = c("lat", "lon"), + indices = list(1:10, 1:10), + drop = 'non-selected') +# Check new dimensions and coordinates +exp_subset$dims +names(exp_subset$coords) + +} +\seealso{ +\link[ClimProjDiags]{Subset} +} +\author{ +Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} +} diff --git a/tests/testthat/test-CST_Subset.R b/tests/testthat/test-CST_Subset.R new file mode 100644 index 00000000..de2e1c7c --- /dev/null +++ b/tests/testthat/test-CST_Subset.R @@ -0,0 +1,222 @@ +context("CSTools::CST_Subset tests") + +############################################## + +test_that("1. Input checks: CST_Subset", { + # Check that x is s2dv_cube + expect_error( + CST_Subset(array(10)), + "Parameter 'x' must be of the class 's2dv_cube'." + ) + # Check var_dim + expect_error( + CST_Subset(lonlat_prec, var_dim = 1), + "Parameter 'var_dim' must be a character string." + ) + expect_error( + CST_Subset(lonlat_prec, var_dim = c('tas', 'psl')), + "Parameter 'var_dim' must be a character string." + ) + # Check dat_dim + expect_error( + CST_Subset(lonlat_prec, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + CST_Subset(lonlat_prec, dat_dim = c('dat1', 'dat2')), + "Parameter 'dat_dim' must be a character string." + ) +}) + +############################################## + +test_that("2. Output checks: CST_Subset", { + res1 <- CST_Subset(lonlat_prec, along = c('lat', 'lon', 'sdate', 'member'), + indices = list(1, 1:2, 1, 1:2), + drop = 'all') + # Check dimensions + expect_equal( + dim(res1$data), + res1$dims + ) + expect_equal( + dim(res1$data), + c(member = 2, ftime = 31, lon = 2) + ) + # Check coordinates + expect_equal( + names(res1$coords), + c("member", "ftime", "lon") + ) + # Check attrs + expect_equal( + names(res1$attrs), + names(lonlat_prec$attrs) + ) + expect_equal( + names(res1$attrs$Variable$metadata), + c("lon", "prlr") + ) + expect_equal( + names(res1$attrs$Datasets), + c("exp1") + ) + # Check 'dat_dim' + res2 <- CST_Subset(lonlat_prec, along = c('lat'), indices = list(1), + drop = 'all', dat_dim = 'dataset') + res3 <- CST_Subset(lonlat_prec, along = c('lat'), indices = list(1), + drop = 'selected', dat_dim = 'dataset') + res4 <- CST_Subset(lonlat_prec, along = c('dataset'), indices = list(1), + drop = 'all', dat_dim = 'dataset') + res5 <- CST_Subset(lonlat_prec, along = c('dataset'), indices = list(1), + drop = 'selected', dat_dim = 'dataset') + expect_equal( + names(res2$attrs$Datasets), + names(res3$attrs$Datasets), + "exp1" + ) + expect_equal( + length(res4$attrs$Datasets), + length(res5$attrs$Datasets), + 0 + ) + # Check 'Dates' + res6 <- CST_Subset(lonlat_prec, along = c('sdate', 'ftime'), + indices = list(1, 1:10), drop = 'selected') + res7 <- CST_Subset(lonlat_prec, along = c('sdate', 'ftime'), + indices = list(1, 1:10), drop = 'none') + # Dates dimensions + expect_equal( + dim(res6$attrs$Dates), + res6$dims[which(names(dim(res6$data)) %in% c('sdate', 'ftime'))] + ) + expect_equal( + dim(res7$attrs$Dates), + c(ftime = 10, sdate = 1) + ) + # sdates coordinates + expect_equal( + names(res6$coords), + c("dataset", "member", "ftime", "lat", "lon") + ) + expect_equal( + as.vector(res7$coords$sdate), + c("20101101") + ) +}) + +############################################## + +repos1 <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + +suppressWarnings( + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos1)), + var = c('tas', 'sfcWind'), + sdate = c('20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T) +) +suppressWarnings( + exp_start <- as.s2dv_cube(data) +) + +############################################## + +test_that("3. Output checks with Start", { + res8 <- CST_Subset(exp_start, along = c('lon', 'sdate', 'ensemble', 'var', 'dat'), + indices = list(1:2, 1, 1, 1, 1), + drop = 'none') + res9 <- CST_Subset(exp_start, along = c('lon', 'sdate', 'ensemble', 'var', 'dat'), + indices = list(1:2, 1, 1, 1, 1), + drop = FALSE, var_dim = 'var', dat_dim = 'dat') + res10 <- CST_Subset(exp_start, along = c('lon', 'sdate', 'ensemble', 'var', 'dat'), + indices = list(1:2, 1, 1, 1, 1), + drop = 'selected', var_dim = 'var', dat_dim = 'dat') + # Check dimensions + expect_equal( + dim(res8$data), + c(dat = 1, var = 1, sdate = 1, ensemble = 1, time = 1, lat = 10, lon = 2) + ) + expect_equal( + dim(res8$data), + dim(res9$data) + ) + expect_equal( + dim(res10$data), + c(time = 1, lat = 10, lon = 2) + ) + # Check coordinates + expect_equal( + names(res8$coords), + names(res8$dims) + ) + expect_equal( + names(res9$coords), + names(res9$dims) + ) + # varName + expect_equal( + res8$attrs$Variable$varName, + c("tas", "sfcWind") + ) + expect_equal( + res9$attrs$Variable$varName, + c("tas") + ) + expect_equal( + res10$attrs$Variable$varName, + NULL + ) + # metadata + expect_equal( + names(res8$attrs$Variable$metadata), + c("time", "lat", "lon", "tas", "sfcWind") + ) + expect_equal( + names(res9$attrs$Variable$metadata), + c("time", "lat", "lon", "tas") + ) + expect_equal( + names(res10$attrs$Variable$metadata), + c("time", "lat", "lon") + ) + # Datasets + expect_equal( + res8$attrs$Datasets, + c("system4_m1", "system5_m1") + ) + expect_equal( + res9$attrs$Datasets, + c("system4_m1") + ) + expect_equal( + length(res10$attrs$Datasets), + 0 + ) + # Check source_files + expect_equal( + dim(res8$attrs$source_files), + c(dat = 2, var = 2, sdate = 1) + ) + # Error, this is needed to be subset + expect_equal( + dim(res9$attrs$source_files), + c(dat = 2, var = 2, sdate = 1) + ) + # Error, this is needed to be subset + expect_equal( + dim(res10$attrs$source_files), + c(dat = 2, var = 2, sdate = 1) + ) +}) -- GitLab From 19f94e3772b8b60cc5ed5cd2af05da7578c1cb01 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 15 Mar 2023 12:06:32 +0100 Subject: [PATCH 12/13] Fix bug on subsetting source_files --- R/CST_Subset.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CST_Subset.R b/R/CST_Subset.R index d74c2157..42d3f2c0 100644 --- a/R/CST_Subset.R +++ b/R/CST_Subset.R @@ -107,7 +107,7 @@ CST_Subset <- function(x, along, indices, drop = FALSE, x$attrs$Datasets <- x$attrs$Datasets[x$coords[[dim_name]][1]] } if ((!is.null(x$attrs$source_files)) && - (dim_name %in% dim(x$attrs$source_files))) { + (dim_name %in% names(dim(x$attrs$source_files)))) { x$attrs$source_files <- ClimProjDiags::Subset(x$attrs$source_files, along = dim_name, indices = index, -- GitLab From aab8ee5508ee0b216712af2ceae24d22ff44cc7a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 15 Mar 2023 12:41:47 +0100 Subject: [PATCH 13/13] Uncomment errors in unit test --- tests/testthat/test-CST_Subset.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-CST_Subset.R b/tests/testthat/test-CST_Subset.R index de2e1c7c..27e3cbef 100644 --- a/tests/testthat/test-CST_Subset.R +++ b/tests/testthat/test-CST_Subset.R @@ -207,16 +207,14 @@ test_that("3. Output checks with Start", { # Check source_files expect_equal( dim(res8$attrs$source_files), - c(dat = 2, var = 2, sdate = 1) + c(dat = 1, var = 1, sdate = 1) ) - # Error, this is needed to be subset expect_equal( dim(res9$attrs$source_files), - c(dat = 2, var = 2, sdate = 1) + c(dat = 1, var = 1, sdate = 1) ) - # Error, this is needed to be subset expect_equal( dim(res10$attrs$source_files), - c(dat = 2, var = 2, sdate = 1) + c(1) ) }) -- GitLab