diff --git a/NAMESPACE b/NAMESPACE index 1896857d4d8f4702911a8bf299ae37276b0c0aa5..d28fa03bb411264e7a485251b5cee7849370196b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(CST_RainFARM) export(CST_RegimesAssign) export(CST_SaveExp) export(CST_SplitDim) +export(CST_Subset) export(CST_WeatherRegimes) export(Calibration) export(CategoricalEnsCombination) diff --git a/R/CST_Subset.R b/R/CST_Subset.R new file mode 100644 index 0000000000000000000000000000000000000000..9f247e27a9f670d77a36dffc5b1566286ad995e6 --- /dev/null +++ b/R/CST_Subset.R @@ -0,0 +1,151 @@ +#'Subset an object of class s2dv_cube +#' +#'This function allows to subset (i.e. slice, take a chunk of) the data inside +#'an object of class \code{s2dv_cube} and modify the dimensions, coordinates and +#'attributes accordingly, removing any variables, time steps and spatial +#'coordinates that are dropped when subsetting. It ensures that the information +#'inside the s2dv_cube remains coherent with the data it contains.\cr\cr +#'As in the function \code{Subset} from the ClimProjDiags package, the +#'dimensions to subset along can be specified via the parameter \code{along} +#'either with integer indices or by their name.\cr\cr +#'There are additional ways to adjust which dimensions are dropped in the +#'resulting object: either to drop all, to drop none, to drop only the ones that +#'have been sliced or to drop only the ones that have not been sliced.\cr\cr +#'The \code{load_parameters} and \code{when} attributes of the original cube +#'are preserved. The \code{source_files} attribute is subset along the +#'\code{var_dim} and \code{dat_dim} dimensions. +#' +#'@author Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} +#' +#'@param x An object of class \code{s2dv_cube} to be sliced. +#'@param along A vector with references to the dimensions to take the subset +#' from: either integers or dimension names. +#'@param indices A list of indices to take from each dimension specified in +#' 'along'. If a single dimension is specified in 'along', it can be directly +#' provided as an integer or a vector. +#'@param drop Whether to drop all the dimensions of length 1 in the resulting +#' array, none, only those that are specified in 'along', or only those that +#' are not specified in 'along'. The possible values are: 'all' or TRUE, 'none' +#' or FALSE, 'selected', and 'non-selected'. The default value is FALSE. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The default value is NULL. +#'@param var_dim A chatacter string indicating the name of the variable +#' dimension. The default value is NULL. +#' +#'@return An object of class \code{s2dv_cube} with similar data, coordinates and +#' attributes as the \code{x} input, but with trimmed or dropped dimensions. +#' +#'@examples +#'#Example with sample data: +#'# Check original dimensions and coordinates +#'lonlat_temp$exp$dims +#'names(lonlat_temp$exp$coords) +#'# Subset the s2dv_cube +#'exp_subset <- CST_Subset(lonlat_temp$exp, +#' along = c("lat", "lon"), +#' indices = list(1:10, 1:10), +#' drop = 'non-selected') +#'# Check new dimensions and coordinates +#'exp_subset$dims +#'names(exp_subset$coords) +#' +#'@seealso \link[ClimProjDiags]{Subset} +#' +#'@importFrom ClimProjDiags Subset +#'@export +CST_Subset <- function(x, along, indices, drop = FALSE, var_dim = NULL, + dat_dim = NULL) { + # Check that x is s2dv_cube + if (!inherits(x, 's2dv_cube')) { + stop("Parameter 'x' must be of the class 's2dv_cube'.") + } + # Check var_dim + if (!is.null(var_dim)) { + if ((!is.character(var_dim)) || (length(var_dim) > 1)) { + stop("Parameter 'var_dim' must be a character string.") + } + } + # Check dat_dim + if (!is.null(dat_dim)) { + if ((!is.character(dat_dim)) || (length(dat_dim) > 1)) { + stop("Parameter 'dat_dim' must be a character string.") + } + } + + # Subset data + x$data <- ClimProjDiags::Subset(x$data, along = along, + indices = indices, + drop = drop) + # Adjust dimensions + x$dims <- dim(x$data) + # Adjust coordinates + for (dimension in 1:length(along)) { + dim_name <- along[dimension] + index <- indices[[dimension]] + # Only rename coordinates that have not been dropped + if (dim_name %in% names(x$dims)) { + # Subset coordinate by indices + x$coords[[dim_name]] <- .subset_with_attrs(x$coords[[dim_name]], index) + } + } + # Remove dropped coordinates + for (coordinate in names(x$coords)) { + if (!(coordinate %in% names(x$dims))) { + x$coords[[coordinate]] <- NULL + } + } + # Adjust attributes + # Variable + for (dimension in 1:length(along)) { + dim_name <- along[dimension] + index <- indices[[dimension]] + if ((!is.null(var_dim)) && (dim_name == var_dim)) { + x$attrs$Variable$varName <- x$coords[[dim_name]][1] + } + if ((!is.null(dat_dim)) && (dim_name == dat_dim)) { + x$attrs$Datasets <- x$attrs$Datasets[x$coords[[dim_name]][1]] + } + if ((!is.null(x$attrs$source_files)) && + (dim_name %in% names(dim(x$attrs$source_files)))) { + x$attrs$source_files <- ClimProjDiags::Subset(x$attrs$source_files, + along = dim_name, + indices = index, + drop = drop) + } + if ((dim_name %in% names(x$dims)) && + (dim_name %in% names(x$attrs$Variable$metadata))) { + # Subset coords by indices + x$attrs$Variable$metadata[[dim_name]] <- + .subset_with_attrs(x$attrs$Variable$metadata[[dim_name]], index) + } + } + # Remove metadata from variables that were dropped + vars_to_keep <- na.omit(match(c(names(x$dims), (x$attrs$Variable$varName)), + names(x$attrs$Variable$metadata))) + x$attrs$Variable$metadata <- x$attrs$Variable$metadata[vars_to_keep] + # Subset Dates + time_along <- intersect(along, names(dim(x$attrs$Dates))) + if (!(length(time_along) == 0)) { + time_indices <- indices[match(time_along, along)] + original_dates <- x$attrs$Dates + x$attrs$Dates <- ClimProjDiags::Subset(x$attrs$Dates, + along = time_along, + indices = time_indices, + drop = drop) + } + return(x) +} + +.subset_with_attrs <- function(x, ...) { + l <- x[...] + x.dims <- names(dim(x)) + attr.names <- names(attributes(x)) + attr.names <- attr.names[attr.names != 'names'] + attr.names <- attr.names[attr.names != 'dim'] + attributes(l)[attr.names] <- attributes(x)[attr.names] + if (is.null(dim(l))) { + dim(l) <- length(l) + } + names(dim(l)) <- x.dims + return(l) +} diff --git a/man/CST_Subset.Rd b/man/CST_Subset.Rd new file mode 100644 index 0000000000000000000000000000000000000000..a9f480aa0cc48cd5eace57633d778fbd43a73769 --- /dev/null +++ b/man/CST_Subset.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_Subset.R +\name{CST_Subset} +\alias{CST_Subset} +\title{Subset an object of class s2dv_cube} +\usage{ +CST_Subset(x, along, indices, drop = FALSE, var_dim = NULL, dat_dim = NULL) +} +\arguments{ +\item{x}{An object of class \code{s2dv_cube} to be sliced.} + +\item{along}{A vector with references to the dimensions to take the subset +from: either integers or dimension names.} + +\item{indices}{A list of indices to take from each dimension specified in +'along'. If a single dimension is specified in 'along', it can be directly +provided as an integer or a vector.} + +\item{drop}{Whether to drop all the dimensions of length 1 in the resulting +array, none, only those that are specified in 'along', or only those that +are not specified in 'along'. The possible values are: 'all' or TRUE, 'none' +or FALSE, 'selected', and 'non-selected'. The default value is FALSE.} + +\item{var_dim}{A chatacter string indicating the name of the variable +dimension. The default value is NULL.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The default value is NULL.} +} +\value{ +An object of class \code{s2dv_cube} with similar data, coordinates and + attributes as the \code{x} input, but with trimmed or dropped dimensions. +} +\description{ +This function allows to subset (i.e. slice, take a chunk of) the data inside +an object of class \code{s2dv_cube} and modify the dimensions, coordinates and +attributes accordingly, removing any variables, time steps and spatial +coordinates that are dropped when subsetting. It ensures that the information +inside the s2dv_cube remains coherent with the data it contains.\cr\cr +As in the function \code{Subset} from the ClimProjDiags package, the +dimensions to subset along can be specified via the parameter \code{along} +either with integer indices or by their name.\cr\cr +There are additional ways to adjust which dimensions are dropped in the +resulting object: either to drop all, to drop none, to drop only the ones that +have been sliced or to drop only the ones that have not been sliced.\cr\cr +The \code{load_parameters} and \code{when} attributes of the original cube +are preserved. The \code{source_files} attribute is subset along the +\code{var_dim} and \code{dat_dim} dimensions. +} +\examples{ +#Example with sample data: +# Check original dimensions and coordinates +lonlat_temp$exp$dims +names(lonlat_temp$exp$coords) +# Subset the s2dv_cube +exp_subset <- CST_Subset(lonlat_temp$exp, + along = c("lat", "lon"), + indices = list(1:10, 1:10), + drop = 'non-selected') +# Check new dimensions and coordinates +exp_subset$dims +names(exp_subset$coords) + +} +\seealso{ +\link[ClimProjDiags]{Subset} +} +\author{ +Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} +} diff --git a/tests/testthat/test-CST_Subset.R b/tests/testthat/test-CST_Subset.R new file mode 100644 index 0000000000000000000000000000000000000000..27e3cbef5ff3f21336e6aef91ec68f9e3d633136 --- /dev/null +++ b/tests/testthat/test-CST_Subset.R @@ -0,0 +1,220 @@ +context("CSTools::CST_Subset tests") + +############################################## + +test_that("1. Input checks: CST_Subset", { + # Check that x is s2dv_cube + expect_error( + CST_Subset(array(10)), + "Parameter 'x' must be of the class 's2dv_cube'." + ) + # Check var_dim + expect_error( + CST_Subset(lonlat_prec, var_dim = 1), + "Parameter 'var_dim' must be a character string." + ) + expect_error( + CST_Subset(lonlat_prec, var_dim = c('tas', 'psl')), + "Parameter 'var_dim' must be a character string." + ) + # Check dat_dim + expect_error( + CST_Subset(lonlat_prec, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + CST_Subset(lonlat_prec, dat_dim = c('dat1', 'dat2')), + "Parameter 'dat_dim' must be a character string." + ) +}) + +############################################## + +test_that("2. Output checks: CST_Subset", { + res1 <- CST_Subset(lonlat_prec, along = c('lat', 'lon', 'sdate', 'member'), + indices = list(1, 1:2, 1, 1:2), + drop = 'all') + # Check dimensions + expect_equal( + dim(res1$data), + res1$dims + ) + expect_equal( + dim(res1$data), + c(member = 2, ftime = 31, lon = 2) + ) + # Check coordinates + expect_equal( + names(res1$coords), + c("member", "ftime", "lon") + ) + # Check attrs + expect_equal( + names(res1$attrs), + names(lonlat_prec$attrs) + ) + expect_equal( + names(res1$attrs$Variable$metadata), + c("lon", "prlr") + ) + expect_equal( + names(res1$attrs$Datasets), + c("exp1") + ) + # Check 'dat_dim' + res2 <- CST_Subset(lonlat_prec, along = c('lat'), indices = list(1), + drop = 'all', dat_dim = 'dataset') + res3 <- CST_Subset(lonlat_prec, along = c('lat'), indices = list(1), + drop = 'selected', dat_dim = 'dataset') + res4 <- CST_Subset(lonlat_prec, along = c('dataset'), indices = list(1), + drop = 'all', dat_dim = 'dataset') + res5 <- CST_Subset(lonlat_prec, along = c('dataset'), indices = list(1), + drop = 'selected', dat_dim = 'dataset') + expect_equal( + names(res2$attrs$Datasets), + names(res3$attrs$Datasets), + "exp1" + ) + expect_equal( + length(res4$attrs$Datasets), + length(res5$attrs$Datasets), + 0 + ) + # Check 'Dates' + res6 <- CST_Subset(lonlat_prec, along = c('sdate', 'ftime'), + indices = list(1, 1:10), drop = 'selected') + res7 <- CST_Subset(lonlat_prec, along = c('sdate', 'ftime'), + indices = list(1, 1:10), drop = 'none') + # Dates dimensions + expect_equal( + dim(res6$attrs$Dates), + res6$dims[which(names(dim(res6$data)) %in% c('sdate', 'ftime'))] + ) + expect_equal( + dim(res7$attrs$Dates), + c(ftime = 10, sdate = 1) + ) + # sdates coordinates + expect_equal( + names(res6$coords), + c("dataset", "member", "ftime", "lat", "lon") + ) + expect_equal( + as.vector(res7$coords$sdate), + c("20101101") + ) +}) + +############################################## + +repos1 <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" + +suppressWarnings( + data <- Start(dat = list(list(name = 'system4_m1', path = repos2), + list(name = 'system5_m1', path = repos1)), + var = c('tas', 'sfcWind'), + sdate = c('20170101'), + ensemble = indices(1), + time = indices(1), + lat = indices(1:10), + lon = indices(1:10), + synonims = list(lat = c('lat', 'latitude'), + lon = c('lon', 'longitude')), + return_vars = list(time = 'sdate', + longitude = 'dat', + latitude = 'dat'), + metadata_dims = c('dat', 'var'), + retrieve = T) +) +suppressWarnings( + exp_start <- as.s2dv_cube(data) +) + +############################################## + +test_that("3. Output checks with Start", { + res8 <- CST_Subset(exp_start, along = c('lon', 'sdate', 'ensemble', 'var', 'dat'), + indices = list(1:2, 1, 1, 1, 1), + drop = 'none') + res9 <- CST_Subset(exp_start, along = c('lon', 'sdate', 'ensemble', 'var', 'dat'), + indices = list(1:2, 1, 1, 1, 1), + drop = FALSE, var_dim = 'var', dat_dim = 'dat') + res10 <- CST_Subset(exp_start, along = c('lon', 'sdate', 'ensemble', 'var', 'dat'), + indices = list(1:2, 1, 1, 1, 1), + drop = 'selected', var_dim = 'var', dat_dim = 'dat') + # Check dimensions + expect_equal( + dim(res8$data), + c(dat = 1, var = 1, sdate = 1, ensemble = 1, time = 1, lat = 10, lon = 2) + ) + expect_equal( + dim(res8$data), + dim(res9$data) + ) + expect_equal( + dim(res10$data), + c(time = 1, lat = 10, lon = 2) + ) + # Check coordinates + expect_equal( + names(res8$coords), + names(res8$dims) + ) + expect_equal( + names(res9$coords), + names(res9$dims) + ) + # varName + expect_equal( + res8$attrs$Variable$varName, + c("tas", "sfcWind") + ) + expect_equal( + res9$attrs$Variable$varName, + c("tas") + ) + expect_equal( + res10$attrs$Variable$varName, + NULL + ) + # metadata + expect_equal( + names(res8$attrs$Variable$metadata), + c("time", "lat", "lon", "tas", "sfcWind") + ) + expect_equal( + names(res9$attrs$Variable$metadata), + c("time", "lat", "lon", "tas") + ) + expect_equal( + names(res10$attrs$Variable$metadata), + c("time", "lat", "lon") + ) + # Datasets + expect_equal( + res8$attrs$Datasets, + c("system4_m1", "system5_m1") + ) + expect_equal( + res9$attrs$Datasets, + c("system4_m1") + ) + expect_equal( + length(res10$attrs$Datasets), + 0 + ) + # Check source_files + expect_equal( + dim(res8$attrs$source_files), + c(dat = 1, var = 1, sdate = 1) + ) + expect_equal( + dim(res9$attrs$source_files), + c(dat = 1, var = 1, sdate = 1) + ) + expect_equal( + dim(res10$attrs$source_files), + c(1) + ) +})