diff --git a/.Rbuildignore b/.Rbuildignore index 31cdda424dd02966ebb9f5f9ee8c1caa31ee008e..6cd6d8c85b536ff118f5edcc0caf6668aa64fda5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,6 +6,7 @@ ./.nc$ .*^(?!data)\.RData$ .*\.gitlab-ci.yml$ +.lintr ^tests$ #^inst/doc$ ^inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF100\.R$ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e245a6cccf20d743bddeb28f8fd424e6218a39c6..cbc39ada0ea6744da506a87409949d4e824aedbf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,3 +9,10 @@ build: - R CMD build --resave-data . - R CMD check --as-cran --no-manual --run-donttest CSTools_*.tar.gz - R -e 'covr::package_coverage()' + +lint-check: + stage: build + script: + - module load R/4.1.2-foss-2015a-bare + - echo "Run lintr on the package..." + - Rscript -e 'lintr::lint_package(path = ".")' diff --git a/.lintr b/.lintr new file mode 100644 index 0000000000000000000000000000000000000000..31497af1361e5de67c6498933e2f944c754dda16 --- /dev/null +++ b/.lintr @@ -0,0 +1,39 @@ +linters: linters_with_tags( # lintr_3.1.1 + tags = c("package_development", "readability", "best_practices"), + line_length_linter = line_length_linter(100L), + T_and_F_symbol_linter = NULL, + quotes_linter = NULL, + commented_code_linter = NULL, + implicit_integer_linter = NULL, + vector_logic_linter = NULL, + extraction_operator_linter = NULL, + function_left_parentheses_linter = NULL, + semicolon_linter = NULL, + indentation_linter = NULL, + unnecessary_nested_if_linter = NULL, + if_not_else_linter = NULL, + object_length_linter = NULL, + infix_spaces_linter(exclude_operators = "~") + ) +exclusions: list( + "inst", + "R/AnalogsPred_train.R", + "R/BEI_PDFBest.R", + "R/BEI_Weights.R", + "R/CST_AdamontAnalog.R", + "R/CST_AdamontQQCorr.R", + "R/Analogs.R", + "R/CST_AnalogsPredictors.R", + "R/CST_BEI_Weighting.R", + "R/CST_CategoricalEnsCombination.R", + "R/CST_DynBiasCorrection.R", + "R/CST_EnsClustering.R", + "R/PlotCombinedMap.R", + "R/PlotForecastPDF.R", + "R/PlotMostLikelyQuantileMap.R", + "R/PlotPDFsOLE.R", + "R/PlotTriangles4Categories.R", + "R/PlotWeeklyClim.R", + "tests/testthat/", + "tests/testthat.R" + ) diff --git a/DESCRIPTION b/DESCRIPTION index 3af5dcb16eedc1a288f09e7fb6cc1866ade12c6e..b352859dd73355af00868dae2a717f358482dbbe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: CSTools Title: Assessing Skill of Climate Forecasts on Seasonal-to-Decadal Timescales -Version: 5.1.1 +Version: 5.2.0 Authors@R: c( person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = "aut", comment = c(ORCID = "0000-0001-8568-3071")), person("Louis-Philippe", "Caron", , "louis-philippe.caron@bsc.es", role = "aut", comment = c(ORCID = "0000-0001-5221-0147")), @@ -79,7 +79,8 @@ Imports: utils, verification, lubridate, - scales + scales, + easyNCDF Suggests: zeallot, testthat, @@ -90,5 +91,5 @@ VignetteBuilder: knitr License: GPL-3 Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 012f76cfb679e9a7bf310fc913581d7b07bd4caa..2a976636158adf2a36afb18e279bb434312e5211 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(CST_BEI_Weighting) export(CST_BiasCorrection) export(CST_Calibration) export(CST_CategoricalEnsCombination) +export(CST_ChangeDimNames) export(CST_DynBiasCorrection) export(CST_EnsClustering) export(CST_InsertDim) @@ -69,6 +70,7 @@ export(s2dv_cube) export(training_analogs) import(RColorBrewer) import(abind) +import(easyNCDF) import(ggplot2) import(lubridate) 
import(multiApply) diff --git a/NEWS.md b/NEWS.md index 250410084c486ff70f9d6cf00d768a965686fe0b..cb82665167a043a1ba556c689e8bd321a1912d85 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# CSTools 5.2.0 (Release date: 25-01-2024) + +### Development +- New function CST_ChangeDimNames +- CST_SplitDim: added dimension names and also split Dates +- CST_SaveExp: save time bounds and global attributes; improved code + +### Other +- Updated README +- Added citation file + # CSTools 5.1.1 (Release date: 19-10-2023) ### Fixes diff --git a/R/CST_ChangeDimNames.R b/R/CST_ChangeDimNames.R new file mode 100644 index 0000000000000000000000000000000000000000..433039b66f17d382f874c7a515fe4fe71daa5705 --- /dev/null +++ b/R/CST_ChangeDimNames.R @@ -0,0 +1,89 @@ +#'Change the name of one or more dimensions for an object of class s2dv_cube +#' +#'Change the names of the dimensions specified in 'original_names' to the names +#'in 'new_names'. The coordinate names and the dimensions of any attributes +#'are also modified accordingly. +#' +#'@author Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} +#' +#'@param data An object of class \code{s2dv_cube} whose dimension names +#' should be changed. +#'@param original_names A single character string or a vector indicating the +#' dimensions to be renamed. +#'@param new_names A single character string or a vector indicating the new +#' dimension names, in the same order as the dimensions in 'original_names'. +#' +#'@return An object of class \code{s2dv_cube} with similar data, coordinates and +#'attributes to the \code{data} input, but with modified dimension names. +#' +#'@examples +#'# Example with sample data: +#'# Check original dimensions and coordinates +#'lonlat_temp$exp$dims +#'names(lonlat_temp$exp$coords) +#'dim(lonlat_temp$exp$attrs$Dates) +#'# Change 'dataset' to 'dat' and 'ftime' to 'time' +#'exp <- CST_ChangeDimNames(lonlat_temp$exp, +#' original_names = c("dataset", "ftime"), +#' new_names = c("dat", "time")) +#'# Check new dimensions and coordinates +#'exp$dims +#'names(exp$coords) +#'dim(exp$attrs$Dates) +#' +#'@export +CST_ChangeDimNames <- function(data, original_names, new_names) { + if (!inherits(data, "s2dv_cube")) { + stop("Parameter 'data' must be an object of class 's2dv_cube'.") + } + if (!is.character(original_names)) { + stop("Parameter 'original_names' must be a character string or a ", + "vector of character strings.") + } + if (!is.character(new_names)) { + stop("Parameter 'new_names' must be a character string or a ", + "vector of character strings.") + } + + if (!(length(original_names) == length(new_names))) { + stop("The number of dimension names in 'new_names' must be the same ", + "as in 'original_names'.") + } + if (!all(original_names %in% names(data$dims))) { + stop("Some of the dimensions in 'original_names' could not be found in ", + "'data'.") + } + for (index in 1:length(original_names)) { + original_name <- original_names[index] + new_name <- new_names[index] + # Step 1: Change dims + names(data$dims)[which(names(data$dims) == original_name)] <- new_name + # Step 2: Change coords + names(data$coords)[which(names(data$coords) == original_name)] <- new_name + # Step 3: Change attrs + # 3.1 - Dates + if (original_name %in% names(dim(data$attrs$Dates))) { + names(dim(data$attrs$Dates))[which(names(dim(data$attrs$Dates)) + == original_name)] <- new_name + } + # 3.2 - Variable metadata + if (original_name %in% names(data$attrs$Variable$metadata)) { + 
names(data$attrs$Variable$metadata)[which(names(data$attrs$Variable$metadata) + == original_name)] <- new_name + } + # 3.3 - Source files + if (original_name %in% names(dim(data$attrs$source_files))) { + names(dim(data$attrs$source_files))[which(names(dim(data$attrs$source_files)) + == original_name)] <- new_name + } + } + # Change data dimnames after renaming all dimensions + dim(data$data) <- data$dims + if (!is.null(attributes(data$data)$dimensions)) { + attributes(data$data)$dimensions <- names(data$dims) + } + # Change $Dates 'dim' attribute + attr(attributes(data$attrs$Dates)$dim, "names") <- names(dim(data$attrs$Dates)) + return(data) +} + diff --git a/R/CST_MergeDims.R b/R/CST_MergeDims.R index 4b66629ef724460287e01e26a22b098ae705ab05..dabdc57f3ae0aafd543f45d9f2594acdd5738c88 100644 --- a/R/CST_MergeDims.R +++ b/R/CST_MergeDims.R @@ -15,8 +15,6 @@ #' \code{merge_dims} will be used. #'@param na.rm A logical indicating if the NA values should be removed or not. #' -#'@import abind -#'@importFrom ClimProjDiags Subset #'@examples #'data <- 1 : c(2 * 3 * 4 * 5 * 6 * 7) #'dim(data) <- c(time = 7, lat = 2, lon = 3, monthly = 4, member = 6, @@ -35,8 +33,32 @@ CST_MergeDims <- function(data, merge_dims = c('ftime', 'monthly'), if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") } + if (is.null(rename_dim)) { + rename_dim <- merge_dims[1] + } + # data data$data <- MergeDims(data$data, merge_dims = merge_dims, rename_dim = rename_dim, na.rm = na.rm) + # dims + data$dims <- dim(data$data) + + # rename_dim + if (length(rename_dim) > 1) { + rename_dim <- as.character(rename_dim[1]) + } + # coords + data$coords[merge_dims] <- NULL + data$coords[[rename_dim]] <- 1:dim(data$data)[rename_dim] + attr(data$coords[[rename_dim]], 'indices') <- TRUE + + # attrs + if (all(merge_dims %in% names(dim(data$attrs$Dates)))) { + dim(data$attrs$Dates) <- dim(data$data)[rename_dim] + } else if (any(merge_dims %in% names(dim(data$attrs$Dates)))) { + warning("The dimensions of 'Dates' array will be different from ", + "the temporal dimensions in 'data'. Parameter 'merge_dims' ", + "only includes one of the temporal dimensions of 'Dates'.") + } return(data) } #'Function to Split Dimension @@ -55,12 +77,12 @@ CST_MergeDims <- function(data, merge_dims = c('ftime', 'monthly'), #' \code{merge_dims} will be used. #'@param na.rm A logical indicating if the NA values should be removed or not. #' -#'@import abind -#'@importFrom ClimProjDiags Subset #'@examples #'data <- 1 : 20 #'dim(data) <- c(time = 10, lat = 2) #'new_data <- MergeDims(data, merge_dims = c('time', 'lat')) +#'@import abind +#'@importFrom ClimProjDiags Subset #'@export MergeDims <- function(data, merge_dims = c('time', 'monthly'), rename_dim = NULL, na.rm = FALSE) { diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index 7d5733f1f21fe3c756c85a918fd0f3790bfce7be..4e25a51d54978d5fa6ae62eff8f5da4497850ad3 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -4,67 +4,88 @@ #' #'@description This function allows dividing and saving an object of class #''s2dv_cube' into a NetCDF file, allowing the saved data to be reloaded using -#'\code{Start} function from StartR package. If the original 's2dv_cube' object -#'has been created from \code{CST_Load()}, then it can be reloaded with -#'\code{Load()}. +#'\code{CST_Start} or \code{CST_Load} functions. It also allows saving any +#''s2dv_cube' object that follows the NetCDF attributes conventions. +#' #'@param data An object of class \code{s2dv_cube}.
#'@param destination A character string containing the directory name in which #' to save the data. NetCDF files for each start date are saved into the -#' folder tree: \cr -#' destination/Dataset/variable/. By default the function -#' creates and saves the data into the working directory. +#' folder tree: 'destination/Dataset/variable/'. By default the function +#' saves the data into the working directory. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. It can be NULL if there is no #' start date dimension. #'@param ftime_dim A character string indicating the name of the forecast time -#' dimension. By default, it is set to 'time'. It can be NULL if there is no -#' forecast time dimension. +#' dimension. If 'Dates' are used, it can't be NULL. If there is no forecast +#' time dimension, 'Dates' will be set to NULL and will not be used. By +#' default, it is set to 'time'. #'@param dat_dim A character string indicating the name of dataset dimension. -#' By default, it is set to 'dataset'. It can be NULL if there is no dataset -#' dimension. +#' It can be NULL if there is no dataset dimension. By default, it is set to +#' 'dataset'. #'@param var_dim A character string indicating the name of variable dimension. -#' By default, it is set to 'var'. It can be NULL if there is no variable -#' dimension. -#'@param memb_dim A character string indicating the name of the member dimension. -#' By default, it is set to 'member'. It can be NULL if there is no member -#' dimension. +#' It can be NULL if there is no variable dimension. By default, it is set to +#' 'var'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It can be NULL if there is no member dimension. By default, it is +#' set to 'member'. #'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files. It must be a vector of the same -#' length as the start date dimension of data. It must be a vector of class -#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -#' If it is NULL, the coordinate corresponding the the start date dimension or -#' the first Date of each time step will be used as the name of the files. -#' It is NULL by default. -#'@param drop_dims A vector of character strings indicating the dimension names -#' of length 1 that need to be dropped in order that they don't appear in the -#' netCDF file. It is NULL by default (optional). +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lengths +#' between 1 and 10. If it is NULL, the coordinate corresponding to the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. #'@param single_file A logical value indicating if the whole object is saved in a #' single file (TRUE) or in multiple files (FALSE). When it is FALSE, -#' the array is separated for Datasets, variable and start date. It is FALSE -#' by default. -#'@param extra_string A character string to be include as part of the file name, -#' for instance, to identify member or realization. It would be added to the -#' file name between underscore characters. +#' the array is separated for datasets, variable and start date.
When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: '<var_name>_<first_sdate>_<last_sdate>.nc'; when it is FALSE, +#' it is '<var_name>_<sdate>.nc'. It is FALSE by default. +#'@param drop_dims (optional) A vector of character strings indicating the +#' dimension names of length 1 that need to be dropped in order that they don't +#' appear in the netCDF file. It is only allowed to drop dimensions that are not +#' used in the computation. The dimensions used in the computation are the ones +#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +#' NULL by default. +#'@param extra_string (Optional) A character string to be included as part of +#' the file name, for instance, to identify member or realization. When +#' single_file is TRUE, the 'extra_string' will replace the entire default +#' file name; when single_file is FALSE, the 'extra_string' will be added +#' in the file name as: '<var_name>_<extra_string>_<sdate>.nc'. It is NULL by +#' default. +#'@param units_hours_since (Optional) A logical value only available for the +#' case: 'Dates' have forecast time and start date dimension, 'single_file' is +#' TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast +#' time with units of 'hours since'; if it is FALSE, the time units will be a +#' number of time steps with its corresponding frequency (e.g. n days, n months +#' or n hours). It is FALSE by default. +#'@param global_attrs (Optional) A list with elements containing the global +#' attributes to be saved in the NetCDF. #' #'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file = TRUE}}{ +#'\item{\code{single_file is TRUE}}{ #' All data is saved in a single file located in the specified destination -#' path with the following name: -#' ___.nc. Multiple -#' variables are saved separately in the same file. The forecast time units -#' is extracted from the frequency of the time steps (hours, days, months). -#' The first value of forecast time is 1. If no frequency is found, the units -#' will be 'hours since' each start date and the time steps are assumed to be -#' equally spaced. +#' path with the following name (by default): +#' '<var_name>_<first_sdate>_<last_sdate>.nc'. Multiple variables +#' are saved separately in the same file. The forecast time units +#' are calculated from each start date (if sdate_dim is not NULL) or from +#' the time step. If 'units_hours_since' is TRUE, the forecast time units +#' will be 'hours since <first_start_date>'. If 'units_hours_since' is FALSE, +#' the forecast time units are extracted from the frequency of the time steps +#' (hours, days, months); if no frequency is found, the units will be 'hours +#' since'. When the time units are 'hours since' the time steps are assumed to +#' be equally spaced. +#'} -#'\item{\code{single_file = FALSE}}{ +#'\item{\code{single_file is FALSE}}{ #' The data array is subset and stored into multiple files. Each file #' contains the data subset for each start date, variable and dataset. Files -#' with different variables and Datasets are stored in separated directories -#' within the following directory tree: destination/Dataset/variable/. -#' The name of each file will be: -#' __.nc. +#' with different variables and datasets are stored in separate directories +#' within the following directory tree: 'destination/Dataset/variable/'. +#' The name of each file will be by default: '<var_name>_<sdate>.nc'.
+#' The forecast time units are calculated from each start date (if sdate_dim +#' is not NULL) or from the time step. The forecast time units will be 'hours +#' since <start_date>'. #'} #' #'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and @@ -73,21 +94,17 @@ #'@examples #'\dontrun{ #'data <- lonlat_temp_st$exp -#'destination <- "./" -#'CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') +#'CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', +#' dat_dim = 'dataset', sdate_dim = 'sdate') #'} #' -#'@import ncdf4 -#'@importFrom s2dv Reorder -#'@importFrom ClimProjDiags Subset -#'@import multiApply #'@export -CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', - ftime_dim = 'time', dat_dim = 'dataset', - var_dim = 'var', memb_dim = 'member', - startdates = NULL, drop_dims = NULL, - single_file = FALSE, extra_string = NULL) { +CST_SaveExp <- function(data, destination = "./", startdates = NULL, + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', + var_dim = 'var', drop_dims = NULL, + single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = FALSE) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -100,22 +117,11 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', if (!inherits(data$attrs, 'list')) { stop("Level 'attrs' must be a list with at least 'Dates' element.") } - if (!all(c('coords') %in% names(data))) { - warning("Element 'coords' not found. No coordinates will be used.") - } # metadata - if (is.null(data$attrs$Variable$metadata)) { - warning("No metadata found in element Variable from attrs.") - } else { + if (!is.null(data$attrs$Variable$metadata)) { if (!inherits(data$attrs$Variable$metadata, 'list')) { stop("Element metadata from Variable element in attrs must be a list.") } - if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { - warning("Metadata is not found for any coordinate.") - } else if (!any(names(data$attrs$Variable$metadata) %in% - data$attrs$Variable$varName)) { - warning("Metadata is not found for any variable.") - } } # Dates if (is.null(data$attrs$Dates)) { @@ -129,50 +135,31 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' must be a character string.") } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } - } else if (length(dim(data$attrs$Dates)) == 1) { - sdate_dim <- 'sdate' - dim(data$data) <- c(sdate = 1, dim(data$data)) - data$dims <- dim(data$data) - dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) - data$coords[[sdate_dim]] <- data$attrs$Dates[1] } # startdates if (is.null(startdates)) { - startdates <- data$coords[[sdate_dim]] - } else { - if (!is.character(startdates)) { - warning(paste0("Parameter 'startdates' is not a character string, ", - "it will not be used.")) + if (is.character(data$coords[[sdate_dim]])) { startdates <- data$coords[[sdate_dim]] } - if (!is.null(sdate_dim)) { - if (dim(data$data)[sdate_dim] != length(startdates)) { - warning(paste0("Parameter 'startdates' doesn't have the same length ", - "as dimension '", sdate_dim,"', it will not be used.")) - startdates <- data$coords[[sdate_dim]] - } - } } SaveExp(data = data$data, destination = destination, -
Dates = data$attrs$Dates, coords = data$coords, + Dates = data$attrs$Dates, + time_bounds = data$attrs$time_bounds, + startdates = startdates, varname = data$attrs$Variable$varName, metadata = data$attrs$Variable$metadata, Datasets = data$attrs$Datasets, - startdates = startdates, - dat_dim = dat_dim, sdate_dim = sdate_dim, - ftime_dim = ftime_dim, var_dim = var_dim, + sdate_dim = sdate_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, + dat_dim = dat_dim, var_dim = var_dim, drop_dims = drop_dims, + single_file = single_file, extra_string = extra_string, - single_file = single_file) + global_attrs = global_attrs, + units_hours_since = units_hours_since) } #'Save a multidimensional array with metadata to data in NetCDF format #'@description This function allows to save a data array with metadata into a @@ -185,13 +172,26 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param data A multi-dimensional array with named dimensions. #'@param destination A character string indicating the path where to store the #' NetCDF files. -#'@param Dates A named array of dates with the corresponding sdate and forecast -#' time dimension. #'@param coords A named list with elements of the coordinates corresponding to #' the dimensions of the data parameter. The names and length of each element #' must correspond to the names of the dimensions. If any coordinate is not #' provided, it is set as an index vector with the values from 1 to the length #' of the corresponding dimension. +#'@param Dates A named array of dates with the corresponding sdate and forecast +#' time dimension. If there is no sdate_dim, you can set it to NULL. +#' It must have ftime_dim dimension. +#'@param time_bounds (Optional) A list of two arrays of dates containing +#' the lower (first array) and the upper (second array) time bounds +#' corresponding to Dates. Each array must have the same dimensions as Dates. +#' If 'Dates' parameter is NULL, 'time_bounds' are not used. It is NULL by +#' default. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lengths +#' between 1 and 10. If it is NULL, the coordinate corresponding to the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. #'@param varname A character string indicating the name of the variable to be #' saved. #'@param metadata A named list where each element is a variable containing the @@ -199,12 +199,6 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #' lists for each variable. #'@param Datasets A vector of character string indicating the names of the #' datasets. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files. It must be a vector of the same -#' length as the start date dimension of data. It must be a vector of class -#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. -#' If it is NULL, the first Date of each time step will be used as the name of -#' the files. It is NULL by default. #'@param sdate_dim A character string indicating the name of the start date #' dimension. By default, it is set to 'sdate'. It can be NULL if there is no #' start date dimension.
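As a minimal, hypothetical sketch of how the new 'time_bounds' and 'global_attrs' arguments fit together, relying only on the parameter descriptions above (the dates, bounds, variable name and attribute values are invented for illustration):

# Hypothetical monthly dates with start date and forecast time dimensions
dates <- seq(as.POSIXct("2000-01-01", tz = "UTC"),
             as.POSIXct("2000-03-01", tz = "UTC"), by = "month")
dim(dates) <- c(sdate = 1, time = 3)
# Hypothetical lower/upper time bounds, same dimensions as 'dates'
bnds_lower <- as.POSIXct(as.numeric(dates) - 86400, origin = "1970-01-01", tz = "UTC")
bnds_upper <- as.POSIXct(as.numeric(dates) + 86400, origin = "1970-01-01", tz = "UTC")
dim(bnds_lower) <- dim(bnds_upper) <- dim(dates)
data <- array(rnorm(60), dim = c(sdate = 1, time = 3, lat = 4, lon = 5))
SaveExp(data = data, destination = tempdir(), Dates = dates,
        time_bounds = list(bnds_lower, bnds_upper), varname = 'tas',
        sdate_dim = 'sdate', ftime_dim = 'time', memb_dim = NULL,
        dat_dim = NULL, var_dim = NULL, single_file = TRUE,
        global_attrs = list(institution = 'Example'))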
@@ -217,38 +211,60 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'@param var_dim A character string indicating the name of variable dimension. #' By default, it is set to 'var'. It can be NULL if there is no variable #' dimension. -#'@param memb_dim A character string indicating the name of the member dimension. -#' By default, it is set to 'member'. It can be NULL if there is no member -#' dimension. -#'@param drop_dims A vector of character strings indicating the dimension names -#' of length 1 that need to be dropped in order that they don't appear in the -#' netCDF file. It is NULL by default (optional). +#'@param memb_dim A character string indicating the name of the member +#' dimension. By default, it is set to 'member'. It can be NULL if there is no +#' member dimension. +#'@param drop_dims (optional) A vector of character strings indicating the +#' dimension names of length 1 that need to be dropped in order that they don't +#' appear in the netCDF file. It is only allowed to drop dimensions that are not +#' used in the computation. The dimensions used in the computation are the ones +#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +#' NULL by default. #'@param single_file A logical value indicating if the whole object is saved in a -#' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, -#' the array is separated for Datasets, variable and start date. It is FALSE -#' by default (optional). -#'@param extra_string A character string to be include as part of the file name, -#' for instance, to identify member or realization. It would be added to the -#' file name between underscore characters (optional). +#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, +#' the array is separated for datasets, variable and start date. When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: '<var_name>_<first_sdate>_<last_sdate>.nc'; when it is FALSE, +#' it is '<var_name>_<sdate>.nc'. It is FALSE by default. +#'@param extra_string (Optional) A character string to be included as part of +#' the file name, for instance, to identify member or realization. When +#' single_file is TRUE, the 'extra_string' will replace the entire default +#' file name; when single_file is FALSE, the 'extra_string' will be added +#' in the file name as: '<var_name>_<extra_string>_<sdate>.nc'. It is NULL by +#' default. +#'@param global_attrs (Optional) A list with elements containing the global +#' attributes to be saved in the NetCDF. +#'@param units_hours_since (Optional) A logical value only available for the +#' case: Dates have forecast time and start date dimension, single_file is +#' TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time +#' with units of 'hours since'; if it is FALSE, the time units will be a number +#' of time steps with its corresponding frequency (e.g. n days, n months or n +#' hours). It is FALSE by default. #' #'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file = TRUE}}{ +#'\item{\code{single_file is TRUE}}{ #' All data is saved in a single file located in the specified destination -#' path with the following name: -#' ___.nc. Multiple -#' variables are saved separately in the same file. The forecast time units -#' is extracted from the frequency of the time steps (hours, days, months). -#' The first value of forecast time is 1.
If no frequency is found, the units -#' will be 'hours since' each start date and the time steps are assumed to be -#' equally spaced. +#' path with the following name (by default): +#' '<var_name>_<first_sdate>_<last_sdate>.nc'. Multiple variables +#' are saved separately in the same file. The forecast time units +#' are calculated from each start date (if sdate_dim is not NULL) or from +#' the time step. If 'units_hours_since' is TRUE, the forecast time units +#' will be 'hours since <first_start_date>'. If 'units_hours_since' is FALSE, +#' the forecast time units are extracted from the frequency of the time steps +#' (hours, days, months); if no frequency is found, the units will be 'hours +#' since'. When the time units are 'hours since' the time steps are assumed to +#' be equally spaced. #'} -#'\item{\code{single_file = FALSE}}{ +#'\item{\code{single_file is FALSE}}{ #' The data array is subset and stored into multiple files. Each file #' contains the data subset for each start date, variable and dataset. Files -#' with different variables and Datasets are stored in separated directories -#' within the following directory tree: destination/Dataset/variable/. -#' The name of each file will be: -#' __.nc. +#' with different variables and datasets are stored in separate directories +#' within the following directory tree: 'destination/Dataset/variable/'. +#' The name of each file will be by default: '<var_name>_<sdate>.nc'. +#' The forecast time units are calculated from each start date (if sdate_dim +#' is not NULL) or from the time step. The forecast time units will be 'hours +#' since <start_date>'. #'} #' #'@examples @@ -260,24 +276,24 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', #'Datasets <- lonlat_temp_st$exp$attrs$Datasets #'varname <- 'tas' #'Dates <- lonlat_temp_st$exp$attrs$Dates -#'destination = './' #'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata -#'SaveExp(data = data, destination = destination, coords = coords, -#' Datasets = Datasets, varname = varname, Dates = Dates, -#' metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') +#'SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname, +#' Dates = Dates, metadata = metadata, single_file = TRUE, +#' ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset') #'} #' -#'@import ncdf4 +#'@import easyNCDF #'@importFrom s2dv Reorder #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export -SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, +SaveExp <- function(data, destination = "./", coords = NULL, + Dates = NULL, time_bounds = NULL, startdates = NULL, varname = NULL, metadata = NULL, Datasets = NULL, - startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', - ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', - drop_dims = NULL, single_file = FALSE, extra_string = NULL) { + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var', + drop_dims = NULL, single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = FALSE) { ## Initial checks # data if (is.null(data)) { @@ -287,21 +303,15 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if (is.null(dimnames)) { stop("Parameter 'data' must be an array with named dimensions.") } + if (!is.null(attributes(data)$dimensions)) { + attributes(data)$dimensions <- NULL + } # destination if (!is.character(destination) | length(destination) > 1) { stop("Parameter 'destination' must be a character string of one element ",
"indicating the name of the file (including the folder if needed) ", "where the data will be saved.") } - # Dates - if (!is.null(Dates)) { - if (!inherits(Dates, "POSIXct") & !inherits(Dates, "Date")) { - stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") - } - if (is.null(dim(Dates))) { - stop("Parameter 'Dates' must have dimension names.") - } - } # drop_dims if (!is.null(drop_dims)) { if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { @@ -310,6 +320,10 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else if (!all(dim(data)[drop_dims] %in% 1)) { warning("Parameter 'drop_dims' can only contain dimension names ", "that are of length 1. It will not be used.") + } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { + warning("Parameter 'drop_dims' contains dimensions used in the computation. ", + "It will not be used.") + drop_dims <- NULL } else { data <- Subset(x = data, along = drop_dims, indices = lapply(1:length(drop_dims), function(x) 1), @@ -319,28 +333,17 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } # coords if (!is.null(coords)) { - if (!all(names(coords) %in% dimnames)) { - coords <- coords[-which(!names(coords) %in% dimnames)] - } - for (i_coord in dimnames) { - if (i_coord %in% names(coords)) { - if (length(coords[[i_coord]]) != dim(data)[i_coord]) { - warning(paste0("Coordinate '", i_coord, "' has different lenght as ", - "its dimension and it will not be used.")) - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } else { - warning(paste0("Coordinate '", i_coord, "' is not provided ", - "and it will be set as index in element coords.")) - coords[[i_coord]] <- 1:dim(data)[i_coord] - } + if (!inherits(coords, 'list')) { + stop("Parameter 'coords' must be a named list of coordinates.") + } + if (is.null(names(coords))) { + stop("Parameter 'coords' must have names corresponding to coordinates.") } } else { coords <- sapply(dimnames, function(x) 1:dim(data)[x]) } # varname if (is.null(varname)) { - warning("Parameter 'varname' is NULL. It will be assigned to 'X'.") varname <- 'X' } else if (length(varname) > 1) { multiple_vars <- TRUE @@ -351,11 +354,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'varname' must be a character string with the ", "variable names.") } - # metadata - if (is.null(metadata)) { - warning("Parameter 'metadata' is not provided so the metadata saved ", - "will be incomplete.") - } # single_file if (!inherits(single_file, 'logical')) { warning("Parameter 'single_file' must be a logical value. It will be ", @@ -368,6 +366,12 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'extra_string' must be a character string.") } } + # global_attrs + if (!is.null(global_attrs)) { + if (!inherits(global_attrs, 'list')) { + stop("Parameter 'global_attrs' must be a list.") + } + } ## Dimensions checks # Spatial coordinates @@ -378,16 +382,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] - if (length(lon_dim) > 1) { - warning("Found more than one longitudinal dimension. Only the first one ", - "will be used.") - lon_dim <- lon_dim[1] - } - if (length(lat_dim) > 1) { - warning("Found more than one latitudinal dimension. 
Only the first one ", - "will be used.") - lat_dim <- lat_dim[1] - } } # ftime_dim if (!is.null(ftime_dim)) { @@ -395,12 +389,8 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'ftime_dim' must be a character string.") } if (!all(ftime_dim %in% dimnames)) { - stop("Parameter 'ftime_dim' is not found in 'data' dimension.") - } - if (length(ftime_dim) > 1) { - warning("Parameter 'ftime_dim' has length greater than 1 and ", - "only the first element will be used.") - ftime_dim <- ftime_dim[1] + stop("Parameter 'ftime_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no forecast time dimension.") } } # sdate_dim @@ -408,11 +398,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' must be a character string.") } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } if (!all(sdate_dim %in% dimnames)) { stop("Parameter 'sdate_dim' is not found in 'data' dimension.") } @@ -436,11 +421,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", "as NULL if there is no Datasets dimension.") } - if (length(dat_dim) > 1) { - warning("Parameter 'dat_dim' has length greater than 1 and ", - "only the first element will be used.") - dat_dim <- dat_dim[1] - } n_datasets <- dim(data)[dat_dim] } else { n_datasets <- 1 @@ -454,11 +434,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'var_dim' is not found in 'data' dimension. Set it ", "as NULL if there is no variable dimension.") } - if (length(var_dim) > 1) { - warning("Parameter 'var_dim' has length greater than 1 and ", - "only the first element will be used.") - var_dim <- var_dim[1] - } n_vars <- dim(data)[var_dim] } else { n_vars <- 1 @@ -473,29 +448,120 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, single_file <- TRUE } } - # Dates dimension check + # Dates (1): initial checks if (!is.null(Dates)) { - if (all(names(dim(Dates)) == c(ftime_dim, sdate_dim)) | - all(names(dim(Dates)) == c(sdate_dim, ftime_dim))) { - if (is.null(startdates)) { - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if ((!inherits(startdates, "POSIXct") & !inherits(startdates, "Date")) && - (!is.character(startdates) | (any(nchar(startdates) > 10) | any(nchar(startdates) < 1)))) { - warning("Parameter 'startdates' should be a character string containing ", - "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", - "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - if (!is.null(format(startdates, "%Y%m%d"))) { - startdates <- format(startdates, "%Y%m%d") - } + if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { + stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") + } + if (is.null(dim(Dates))) { + stop("Parameter 'Dates' must have dimension names.") + } + if (all(is.null(ftime_dim), is.null(sdate_dim))) { + warning("Parameters 'ftime_dim' and 'sdate_dim' can't both be NULL ", + "if 'Dates' are used. 
'Dates' will not be used.") + Dates <- NULL + } + # sdate_dim in Dates + if (!is.null(sdate_dim)) { + if (!sdate_dim %in% names(dim(Dates))) { + warning("Parameter 'sdate_dim' is not found in 'Dates' dimension. ", + "Dates will not be used.") + Dates <- NULL + } + } + # ftime_dim in Dates + if (!is.null(ftime_dim)) { + if (!ftime_dim %in% names(dim(Dates))) { + warning("Parameter 'ftime_dim' is not found in 'Dates' dimension. ", + "Dates will not be used.") + Dates <- NULL } - } else if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } + } + # time_bounds + if (!is.null(time_bounds)) { + if (!inherits(time_bounds, 'list')) { + stop("Parameter 'time_bounds' must be a list with two dates arrays.") + } + time_bounds_dims <- lapply(time_bounds, function(x) dim(x)) + if (!identical(time_bounds_dims[[1]], time_bounds_dims[[2]])) { + stop("Parameter 'time_bounds' must have 2 arrays with same dimensions.") + } + if (is.null(Dates)) { + time_bounds <- NULL } else { - stop("Parameter 'Dates' must have start date dimension and ", - "forecast time dimension.") + name_tb <- sort(names(time_bounds_dims[[1]])) + name_dt <- sort(names(dim(Dates))) + if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { + stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", + "of all dimensions.")) + } } } + # Dates (2): Check dimensions + if (!is.null(Dates)) { + if (any(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] != 1)) { + stop("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", + "dimensions of length greater than 1.") + } + # drop dimensions of length 1 different from sdate_dim and ftime_dim + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + + # add ftime if needed + if (is.null(ftime_dim)) { + warning("A 'time' dimension of length 1 will be added to 'Dates'.") + dim(Dates) <- c(time = 1, dim(Dates)) + dim(data) <- c(time = 1, dim(data)) + dimnames <- names(dim(data)) + ftime_dim <- 'time' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(time = 1, dim(x)) + return(x) + }) + } + units_hours_since <- TRUE + } + # add sdate if needed + if (is.null(sdate_dim)) { + if (!single_file) { + dim(Dates) <- c(dim(Dates), sdate = 1) + dim(data) <- c(dim(data), sdate = 1) + dimnames <- names(dim(data)) + sdate_dim <- 'sdate' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(dim(x), sdate = 1) + return(x) + }) + } + if (!is.null(startdates)) { + if (length(startdates) != 1) { + warning("Parameter 'startdates' must be of length 1 if 'sdate_dim' is NULL.", + "They won't be used.") + startdates <- NULL + } + } + } + units_hours_since <- TRUE + } + } + # startdates + if (!is.null(Dates)) { + # check startdates + if (is.null(startdates)) { + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { + warning("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + } else if (!single_file) { + warning("Dates must be provided if 'data' must be saved in separated files. 
", + "All data will be saved in a single file.") + single_file <- TRUE + } # startdates if (is.null(startdates)) { if (is.null(sdate_dim)) { @@ -504,20 +570,21 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, startdates <- rep('XXX', dim(data)[sdate_dim]) } } else { - if (is.null(sdate_dim)) { - if (length(startdates) != 1) { - warning("Parameter 'startdates' has length more than 1. Only first ", - "value will be used.") - startdates <- startdates[[1]] + if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } + if (!is.null(sdate_dim)) { + if (dim(data)[sdate_dim] != length(startdates)) { + warning(paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", sdate_dim,"', it will not be used.")) + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + startdates <- format(startdates, "%Y%m%d") } } } + # Datasets if (is.null(Datasets)) { - if (!single_file) { - warning("Parameter 'Datasets' is NULL. Files will be saved with a ", - "directory name of 'XXX'.") - } Datasets <- rep('XXX', n_datasets ) } if (inherits(Datasets, 'list')) { @@ -533,127 +600,74 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, Datasets <- Datasets[1:n_datasets] } + ## NetCDF dimensions definition + excluded_dims <- var_dim + if (!is.null(Dates)) { + excluded_dims <- c(excluded_dims, sdate_dim, ftime_dim) + } + if (!single_file) { + excluded_dims <- c(excluded_dims, dat_dim) + } + ## Unknown dimensions check - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) if (!all(dimnames %in% alldims)) { unknown_dims <- dimnames[which(!dimnames %in% alldims)] memb_dim <- c(memb_dim, unknown_dims) - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) - } - # Reorder - if (any(dimnames != alldims)) { - data <- Reorder(data, alldims) - dimnames <- names(dim(data)) - if (!is.null(attr(data, 'dimensions'))) { - attr(data, 'dimensions') <- dimnames - } } - ## NetCDF dimensions definition - defined_dims <- NULL - extra_info_dim <- NULL - if (is.null(Dates)) { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim))] - } else { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] - } + filedims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) + filedims <- filedims[which(!filedims %in% excluded_dims)] + + # Delete unneded coords + coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL + out_coords <- NULL for (i_coord in filedims) { - dim_info <- list() # vals if (i_coord %in% names(coords)) { - if (is.numeric(coords[[i_coord]])) { - dim_info[['vals']] <- as.vector(coords[[i_coord]]) + if (length(coords[[i_coord]]) != dim(data)[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } else if (is.numeric(coords[[i_coord]])) { + out_coords[[i_coord]] <- as.vector(coords[[i_coord]]) } else { - dim_info[['vals']] <- 1:dim(data)[i_coord] + out_coords[[i_coord]] <- 1:dim(data)[i_coord] } } else { - dim_info[['vals']] <- 1:dim(data)[i_coord] - } - # name - dim_info[['name']] <- i_coord - # len - dim_info[['len']] <- as.numeric(dim(data)[i_coord]) - # unlim - dim_info[['unlim']] <- FALSE - # create_dimvar - dim_info[['create_dimvar']] <- TRUE + 
out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } + dim(out_coords[[i_coord]]) <- dim(data)[i_coord] + ## metadata if (i_coord %in% names(metadata)) { if ('variables' %in% names(attributes(metadata[[i_coord]]))) { # from Start: 'lon' or 'lat' - attrs <- attributes(metadata[[i_coord]])[['variables']][[i_coord]] - i_coord_info <- attrs[!sapply(attrs, inherits, 'list')] + attrs <- attributes(metadata[[i_coord]])[['variables']] + attrs[[i_coord]]$dim <- NULL + attr(out_coords[[i_coord]], 'variables') <- attrs } else if (inherits(metadata[[i_coord]], 'list')) { # from Start and Load: main var - i_coord_info <- metadata[[i_coord]] + attr(out_coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord } else if (!is.null(attributes(metadata[[i_coord]]))) { # from Load - i_coord_info <- attributes(metadata[[i_coord]]) - } else { - stop("Metadata is not correct.") - } - # len - if ('size' %in% names(i_coord_info)) { - if (i_coord_info[['size']] != dim(data)[i_coord]) { - dim_info[['original_len']] <- i_coord_info[['size']] - i_coord_info[['size']] <- NULL - } + attrs <- attributes(metadata[[i_coord]]) + # We remove because some attributes can't be saved + attrs <- NULL + attr(out_coords[[i_coord]], 'variables') <- list(attrs) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord } - # units - if (!('units' %in% names(i_coord_info))) { - dim_info[['units']] <- '' - } else { - dim_info[['units']] <- i_coord_info[['units']] - i_coord_info[['units']] <- NULL - } - # calendar - if (!('calendar' %in% names(i_coord_info))) { - dim_info[['calendar']] <- NA - } else { - dim_info[['calendar']] <- i_coord_info[['calendar']] - i_coord_info[['calendar']] <- NULL - } - # longname - if ('long_name' %in% names(i_coord_info)) { - dim_info[['longname']] <- i_coord_info[['long_name']] - i_coord_info[['long_name']] <- NULL - } else if ('longname' %in% names(i_coord_info)) { - dim_info[['longname']] <- i_coord_info[['longname']] - i_coord_info[['longname']] <- NULL - } else { - if (i_coord %in% .KnownLonNames()) { - dim_info[['longname']] <- 'longitude' - } else if (i_coord %in% .KnownLatNames()) { - dim_info[['longname']] <- 'latitude' - } - } - # extra information - if (!is.null(names(i_coord_info))) { - extra_info_dim[[i_coord]] <- i_coord_info - } - } else { - # units - dim_info[['units']] <- "adim" - # longname - dim_info[['longname']] <- i_coord - # calendar - dim_info[['calendar']] <- NA - } - new_dim <- list(ncdim_def(name = dim_info[['name']], units = dim_info[['units']], - vals = dim_info[['vals']], unlim = dim_info[['unlim']], - create_dimvar = dim_info[['create_dimvar']], - calendar = dim_info[['calendar']], - longname = dim_info[['longname']])) - names(new_dim) <- i_coord - defined_dims <- c(defined_dims, new_dim) + } } - defined_vars <- list() if (!single_file) { for (i in 1:n_datasets) { path <- file.path(destination, Datasets[i], varname) for (j in 1:n_vars) { - dir.create(path[j], recursive = TRUE) + if (!dir.exists(path[j])) { + dir.create(path[j], recursive = TRUE) + } startdates <- gsub("-", "", startdates) dim(startdates) <- c(length(startdates)) names(dim(startdates)) <- sdate_dim @@ -666,297 +680,240 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') } + target <- names(dim(data_subset))[which(!names(dim(data_subset)) %in% c(sdate_dim, ftime_dim))] + target_dims_data <- c(target, ftime_dim) if 
(is.null(Dates)) { input_data <- list(data_subset, startdates) - target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) + target_dims <- list(target_dims_data, NULL) + } else if (!is.null(time_bounds)) { + input_data <- list(data_subset, startdates, Dates, + time_bounds[[1]], time_bounds[[2]]) + target_dims = list(target_dims_data, NULL, + ftime_dim, ftime_dim, ftime_dim) } else { input_data <- list(data_subset, startdates, Dates) - target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) + target_dims = list(target_dims_data, NULL, ftime_dim) } Apply(data = input_data, target_dims = target_dims, - fun = .saveExp, + fun = .saveexp, destination = path[j], - defined_dims = defined_dims, + coords = out_coords, ftime_dim = ftime_dim, varname = varname[j], metadata_var = metadata[[varname[j]]], - extra_info_dim = extra_info_dim, - extra_string = extra_string) + extra_string = extra_string, + global_attrs = global_attrs) } } } else { - # Datasets definition - # From here - if (!is.null(dat_dim)) { - new_dim <- list(ncdim_def(name = dat_dim, units = "adim", - vals = 1 : dim(data)[dat_dim], - longname = 'Datasets', create_dimvar = TRUE)) - names(new_dim) <- dat_dim - defined_dims <- c(new_dim, defined_dims) - extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) - } - first_sdate <- last_sdate <- NULL + # time_bnds + if (!is.null(time_bounds)) { + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + } + # Dates + remove_metadata_dim <- TRUE if (!is.null(Dates)) { - # sdate definition - sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - differ <- as.numeric((sdates - sdates[1])/3600) - new_dim <- list(ncdim_def(name = sdate_dim, units = paste('hours since', sdates[1]), - vals = differ, - longname = sdate_dim, create_dimvar = TRUE)) - names(new_dim) <- sdate_dim - defined_dims <- c(defined_dims, new_dim) - first_sdate <- sdates[1] - last_sdate <- sdates[length(sdates)] - # ftime definition - Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) - differ_ftime <- apply(Dates, 2, function(x){as.numeric((x - x[1])/3600)}) - dim(differ_ftime) <- dim(Dates) - differ_ftime_subset <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') - if (all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { - if (all(diff(differ_ftime_subset/24) == 1)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + # ftime definition + leadtimes <- as.numeric(difftime(Dates, sdates, units = "hours")) + } else { + # sdate definition + sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) + dim(differ) <- dim(data)[sdate_dim] + differ <- list(differ) + names(differ) <- sdate_dim + out_coords <- c(differ, out_coords) + attrs <- list(units = paste('hours since', sdates[1]), + calendar = 'proleptic_gregorian', longname = sdate_dim) + attr(out_coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs + # ftime definition + Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) + differ_ftime <- array(dim = dim(Dates)) + for (i in 1:length(sdates)) { + differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], + units = "hours")) + } + dim(differ_ftime) <- dim(Dates) + leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') + if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { + warning("Time steps are not equal for all start dates. 
Only ", + "forecast time values for the first start date will be saved ", + "correctly.") + } + } + if (all(!units_hours_since, is.null(time_bounds))) { + if (all(diff(leadtimes/24) == 1)) { # daily values - dim_time <- list(ncdim_def(name = ftime_dim, units = 'days', - vals = round(differ_ftime_subset/24) + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) - } else if (all(diff(differ_ftime_subset/24) %in% c(28, 29, 30, 31))) { + units <- 'days' + leadtimes_vals <- round(leadtimes/24) + 1 + } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { # monthly values - dim_time <- list(ncdim_def(name = ftime_dim, units = 'months', - vals = round(differ_ftime_subset/730) + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- 'months' + leadtimes_vals <- round(leadtimes/(30.437*24)) + 1 } else { # other frequency - dim_time <- list(ncdim_def(name = ftime_dim, units = 'hours', - vals = differ_ftime_subset + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- 'hours' + leadtimes_vals <- leadtimes + 1 } } else { - warning("Time steps are not equal for all start dates. Only ", - "forecast time values for the first start date will be saved ", - "correctly.") - dim_time <- list(ncdim_def(name = ftime_dim, - units = paste('hours since', - paste(sdates, collapse = ', ')), - vals = differ_ftime_subset, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- paste('hours since', paste(sdates, collapse = ', ')) + leadtimes_vals <- leadtimes } - } + # Add time_bnds + if (!is.null(time_bounds)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + leadtimes_bnds <- as.numeric(difftime(time_bnds, sdates, units = "hours")) + dim(leadtimes_bnds) <- c(dim(Dates), bnds = 2) + } else { + # assuming they have sdate and ftime + time_bnds <- lapply(time_bounds, function(x) { + x <- Reorder(x, c(ftime_dim, sdate_dim)) + return(x) + }) + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + dim(time_bnds) <- c(dim(Dates), bnds = 2) + differ_bnds <- array(dim = c(dim(time_bnds))) + for (i in 1:length(sdates)) { + differ_bnds[, i, ] <- as.numeric(difftime(time_bnds[, i, ], Dates[1, i], + units = "hours")) + } + # NOTE (TODO): Add a warning when they are not equally spaced? 
+ leadtimes_bnds <- Subset(differ_bnds, along = sdate_dim, 1, drop = 'selected') + } + # Add time_bnds + leadtimes_bnds <- Reorder(leadtimes_bnds, c('bnds', ftime_dim)) + leadtimes_bnds <- list(leadtimes_bnds) + names(leadtimes_bnds) <- 'time_bnds' + out_coords <- c(leadtimes_bnds, out_coords) + attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), + calendar = 'proleptic_gregorian', + long_name = 'time bounds', unlim = FALSE) + attr(out_coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime var + dim(leadtimes_vals) <- dim(data)[ftime_dim] + leadtimes_vals <- list(leadtimes_vals) + names(leadtimes_vals) <- ftime_dim + out_coords <- c(leadtimes_vals, out_coords) + attrs <- list(units = units, calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bounds)) { + attrs$bounds = 'time_bnds' + } + attr(out_coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + for (j in 1:n_vars) { + remove_metadata_dim <- FALSE + metadata[[varname[j]]]$dim <- list(list(name = ftime_dim, unlim = TRUE)) + } + # Reorder ftime_dim to last + if (length(dim(data)) != which(names(dim(data)) == ftime_dim)) { + order <- c(names(dim(data))[which(!names(dim(data)) %in% c(ftime_dim))], ftime_dim) + data <- Reorder(data, order) + } + } # var definition - defined_vars <- list() extra_info_var <- NULL for (j in 1:n_vars) { - var_info <- list() - i_var_info <- metadata[[varname[j]]][!sapply(metadata[[varname[j]]], inherits, 'list')] - ## Define metadata - # name - var_info[['name']] <- varname[j] - # units - if ('units' %in% names(i_var_info)) { - var_info[['units']] <- i_var_info[['units']] - i_var_info[['units']] <- NULL - } else { - var_info[['units']] <- '' - } - # dim - var_info[['dim']] <- defined_dims - # missval - if ('missval' %in% names(i_var_info)) { - var_info[['missval']] <- i_var_info[['missval']] - i_var_info[['missval']] <- NULL + varname_j <- varname[j] + metadata_j <- metadata[[varname_j]] + if (is.null(var_dim)) { + out_coords[[varname_j]] <- data } else { - var_info[['missval']] <- NULL + out_coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') } - # longname - if (any(c('longname', 'long_name') %in% names(i_var_info))) { - longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] - var_info[['longname']] <- i_var_info[[longname]] - i_var_info[[longname]] <- NULL - } else { - var_info[['longname']] <- varname[j] - } - # prec - if ('prec' %in% names(i_var_info)) { - var_info[['prec']] <- i_var_info[['prec']] - i_var_info[['prec']] <- NULL - } else { - prec <- typeof(data) - if (prec == 'character') { - var_info[['prec']] <- 'char' - } - if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { - var_info[['prec']] <- prec - } else { - var_info[['prec']] <- 'double' - } + if (!is.null(metadata_j)) { + if (remove_metadata_dim) metadata_j$dim <- NULL + attr(out_coords[[varname_j]], 'variables') <- list(metadata_j) + names(attributes(out_coords[[varname_j]])$variables) <- varname_j } - # extra information - if (!is.null(names(i_var_info))) { - extra_info_var[[varname[j]]] <- i_var_info + # Add global attributes + if (!is.null(global_attrs)) { + attributes(out_coords[[varname_j]])$global_attrs <- global_attrs } - new_var <- list(ncvar_def(name = var_info[['name']], - units = var_info[['units']], - dim = var_info[['dim']], - missval = var_info[['missval']], - longname = var_info[['longname']], - prec = var_info[['prec']])) - - 
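<- attrs

Taken together, the renaming and saving entry points touched by this patch can be chained; a minimal sketch using the package's lonlat_temp_st sample data and only the interfaces documented above (the destination path is illustrative):

library(CSTools)
# Rename 'ftime' to 'time' so the Dates attribute and data dimensions agree
exp <- CST_ChangeDimNames(lonlat_temp_st$exp,
                          original_names = "ftime",
                          new_names = "time")
# One NetCDF file per dataset, variable and start date under the destination
CST_SaveExp(data = exp, destination = tempdir(), sdate_dim = 'sdate',
            ftime_dim = 'time', var_dim = 'var', dat_dim = 'dataset',
            single_file = FALSE)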
names(new_var) <- varname[j] - defined_vars <- c(defined_vars, new_var) } if (is.null(extra_string)) { + first_sdate <- startdates[1] + last_sdate <- startdates[length(startdates)] gsub("-", "", first_sdate) file_name <- paste0(paste(c(varname, gsub("-", "", first_sdate), gsub("-", "", last_sdate)), collapse = '_'), ".nc") } else { - file_name <- paste0(paste(c(varname, extra_string, - gsub("-", "", first_sdate), - gsub("-", "", last_sdate)), - collapse = '_'), ".nc") - } - full_filename <- file.path(destination, file_name) - file_nc <- nc_create(full_filename, defined_vars) - if (is.null(var_dim)) { - ncvar_put(file_nc, varname, vals = data) - } else { - for (j in 1:n_vars) { - ncvar_put(file_nc, defined_vars[[j]]$name, - vals = Subset(data, var_dim, j, drop = 'selected')) - } - } - # Additional dimension attributes - for (dim in names(defined_dims)) { - if (dim %in% names(extra_info_dim)) { - for (info_dim in names(extra_info_dim[[dim]])) { - add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ') - ncatt_put(file_nc, dim, info_dim, add_info_dim) - } - } - } - # Additional dimension attributes - for (var in names(defined_vars)) { - if (var %in% names(extra_info_var)) { - for (info_var in names(extra_info_var[[var]])) { - add_info_var <- paste0(extra_info_var[[var]][[info_var]], collapse = ', ') - ncatt_put(file_nc, var, info_var, add_info_var) - } + nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) + if (nc == ".nc") { + file_name <- extra_string + } else { + file_name <- paste0(extra_string, ".nc") } } - nc_close(file_nc) + full_filename <- file.path(destination, file_name) + ArrayToNc(out_coords, full_filename) } } -.saveExp <- function(data, startdates = NULL, dates = NULL, destination = "./", - defined_dims, ftime_dim = 'time', varname = 'var', - metadata_var = NULL, extra_info_dim = NULL, - extra_string = NULL) { - # ftime_dim +.saveexp <- function(data, coords, destination = "./", + startdates = NULL, dates = NULL, + time_bnds1 = NULL, time_bnds2 = NULL, + ftime_dim = 'time', varname = 'var', + metadata_var = NULL, extra_string = NULL, + global_attrs = NULL) { + remove_metadata_dim <- TRUE if (!is.null(dates)) { - differ <- as.numeric((dates - dates[1])/3600) - dim_time <- list(ncdim_def(name = ftime_dim, units = paste('hours since', dates[1]), - vals = differ, calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + if (!any(is.null(time_bnds1), is.null(time_bnds2))) { + time_bnds <- c(time_bnds1, time_bnds2) + time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) + dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) + time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) + time_bnds <- list(time_bnds) + names(time_bnds) <- 'time_bnds' + coords <- c(time_bnds, coords) + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = 'time bounds') + attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime_dim + differ <- as.numeric(difftime(dates, dates[1], units = "hours")) + dim(differ) <- dim(data)[ftime_dim] + differ <- list(differ) + names(differ) <- ftime_dim + coords <- c(differ, coords) + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bnds1)) { + attrs$bounds = 'time_bnds' + } + attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] 
<- attrs + metadata_var$dim <- list(list(name = ftime_dim, unlim = TRUE)) + remove_metadata_dim <- FALSE + } + # Add data + coords[[varname]] <- data + if (!is.null(metadata_var)) { + if (remove_metadata_dim) metadata_var$dim <- NULL + attr(coords[[varname]], 'variables') <- list(metadata_var) + names(attributes(coords[[varname]])$variables) <- varname + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(coords[[varname]])$global_attrs <- global_attrs } - ## Define var metadata - var_info <- NULL - extra_info_var <- NULL - i_var_info <- metadata_var[!sapply(metadata_var, inherits, 'list')] - - # name - var_info[['name']] <- varname - # units - if ('units' %in% names(i_var_info)) { - var_info[['units']] <- i_var_info[['units']] - i_var_info[['units']] <- NULL - } else { - var_info[['units']] <- '' - } - # dim - var_info[['dim']] <- defined_dims - # missval - if ('missval' %in% names(i_var_info)) { - var_info[['missval']] <- i_var_info[['missval']] - i_var_info[['missval']] <- NULL - } else { - var_info[['missval']] <- NULL - } - # longname - if (any(c('longname', 'long_name') %in% names(i_var_info))) { - longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] - var_info[['longname']] <- i_var_info[[longname]] - i_var_info[[longname]] <- NULL - } else { - var_info[['longname']] <- varname - } - # prec - if ('prec' %in% names(i_var_info)) { - var_info[['prec']] <- i_var_info[['prec']] - i_var_info[['prec']] <- NULL - } else { - prec <- typeof(data) - if (prec == 'character') { - var_info[['prec']] <- 'char' - } - if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { - var_info[['prec']] <- prec - } else { - var_info[['prec']] <- 'double' - } - } - # extra information - if (!is.null(names(i_var_info))) { - extra_info_var <- i_var_info - } - - datanc <- ncvar_def(name = var_info[['name']], - units = var_info[['units']], - dim = var_info[['dim']], - missval = var_info[['missval']], - longname = var_info[['longname']], - prec = var_info[['prec']]) - if (is.null(extra_string)) { file_name <- paste0(varname, "_", startdates, ".nc") } else { file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") } full_filename <- file.path(destination, file_name) - file_nc <- nc_create(full_filename, datanc) - ncvar_put(file_nc, datanc, data) - - # Additional attributes - for (dim in names(defined_dims)) { - if (dim %in% names(extra_info_dim)) { - for (info_dim in names(extra_info_dim[[dim]])) { - add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ') - ncatt_put(file_nc, dim, info_dim, add_info_dim) - } - } - } - # Additional dimension attributes - if (!is.null(extra_info_var)) { - for (info_var in names(extra_info_var)) { - add_info_var <- paste0(extra_info_var[[info_var]], collapse = ', ') - ncatt_put(file_nc, varname, info_var, add_info_var) - } - } - - nc_close(file_nc) -} + ArrayToNc(coords, full_filename) +} \ No newline at end of file diff --git a/R/CST_SplitDim.R b/R/CST_SplitDim.R index 71f51bab4bdc27e7567241e9758c6a0e08d07937..bac378be1202675fd1fb08c080023e717d0d4184 100644 --- a/R/CST_SplitDim.R +++ b/R/CST_SplitDim.R @@ -10,7 +10,7 @@ #' #'@param data A 's2dv_cube' object #'@param split_dim A character string indicating the name of the dimension to -#' split. +#' split. It is set as 'time' by default. #'@param indices A vector of numeric indices or dates. If left at NULL, the #' dates provided in the s2dv_cube object (element Dates) will be used. 
#'@param freq A character string indicating the frequency: by 'day', 'month' and
@@ -21,6 +21,12 @@
 #' dimension.
 #'@param insert_ftime An integer indicating the number of time steps to add at
 #' the begining of the time series.
+#'@param ftime_dim A character string indicating the name of the forecast time
+#' dimension. It is set as 'time' by default.
+#'@param sdate_dim A character string indicating the name of the start date
+#' dimension. It is set as 'sdate' by default.
+#'@param return_indices A logical value indicating whether to return the
+#' indices used in splitting the dimension. It is FALSE by default.
 #'
 #'@details Parameter 'insert_ftime' has been included for the case of using
 #'daily data, requiring split the temporal dimensions by months (or similar) and
@@ -51,10 +57,12 @@
 #'new_data <- CST_SplitDim(data, indices = time, freq = 'year')
 #'@import abind
 #'@importFrom ClimProjDiags Subset
+#'@importFrom s2dv Reorder
 #'@export
 CST_SplitDim <- function(data, split_dim = 'time', indices = NULL,
                          freq = 'monthly', new_dim_name = NULL,
-                         insert_ftime = NULL) {
+                         insert_ftime = NULL, ftime_dim = 'time',
+                         sdate_dim = 'sdate', return_indices = FALSE) {
   # Check 's2dv_cube'
   if (!inherits(data, 's2dv_cube')) {
     stop("Parameter 'data' must be of the class 's2dv_cube'.")
@@ -62,51 +70,84 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL,
   if (!is.null(insert_ftime)) {
     if (!is.numeric(insert_ftime)) {
       stop("Parameter 'insert_ftime' should be an integer.")
+    }
+    if (length(insert_ftime) > 1) {
+      warning("Parameter 'insert_ftime' must be of length 1, and only the",
+              " first element will be used.")
+      insert_ftime <- insert_ftime[1]
+    }
+    # Check Dates
+    if (is.null(dim(data$attrs$Dates))) {
+      warning("Parameter 'Dates' must have dimensions, 'insert_ftime' won't ",
+              "be used.")
+      insert_ftime <- NULL
+    }
+  }
+  if (!is.null(insert_ftime)) {
+    # adding NAs at the beginning of the data in ftime dim
+    ftimedim <- which(names(dim(data$data)) == ftime_dim)
+    dims <- dim(data$data)
+    dims[ftimedim] <- insert_ftime
+    empty_array <- array(NA, dims)
+    data$data <- abind(empty_array, data$data, along = ftimedim)
+    names(dim(data$data)) <- names(dims)
+    # Reorder dates
+    data$attrs$Dates <- Reorder(data$attrs$Dates, c(ftime_dim, sdate_dim))
+    dates <- data$attrs$Dates
+    dates_subset <- Subset(dates, sdate_dim, 1)
+    # adding dates to Dates for the new NAs introduced
+    if ((dates_subset[2] - dates_subset[1]) == 1) {
+      timefreq <- 'days'
     } else {
-      if (length(insert_ftime) > 1) {
-        warning("Parameter 'insert_ftime' must be of length 1, and only the",
-                " first element will be used.")
-        insert_ftime <- insert_ftime[1]
-      }
-      # adding NAs at the begining of the data in ftime dim
-      ftimedim <- which(names(dim(data$data)) == 'ftime')
-      dims <- dim(data$data)
-      dims[ftimedim] <- insert_ftime
-      empty_array <- array(NA, dims)
-      data$data <- abind(empty_array, data$data, along = ftimedim)
-      names(dim(data$data)) <- names(dims)
-      # adding dates to Dates for the new NAs introduced
-      if ((data$attrs$Dates[2] - data$attrs$Dates[1]) == 1) {
-        timefreq <- 'days'
-      } else {
-        timefreq <- 'months'
-        warning("Time frequency of forecast time is considered monthly.")
-      }
-      start <- data$attrs$Dates
-      dim(start) <- c(ftime = length(start)/dims['sdate'], sdate = dims['sdate'])
-      # new <- array(NA, prod(dim(data$data)[c('ftime', 'sdate')]))
-      # Pending fix transform to UTC when concatenaiting
-      data$attrs$Dates <- do.call(c, lapply(1:dim(start)[2], function(x) {
-                          seq(start[1,x] - as.difftime(insert_ftime,
-                                                       units = timefreq),
-                              start[dim(start)[1],x], by = timefreq, tz = "UTC")}))
+      timefreq <- 'months'
+      warning("Time frequency of forecast time is considered monthly.")
     }
+
+    dim(dates) <- c(length(dates)/dims[sdate_dim], dims[sdate_dim])
+    names(dim(dates)) <- c(ftime_dim, sdate_dim)
+    # new <- array(NA, prod(dim(data$data)[c('ftime', 'sdate')]))
+    # Pending fix: transform to UTC when concatenating
+    data$attrs$Dates <- do.call(c, lapply(1:dim(dates)[2], function(x) {
+                        seq(dates[1,x] - as.difftime(insert_ftime,
+                                                     units = timefreq),
+                            dates[dim(dates)[1],x], by = timefreq, tz = "UTC")}))
   }
   if (is.null(indices)) {
-    if (any(split_dim %in% c('ftime', 'time', 'sdate'))) {
+    if (any(split_dim %in% c(ftime_dim, sdate_dim))) {
       indices <- data$attrs$Dates
-      if (any(names(dim(data$data)) %in% 'sdate')) {
+      if (any(names(dim(data$data)) %in% sdate_dim)) {
         if (!any(names(dim(data$data)) %in% split_dim)) {
           stop("Parameter 'split_dims' must be one of the dimension ",
                "names in parameter 'data'.")
         }
-        indices <- indices[1 : dim(data$data)[which(names(dim(data$data)) == split_dim)]]
+        indices <- indices[1:dim(data$data)[which(names(dim(data$data)) == split_dim)]]
       }
     }
   }
-  data$data <- SplitDim(data$data, split_dim = split_dim, indices = indices,
-                        freq = freq, new_dim_name = new_dim_name)
-  return(data)
+  # Call the function
+  res <- SplitDim(data = data$data, split_dim = split_dim,
+                  indices = indices, freq = freq,
+                  new_dim_name = new_dim_name,
+                  dates = data$attrs$Dates,
+                  return_indices = return_indices)
+  if (inherits(res, 'list')) {
+    data$data <- res$data
+    # Split dim on Dates
+    if (!is.null(res$dates)) {
+      data$attrs$Dates <- res$dates
+    }
+  } else {
+    data$data <- res
+  }
+  data$dims <- dim(data$data)
+
+  # Coordinates
+  # TO DO: Subset the split coordinate and add the new dimension coordinate.
+  if (return_indices) {
+    return(list(data = data, indices = res$indices))
+  } else {
+    return(data)
+  }
}
#'Function to Split Dimension
#'
@@ -128,6 +169,11 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL,
 #' the length in which to subset the dimension.
 #'@param new_dim_name A character string indicating the name of the new
 #' dimension.
+#'@param dates An optional array of dates of class 'POSIXct' with the
+#' corresponding time dimensions of 'data'. It is NULL by default.
+#'@param return_indices A logical value indicating whether to return the
+#' indices used in splitting the dimension. It is FALSE by default.
 #'@examples
 #'data <- 1 : 20
 #'dim(data) <- c(time = 10, lat = 2)
@@ -144,7 +190,8 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL,
 #'@importFrom ClimProjDiags Subset
 #'@export
 SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly',
-                     new_dim_name = NULL) {
+                     new_dim_name = NULL, dates = NULL,
+                     return_indices = FALSE) {
   # check data
   if (is.null(data)) {
     stop("Parameter 'data' cannot be NULL.")
@@ -166,7 +213,7 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly',
             "one and only the first element will be used.")
   }
   if (!any(names(dims) %in% split_dim)) {
-    stop("Parameter 'split_dims' must be one of the dimension ",
+    stop("Parameter 'split_dim' must be one of the dimension ",
         "names in parameter 'data'.")
   }
   pos_split <- which(names(dims) == split_dim)
@@ -209,8 +256,8 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly',
                 })
     if ('try-error' %in% class(indices) |
         sum(is.na(indices)) == length(indices)) {
-      stop("Dates provided in parameter 'indices' must be of class",
-           " 'POSIXct' or convertable to 'POSIXct'.")
+      stop("Dates provided in parameter 'indices' must be of class ",
+           "'POSIXct' or convertible to 'POSIXct'.")
     }
   }
@@ -229,7 +276,7 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly',
   } else if (freq == 'year') {
     indices <- as.numeric(strftime(indices, format = "%Y"))
     repited <- unique(indices)
-  } else if (freq == 'monthly' ) {
+  } else if (freq == 'monthly') {
     indices <- as.numeric(strftime(indices, format = "%m%Y"))
     repited <- unique(indices)
   } else {
@@ -254,15 +301,41 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly',
   data <- lapply(repited, function(x) {rebuild(x, data, along = split_dim,
                                        indices = indices, max_times)})
   data <- abind(data, along = length(dims) + 1)
-  if (is.character(freq)) {
-    names(dim(data)) <- c(names(dims), freq)
-  } else {
-    names(dim(data)) <- c(names(dims), 'index')
+
+  # Add new dim name
+  if (is.null(new_dim_name)) {
+    if (is.character(freq)) {
+      new_dim_name <- freq
+    } else {
+      new_dim_name <- 'index'
+    }
   }
-  if (!is.null(new_dim_name)) {
-    names(dim(data)) <- c(names(dims), new_dim_name)
+  names(dim(data)) <- c(names(dims), new_dim_name)
+
+  # Split also Dates
+  dates_exist <- FALSE
+  if (!is.null(dates)) {
+    if (any(split_dim %in% names(dim(dates)))) {
+      datesdims <- dim(dates)
+      dates <- lapply(repited, function(x) {rebuild(x, dates, along = split_dim,
+                                            indices = indices, max_times)})
+      dates <- abind(dates, along = length(datesdims) + 1)
+      dates <- as.POSIXct(dates, origin = '1970-01-01', tz = "UTC")
+      names(dim(dates)) <- c(names(datesdims), new_dim_name)
+    }
+    dates_exist <- TRUE
+  }
+
+  # Return objects
+  if (all(dates_exist, return_indices)) {
+    return(list(data = data, dates = dates, indices = indices))
+  } else if (all(dates_exist, !return_indices)) {
+    return(list(data = data, dates = dates))
+  } else if (all(!dates_exist, return_indices)) {
+    return(list(data = data, indices = indices))
+  } else {
+    return(data)
   }
-  return(data)
}

rebuild <- function(x, data, along, indices, max_times) {
diff --git a/R/CST_Subset.R b/R/CST_Subset.R
index 2e69c1f9fe2d7de0d2d87170f1806d20d0b9737a..098df3dff90af695fe097e6aca83cdb06b26d418 100644
--- a/R/CST_Subset.R
+++ b/R/CST_Subset.R
@@ -79,15 +79,15 @@ CST_Subset <- function(x, along, indices, drop = FALSE, var_dim = NULL,
   }

   # Subset data
-  x$data <- ClimProjDiags::Subset(x$data, along =
-                                  indices = indices,
-                                  drop = drop)
+  x$data <- Subset(x$data, along =
along, + indices = indices, + drop = drop) # Adjust dimensions x$dims <- dim(x$data) # Adjust coordinates for (dimension in 1:length(along)) { dim_name <- along[dimension] - index <- indices[[dimension]] + index <- indices[dimension] # Only rename coordinates that have not been dropped if (dim_name %in% names(x$dims)) { # Subset coordinate by indices @@ -113,10 +113,10 @@ CST_Subset <- function(x, along, indices, drop = FALSE, var_dim = NULL, } if ((!is.null(x$attrs$source_files)) && (dim_name %in% names(dim(x$attrs$source_files)))) { - x$attrs$source_files <- ClimProjDiags::Subset(x$attrs$source_files, - along = dim_name, - indices = index, - drop = drop) + x$attrs$source_files <- Subset(x$attrs$source_files, + along = dim_name, + indices = index, + drop = drop) } } # Remove metadata from variables that were dropped @@ -128,10 +128,10 @@ CST_Subset <- function(x, along, indices, drop = FALSE, var_dim = NULL, if (!(length(time_along) == 0)) { time_indices <- indices[match(time_along, along)] original_dates <- x$attrs$Dates - x$attrs$Dates <- ClimProjDiags::Subset(x$attrs$Dates, - along = time_along, - indices = time_indices, - drop = drop) + x$attrs$Dates <- Subset(x$attrs$Dates, + along = time_along, + indices = time_indices, + drop = drop) } # Subset metadata for (variable in 1:length(names(x$attrs$Variable$metadata))) { @@ -150,12 +150,12 @@ CST_Subset <- function(x, along, indices, drop = FALSE, var_dim = NULL, # Function to subset with attributes .subset_with_attrs <- function(x, ...) { args_subset <- list(...) - if (is.null(dim(x)) | length(dim(x)) == 1) { + if (any(is.null(dim(x)), length(dim(x)) == 1)) { l <- x[args_subset[['indices']][[1]]] } else { - l <- ClimProjDiags::Subset(x, along = args_subset[['along']], - indices = args_subset[['indices']], - drop = args_subset[['drop']]) + l <- Subset(x, along = args_subset[['along']], + indices = args_subset[['indices']], + drop = args_subset[['drop']]) } attr.names <- names(attributes(x)) attr.names <- attr.names[attr.names != 'names'] diff --git a/README.md b/README.md index 2dfbb5b7e20d691c468c889909a7c50180ba9297..8f56d9564949aba418ea3fca93206ed9bcaa05d3 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ A part from this GitLab project, that allows you to monitor CSTools progress, to - The CRAN repository [https://CRAN.R-project.org/package=CSTools](https://CRAN.R-project.org/package=CSTools) which includes the user manual and vignettes. - Video tutorials [https://www.medscope-project.eu/products/tool-box/cstools-video-tutorials/](https://www.medscope-project.eu/products/tool-box/cstools-video-tutorials/). - Other resources are under-development such [training material](https://earth.bsc.es/gitlab/external/cstools/-/tree/MEDCOF2022/inst/doc/MEDCOF2022) and a [full reproducible use case for forecast calibration](https://earth.bsc.es/gitlab/external/cstools/-/tree/develop-CalibrationVignette/FOCUS_7_2). +- See and run package [**use cases**](inst/doc/usecase.md) Installation ------------ @@ -46,43 +47,53 @@ Overview The CSTools package functions can be distributed in the following methods: -- **Data retrieval and formatting:** CST_Load, CST_Anomaly, CST_MergeDims, CST_SplitDims, CST_Subset, as.s2dv_cube, s2dv_cube, CST_SaveExp. +- **Data retrieval and formatting:** CST_Start, CST_SaveExp, CST_MergeDims, CST_SplitDim, CST_Subset, CST_InsertDim, CST_ChangeDimNames, as.s2dv_cube and s2dv_cube. - **Classification:** CST_MultiEOF, CST_WeatherRegimes, CST_RegimsAssign, CST_CategoricalEnsCombination, CST_EnsClustering. 
- **Downscaling:** CST_Analogs, CST_RainFARM, CST_RFTemp, CST_AdamontAnalog, CST_AnalogsPredictors.
-- **Correction:** CST_BEI_Weighting, CST_BiasCorrection, CST_Calibration, CST_QuantileMapping, CST_DynBiasCorrection.
+- **Correction and transformation:** CST_BiasCorrection, CST_Calibration, CST_QuantileMapping, CST_Anomaly, CST_BEI_Weighting, CST_DynBiasCorrection.
- **Assessment:** CST_MultiMetric, CST_MultivarRMSE
- **Visualization:** PlotCombinedMap, PlotForecastPDF, PlotMostLikelyQuantileMap, PlotPDFsOLE, PlotTriangles4Categories, PlotWeeklyClim.

-This package is designed to be compatible with other R packages such as [s2dv](https://CRAN.R-project.org/package=s2dv), [startR](https://CRAN.R-project.org/package=startR), [CSIndicators](https://CRAN.R-project.org/package=CSIndicators), [CSDownscale](https://earth.bsc.es/gitlab/es/csdownscale). Functions with the prefix **CST_** deal with a common object called `s2dv_cube` as inputs. Also, this object can be created from Load (s2dv) and from Start (startR) directly. Multiple functions from different packages can operate on this common data structure to easily define a complete post-processing workflow.
-
-The class `s2dv_cube` is mainly a list of named elements to keep data and metadata in a single object. Basic structure of the object:
+An `s2dv_cube` is an object that stores an ordered multidimensional array with named dimensions, specific coordinates and metadata (an in-memory representation of a NetCDF file). Its “methods” are the functions with the **CST** prefix. The basic structure of the class `s2dv_cube` is a list of lists. The first-level elements are `data`, `dims`, `coords` and `attrs`, and any specific element can be accessed with the `$` operator.
+As an example, this is what an `s2dv_cube` looks like (see `lonlat_temp_st$exp`):
```r
-$ data: [data array]
-$ dims: [dimensions vector]
-$ coords: [List of coordinates vectors]
-  $ sdate
-  $ time
-  $ lon
-  [...]
-$ attrs: [List of the attributes]
-  $ Variable:
-    $ varName
-    $ metadata
-  $ Datasets
-  $ Dates
-  $ source_files
-  $ when
-  $ load_parameters
+'s2dv_cube'
+Data          [ 279.99, 280.34, 279.45, 281.99, 280.92, ... ]
+Dimensions    ( dataset = 1, var = 1, member = 15, sdate = 6, ftime = 3, lat = 22, lon = 53 )
+Coordinates
+ * dataset : dat1
+ * var : tas
+   member : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ * sdate : 20001101, 20011101, 20021101, 20031101, 20041101, 20051101
+   ftime : 1, 2, 3
+ * lat : 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, ...
+ * lon : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
+Attributes
+   Dates  : 2000-11-01 2001-11-01 2002-11-01 2003-11-01 2004-11-01 ...
+   varName  : tas
+   metadata :
+      lat
+        units : degrees_north
+        long name : latitude
+      lon
+        units : degrees_east
+        long name : longitude
+      ftime
+        units : hours since 2000-11-01 00:00:00
+      tas
+        units : K
+        long name : 2 metre temperature
+   Datasets  : dat1
+   when  : 2023-10-02 10:11:06
+   source_files  : "/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20001101.nc" ...
+   load_parameters :
+       ( dat1 )  : dataset = dat1, var = tas, sdate = 20001101 ...
```
-More information about the `s2dv_cube` object class can be found here: [description of the s2dv_cube object structure document](https://docs.google.com/document/d/1ko37JFl_h6mOjDKM5QSQGikfLBKZq1naL11RkJIwtMM/edit?usp=sharing).
-
-The current `s2dv_cube` object (CSTools 5.0.0) differs from the original object used in the previous versions of the packages.
If you have **questions** on this change you can follow some of the points below: +This package is designed to be compatible with other R packages such as [s2dv](https://CRAN.R-project.org/package=s2dv), [startR](https://CRAN.R-project.org/package=startR), [CSIndicators](https://CRAN.R-project.org/package=CSIndicators), [CSDownscale](https://earth.bsc.es/gitlab/es/csdownscale). -- [New s2dv_cube object discussion](https://earth.bsc.es/gitlab/external/cstools/-/issues/94) -- [How to deal with the compatibility break](https://earth.bsc.es/gitlab/external/cstools/-/issues/112) -- [Testing issue and specifications](https://earth.bsc.es/gitlab/external/cstools/-/issues/110) +> **Note:** The current `s2dv_cube` object (CSTools version > 5.0.0) differs from the original object used in the previous versions of the packages. If you have doubts on this change you can follow some of the issues: [New s2dv_cube object discussion](https://earth.bsc.es/gitlab/external/cstools/-/issues/94), [How to deal with the compatibility break](https://earth.bsc.es/gitlab/external/cstools/-/issues/112) and [Testing issue and specifications](https://earth.bsc.es/gitlab/external/cstools/-/issues/110). More information can be found in this document: [About the new ‘s2dv_cube’](https://docs.google.com/document/d/1ko37JFl_h6mOjDKM5QSQGikfLBKZq1naL11RkJIwtMM/edit?usp=sharing). Contribute ---------- diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000000000000000000000000000000000000..455af82343e6fd1ad559f33c6183be966e45ab9c --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,24 @@ +citHeader("To cite package 'CSTools' in publications use:") + +yr <- sub('.*(2[[:digit:]]{3})-.*', '\\1', meta$Date, perl = TRUE) +if (length(yr) == 0) yr <- format(Sys.Date(), '%Y') + +bibentry( + bibtype = 'Manual', + title = paste0(meta$Package, ': ', meta$Title), + author = Filter(function(p) 'aut' %in% p$role, as.person(meta$Author)), + year = yr, + note = paste('R package version', meta$Version), + url = meta$URL +) + +bibentry( + bibtype = "Article", + author = c(person("Núria", "Pérez-Zanón", email = "nuria.perez@bsc.es"), person("", "et al.")), + title = "Climate Services Toolbox (CSTools) v4.0: from climate forecasts to climate forecast information", + doi = "10.5194/gmd-15-6115-2022", + url = "https://gmd.copernicus.org/articles/15/6115/2022/", + journal = "Geoscientific Model Development", + publisher = "European Geosciences Union", + year = "2022" +) diff --git a/inst/doc/tutorial/PATC2023/Figures/hcst_anom_cal.png b/inst/doc/tutorial/PATC2023/Figures/hcst_anom_cal.png new file mode 100644 index 0000000000000000000000000000000000000000..90e676fddac41bae2331599c93c99edd3bb4a344 Binary files /dev/null and b/inst/doc/tutorial/PATC2023/Figures/hcst_anom_cal.png differ diff --git a/inst/doc/tutorial/PATC2023/Figures/hcst_anom_raw.png b/inst/doc/tutorial/PATC2023/Figures/hcst_anom_raw.png new file mode 100644 index 0000000000000000000000000000000000000000..62ad9574a39aa07731ef06e07c5aaa9c9f8690e0 Binary files /dev/null and b/inst/doc/tutorial/PATC2023/Figures/hcst_anom_raw.png differ diff --git a/inst/doc/tutorial/PATC2023/Figures/skill_cal.png b/inst/doc/tutorial/PATC2023/Figures/skill_cal.png new file mode 100644 index 0000000000000000000000000000000000000000..e902ce9e43fbc3c2a1647f05d468c1924daecb93 Binary files /dev/null and b/inst/doc/tutorial/PATC2023/Figures/skill_cal.png differ diff --git a/inst/doc/tutorial/PATC2023/Figures/skill_raw.png b/inst/doc/tutorial/PATC2023/Figures/skill_raw.png new file mode 100644 
index 0000000000000000000000000000000000000000..f02ed13896dff9d4fe196dab87a4049c47ad3095
--- /dev/null
+++ b/inst/doc/tutorial/PATC2023/handson_2-data-assesment.md
@@ -0,0 +1,291 @@
+# Hands-on 2: Data assessment
+
+**Goal:** Use CSTools and s2dv to perform a quality assessment of a climate model.
+
+**Load packages**
+```r
+library(CSTools)
+library(s2dv)
+```
+
+## 1. Load the data
+
+In this section we will use the function **Start** to load the data. Then, we will transform the output **startR_array** into an **s2dv_cube** object so that the data is easy to use within **CSTools** functions.
+
+The **s2dv_cube** object is a structured list that contains the information needed to work with multidimensional arrays of data. Coordinates, dimensions, and metadata are neatly combined to allow for a more intuitive, concise, and less error-prone experience.
+
+> **Note:** If you have already loaded the data with **Start**, go directly to section **b)**.
+
+### a) Load the data
+
+The following section is taken from [PATC 2023 startR tutorial](https://earth.bsc.es/gitlab/es/startR/-/blob/doc-bsc_training_2023/inst/doc/tutorial/PATC2023/handson_1-data-loading.md?ref_type=heads). The experiment data are Meteo-France System 7 seasonal forecasts, and the observational reference is the ERA5 reanalysis from ECMWF, for near-surface temperature (short name: tas). We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016.
+
+```r
+# Use this one if on workstation or nord3 (have access to /esarchive)
+path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc"
+#----------------------------------------------------------------------
+# Run these two lines if you're on Marenostrum4 and log in with training account
+prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
+path_exp <- paste0(prefix, path_exp)
+#----------------------------------------------------------------------
+
+sdate_hcst <- paste0(1993:2016, '1101')
+
+hcst <- CST_Start(dat = path_exp,
+                  var = 'tas',
+                  syear = sdate_hcst,
+                  ensemble = 'all',
+                  time = 1:2,
+                  latitude = startR::values(list(20, 80)),
+                  latitude_reorder = startR::Sort(),
+                  longitude = startR::values(list(-20, 40)),
+                  longitude_reorder = startR::CircularSort(-180, 180),
+                  transform = startR::CDORemapper,
+                  transform_params = list(grid = 'r360x181', method = 'bilinear'),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude')),
+                  return_vars = list(time = 'syear',
+                                     longitude = NULL, latitude = NULL),
+                  retrieve = TRUE)
+
+```
+
+```r
+# Save Dates dimensions
+dates_dim <- dim(hcst$attrs$Dates)
+# Adjust the day to the correct month
+hcst$attrs$Dates <- hcst$attrs$Dates - lubridate::days(1)
+# Add again Dates dimensions
+dim(hcst$attrs$Dates) <- dates_dim
+
+date_string <- format(hcst$attrs$Dates, '%Y%m')
+sdate_obs <- array(date_string, dim = c(syear = 24, time = 2))
+```
+
+```r
+path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc'
+#----------------------------------------------------------------------
+# Run these two lines if you're on Marenostrum4 and log in with training account
+prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
+path_obs <- paste0(prefix, path_obs)
+#----------------------------------------------------------------------
+
+obs <- CST_Start(dat = path_obs,
+                 var = 'tas',
+                 syear = sdate_obs,
+                 split_multiselected_dims = TRUE,
+                 latitude = startR::values(list(20, 80)),
+                 latitude_reorder = startR::Sort(),
+                 longitude = startR::values(list(-20, 40)),
+                 longitude_reorder = startR::CircularSort(-180, 180),
+                 transform = startR::CDORemapper,
+                 transform_params = list(grid = 'r360x181', method = 'bilinear'),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(syear = c('syear', 'sdate'),
+                                 latitude = c('lat', 'latitude'),
+                                 longitude = c('lon', 'longitude')),
+                 return_vars = list(time = 'syear',
+                                    longitude = NULL, latitude = NULL),
+                 retrieve = TRUE)
+
+```
+
+### b) Create `s2dv_cube`:
+
+Now we convert the **hindcast and observations data** (**startR_array**) into an **s2dv_cube** object with the function **as.s2dv_cube** from the **CSTools** package:
+
+```r
+hcst <- as.s2dv_cube(hcst)
+obs <- as.s2dv_cube(obs)
+```
+By printing the object, we can see its structure. The first level elements are:
+- **Data**: A multidimensional array containing the data.
+- **Dimensions**: A vector with the data dimensions.
+- **Coordinates**: A list with vector coordinates.
+- **Attributes**: A list containing the metadata.
+
+```r
+> hcst
+'s2dv_cube'
+Data          [ 294.975204467773, 295.99658203125, 296.999153137207, 296.874618530273, 297.662521362305, 297.113525390625, 296.145011901855, 295.981201171875 ... ]
+Dimensions    ( dat = 1, var = 1, syear = 24, ensemble = 25, time = 2, latitude = 61, longitude = 61 )
+Coordinates
+ * dat : dat1
+ * var : tas
+ * syear : 19931101, 19941101, 19951101, 19961101, 19971101, 19981101, 19991101, 20001101, 20011101, 20021101, 20031101, 20041101, 20051101, 20061101, 20071101, 20081101, 20091101, 20101101, 20111101, 20121101, 20131101, 20141101, 20151101, 20161101
+   ensemble : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+   time : 1, 2
+ * latitude : 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80
+ * longitude : -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40
+Attributes
+   Dates  : 1993-11-30 1994-11-30 1995-11-30 1996-11-30 1997-11-30 ...
+   varName  : tas
+   metadata :
+      time
+        other : class, tzone
+      longitude
+        other : dim
+      latitude
+        other : dim
+      tas
+        units : K
+        long name : 2 metre temperature
+        other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table
+   Datasets  : dat1
+   when  : 2023-10-26 14:43:30
+   source_files  : /esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc ...
+   load_parameters :
+       ( dat1 )  : dat = dat1, var = tas, syear = 19931101 ...
+   ...
+```
+> **Note:** An **s2dv_cube** object is a structured list in base R. To access the elements, you need to use the `$` operator (e.g. `hcst$data`, `hcst$dims`, `hcst$coords`, `hcst$attrs`, ...)
+
+#### Exercise 1
+**Goal:** To find the **s2dv_cube** information of the **hindcast** data.
+
+1. What type of object is an **s2dv_cube** in base R? Use the functions **class()** and **typeof()**:
+
+2. What type of object is the element `hcst$data` (in common language)? Use the functions **dim()** and **typeof()** to check `hcst$data`:
+
+3. What are the **time dimensions** of the **hindcast** data? The Dates of an **s2dv_cube** can be found in element: `hcst$attrs$Dates`.
+
+4. What are the coordinates names in the **hindcast**? Use the function **names()** to check. The coordinates in the **s2dv_cube** are stored in element `hcst$coords`.
+
+5. At which **latitudes** and **longitudes** have we loaded the data?
+
+6. What is the **start date** dimension name of the `hcst`? What is the **ensemble member** dimension name?
+
+7. How many **ensemble members** do the **hindcast** and **observations** datasets have?
+
+8. What is the full variable name of the loaded data? Find out the information in `hcst$attrs$Variable$metadata` with the function **str()**.
+
+9. From what season is the data loaded? You can use the function **months()**.
+
+10. What are the **units** of the data?
+
+## 2. Calibrate the data
+
+The first step to perform a quality assessment is to correct the biases as well as the dispersion errors of the model. The function **Calibration** from **CSTools** allows us to choose among different member-by-member calibration techniques.
+
+In this case, we are going to choose a method called **"evmos"**, which applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011).
+
+> **Note:** The functions of **CSTools** whose names start with the prefix **CST** work directly with **s2dv_cube** objects. If we are not using **s2dv_cube** objects, we can use the standard versions without the prefix, which work with arrays (e.g. **Calibration** and **Anomaly** instead of **CST_Calibration** and **CST_Anomaly**).
+
+To calibrate the hindcast, we also need to use the observations and to specify some parameters, such as the adjustment specifications and the dimension names.
+
+#### Exercise 2:
+
+**Goal:** Calibrate the hindcast by completing the ensemble member and start date dimension names of the data.
+
+```r
+hcst_cal <- CST_Calibration(exp = hcst, obs = obs,
+                            cal.method = "evmos",
+                            eval.method = "leave-one-out",
+                            multi.model = FALSE,
+                            na.fill = TRUE,
+                            na.rm = TRUE,
+                            apply_to = NULL,
+                            alpha = NULL,
+                            memb_dim = ____,
+                            sdate_dim = ____,
+                            ncores = 10)
+```
+
+## 3. Compute Anomalies
+In this section we will compute the hindcast anomalies from the calibrated data in the previous step.
+
+Anomalies are deviations from the average weather conditions over a long period. A positive anomaly indicates that the conditions are higher than the average, while a negative one indicates that they are lower. Calculating anomalies is an important step in the model quality assessment for several reasons, such as removing seasonal variations, easing visualization, and supporting policy and decision-making.
+
+We are going to use the function **CST_Anomaly** from **CSTools**. This function computes the anomalies relative to a climatology computed along the selected dimension (in our case starting dates). The computation is carried out independently for experimental and observational datasets.
+
+#### Exercise 3:
+**Goal:** Calculate the hindcast anomalies from the calibrated hindcast and observations dataset. You can take a look at the [CSTools package documentation](https://cran.r-project.org/web/packages/CSTools/CSTools.pdf) on page 40 to find the missing parameters.
+
+```r
+hcst_anom <- CST_Anomaly(exp = ____,
+                         obs = ____,
+                         cross = TRUE,
+                         memb = TRUE,
+                         memb_dim = ____,
+                         dim_anom = 'syear',
+                         dat_dim = c('dat', 'ensemble'),
+                         ftime_dim = ____,
+                         ncores = 10)
+
+```
+
+
+## 4. Compute skill: RPSS
+To trust the climate models we need to evaluate their skill. To do so, we are going to use the **Ranked Probability Skill Score** (RPSS; Wilks, 2011). It is a skill score based on the **Ranked Probability Score** (RPS; Wilks, 2011). It can be used to assess whether a forecast presents an improvement or worsening with respect to a reference forecast.
+
+The **RPSS** ranges between minus infinity and 1. If the **RPSS is positive**, it indicates that the **forecast has higher skill than the reference forecast**, while a **negative** value means that it has a **lower skill**. It is computed as `RPSS = 1 - RPS_exp / RPS_ref`. The statistical significance is obtained based on a Random Walk test at the specified confidence level (DelSole and Tippett, 2016).
+
+Next, we compute the RPSS for anomalies:
+```r
+skill <- RPSS(exp = hcst_anom$exp$data,
+              obs = hcst_anom$obs$data,
+              time_dim = 'syear',
+              memb_dim = 'ensemble',
+              Fair = FALSE,
+              cross.val = TRUE,
+              ncores = 10)
+```
+
+The output of the **RPSS** function is a list of two elements. The first element is the RPSS; the second element, `sign`, is a logical array of the statistical significance of the RPSS with the same dimensions as `rpss`.
+
+#### Exercise 4:
+
+**Goal:** Compare the RPSS results with **calibrated** and **raw anomalies**.
+
+```r
+hcst_anom <- CST_Anomaly(exp = ____,
+                         obs = ____,
+                         cross = ____,
+                         memb = ____,
+                         memb_dim = ____,
+                         dim_anom = ____,
+                         dat_dim = ____,
+                         ftime_dim = ____,
+                         ncores = ____)
+skill_raw <- RPSS(exp = ____,
+                  obs = ____,
+                  time_dim = ____,
+                  memb_dim = ____,
+                  Fair = ____,
+                  cross.val = ____,
+                  ncores = ____)
+
+summary(____)
+
+```
+
+## 5. Additional Exercises: Visualization
+
+#### Exercise 5
+**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the raw and calibrated data.
+
+We are going to plot the **last year** of the hindcast period (**2016**) for the last timestep (**December**). Also, we are going to use the **last ensemble member** (arbitrary choice).
+
+```r
+lat <- hcst$coords$lat
+lon <- hcst$coords$lon
+
+PlotEquiMap(var = hcst_anom$exp$data[24, , 25, , 2, , ], lat = lat, lon = lon,
+            filled.continents = FALSE,
+            fileout = ____)
+
+PlotEquiMap(var = ____, lat = ____, lon = ____,
+            filled.continents = ____,
+            fileout = ____)
+```
+
+#### Exercise 6
+**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the RPSS results with calibrated and raw data.
+
+```r
+PlotEquiMap(var = ____, lat = ____, lon = ____,
+            brks = seq(-1, 1, by = 0.1),
+            filled.continents = ____,
+            fileout = ____)
+```
\ No newline at end of file
diff --git a/inst/doc/tutorial/PATC2023/handson_2-data-assesment_ans.md b/inst/doc/tutorial/PATC2023/handson_2-data-assesment_ans.md
new file mode 100644
index 0000000000000000000000000000000000000000..48b679ffd6f33a3341d75627119fb8b8bbbf6c26
--- /dev/null
+++ b/inst/doc/tutorial/PATC2023/handson_2-data-assesment_ans.md
@@ -0,0 +1,366 @@
+# Hands-on 2: Data assessment
+
+**Goal:** Use CSTools and s2dv to perform a quality assessment of a climate model.
+
+**Load packages**
+```r
+library(CSTools)
+library(s2dv)
+```
+
+## 1. Load the data
+
+In this section we will use the function **Start** to load the data.
Then, we will transform the output **startR_array** into an **s2dv_cube** object so that the data is easy to use within **CSTools** functions.
+
+The **s2dv_cube** object is a structured list that contains the information needed to work with multidimensional arrays of data. Coordinates, dimensions, and metadata are neatly combined to allow for a more intuitive, concise, and less error-prone experience.
+
+> **Note:** If you have already loaded the data with **Start**, go directly to section **b)**.
+
+### a) Load the data
+
+The following section is taken from [PATC 2023 startR tutorial](https://earth.bsc.es/gitlab/es/startR/-/blob/doc-bsc_training_2023/inst/doc/tutorial/PATC2023/handson_1-data-loading.md?ref_type=heads). The experiment data are Meteo-France System 7 seasonal forecasts, and the observational reference is the ERA5 reanalysis from ECMWF, for near-surface temperature (short name: tas). We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016.
+
+```r
+# Use this one if on workstation or nord3 (have access to /esarchive)
+path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc"
+#----------------------------------------------------------------------
+# Run these two lines if you're on Marenostrum4 and log in with training account
+prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
+path_exp <- paste0(prefix, path_exp)
+#----------------------------------------------------------------------
+
+sdate_hcst <- paste0(1993:2016, '1101')
+
+hcst <- CST_Start(dat = path_exp,
+                  var = 'tas',
+                  syear = sdate_hcst,
+                  ensemble = 'all',
+                  time = 1:2,
+                  latitude = startR::values(list(20, 80)),
+                  latitude_reorder = startR::Sort(),
+                  longitude = startR::values(list(-20, 40)),
+                  longitude_reorder = startR::CircularSort(-180, 180),
+                  transform = startR::CDORemapper,
+                  transform_params = list(grid = 'r360x181', method = 'bilinear'),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude')),
+                  return_vars = list(time = 'syear',
+                                     longitude = NULL, latitude = NULL),
+                  retrieve = TRUE)
+
+```
+
+```r
+# Save Dates dimensions
+dates_dim <- dim(hcst$attrs$Dates)
+# Adjust the day to the correct month
+hcst$attrs$Dates <- hcst$attrs$Dates - lubridate::days(1)
+# Add again Dates dimensions
+dim(hcst$attrs$Dates) <- dates_dim
+
+date_string <- format(hcst$attrs$Dates, '%Y%m')
+sdate_obs <- array(date_string, dim = c(syear = 24, time = 2))
+```
+
+```r
+path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc'
+#----------------------------------------------------------------------
+# Run these two lines if you're on Marenostrum4 and log in with training account
+prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
+path_obs <- paste0(prefix, path_obs)
+#----------------------------------------------------------------------
+
+obs <- CST_Start(dat = path_obs,
+                 var = 'tas',
+                 syear = sdate_obs,
+                 split_multiselected_dims = TRUE,
+                 latitude = startR::values(list(20, 80)),
+                 latitude_reorder = startR::Sort(),
+                 longitude = startR::values(list(-20, 40)),
+                 longitude_reorder = startR::CircularSort(-180, 180),
+                 transform = startR::CDORemapper,
+                 transform_params = list(grid = 'r360x181', method = 'bilinear'),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(syear = c('syear', 'sdate'),
+                                 latitude = c('lat', 'latitude'),
+                                 longitude = c('lon', 'longitude')),
+                 return_vars = list(time = 'syear',
+                                    longitude = NULL, latitude = NULL),
+                 retrieve = TRUE)
+
+```
+
+### b) Create `s2dv_cube`:
+
+Now we convert the **hindcast and observations data** (**startR_array**) into an **s2dv_cube** object with the function **as.s2dv_cube** from the **CSTools** package:
+
+```r
+hcst <- as.s2dv_cube(hcst)
+obs <- as.s2dv_cube(obs)
+```
+By printing the object, we can see its structure. The first level elements are:
+- **Data**: A multidimensional array containing the data.
+- **Dimensions**: A vector with the data dimensions.
+- **Coordinates**: A list with vector coordinates.
+- **Attributes**: A list containing the metadata.
+
+```r
+> hcst
+'s2dv_cube'
+Data          [ 294.975204467773, 295.99658203125, 296.999153137207, 296.874618530273, 297.662521362305, 297.113525390625, 296.145011901855, 295.981201171875 ... ]
+Dimensions    ( dat = 1, var = 1, syear = 24, ensemble = 25, time = 2, latitude = 61, longitude = 61 )
+Coordinates
+ * dat : dat1
+ * var : tas
+ * syear : 19931101, 19941101, 19951101, 19961101, 19971101, 19981101, 19991101, 20001101, 20011101, 20021101, 20031101, 20041101, 20051101, 20061101, 20071101, 20081101, 20091101, 20101101, 20111101, 20121101, 20131101, 20141101, 20151101, 20161101
+   ensemble : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+   time : 1, 2
+ * latitude : 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80
+ * longitude : -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40
+Attributes
+   Dates  : 1993-11-30 1994-11-30 1995-11-30 1996-11-30 1997-11-30 ...
+   varName  : tas
+   metadata :
+      time
+        other : class, tzone
+      longitude
+        other : dim
+      latitude
+        other : dim
+      tas
+        units : K
+        long name : 2 metre temperature
+        other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table
+   Datasets  : dat1
+   when  : 2023-10-26 14:43:30
+   source_files  : /esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc ...
+   load_parameters :
+       ( dat1 )  : dat = dat1, var = tas, syear = 19931101 ...
+   ...
+```
+> **Note:** An **s2dv_cube** object is a structured list in base R. To access the elements, you need to use the `$` operator (e.g. `hcst$data`, `hcst$dims`, `hcst$coords`, `hcst$attrs`, ...)
+
+#### Exercise 1
+**Goal:** To find the **s2dv_cube** information of the **hindcast** data.
+
+1. What type of object is an **s2dv_cube** in base R?
+```r
+class(hcst)
+# "s2dv_cube"
+typeof(hcst)
+# "list"
+```
+2. What type of object is the element `hcst$data` (in common language)? Use the functions **dim()** and **typeof()** to check `hcst$data`:
+```r
+typeof(hcst$data) # base type
+# [1] "double"
+dim(hcst$data) # dimensions
+#       dat       var     syear  ensemble      time  latitude longitude
+#         1         1        24        25         2        61        61
+
+# Answer: Multi-dimensional array / N-dimensional array / Tensor.
+```
+3. What are the **time dimensions** of the **hindcast** data? The Dates of an **s2dv_cube** can be found in element: `hcst$attrs$Dates`.
+```r
+dim(hcst$attrs$Dates)
+# syear  time
+#    24     2
+```
+4. What are the coordinates names in the **hindcast**? Use the function **names()** to check. The coordinates in the **s2dv_cube** are stored in element `hcst$coords`.
+```r
+names(hcst$coords)
+# [1] "dat"       "var"       "syear"     "ensemble"  "time"      "latitude"
+# [7] "longitude"
+```
+5. At which **latitudes** and **longitudes** have we loaded the data?
+```r
+hcst$coords$lat
+# latitude: 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
+# [26] 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
+# [51] 70 71 72 73 74 75 76 77 78 79 80
+
+hcst$coords$lon
+# [1] -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10  -9  -8  -7  -6  -5  -4  -3  -2
+# [20]  -1   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
+# [39]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
+# [58]  37  38  39  40
+```
+6. What is the **start date** dimension name of the **hindcast**? What is the **ensemble member** dimension name?
+```r
+hcst$dims
+#       dat       var     syear  ensemble      time  latitude longitude
+#         1         1        24        25         2        61        61
+# The start date dimension is 'syear' and the ensemble member dimension
+# is 'ensemble'.
+```
+
+7. How many **ensemble members** do the **hindcast** and **observations** datasets have?
+```r
+hcst$dims[['ensemble']]
+# [1] 25
+obs$dims
+# No ensemble member dimension in obs
+```
+8. What is the full variable name of the loaded data? Find out the information in `hcst$attrs$Variable$metadata` with the function **str()**.
+```r
+str(hcst$attrs$Variable)
+str(hcst$attrs$Variable$metadata$tas$long_name)
+# chr "2 metre temperature"
+```
+9. From what season is the data loaded? You can use the function **months()**.
+```r
+dim(hcst$attrs$Dates)
+hcst$attrs$Dates[1,]
+months(hcst$attrs$Dates[1,])
+# "December" "January"
+```
+10. What are the **units** of the data?
+```r
+hcst$attrs$Variable$metadata$tas$units
+# K
+```
+
+## 2. Calibrate the data
+
+The first step to perform a quality assessment is to correct the biases as well as the dispersion errors of the model. The function **Calibration** from **CSTools** allows us to choose among different member-by-member calibration techniques.
+
+In this case, we are going to choose a method called **"evmos"**, which applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011).
+
+> **Note:** The functions of **CSTools** whose names start with the prefix **CST** work directly with **s2dv_cube** objects. If we are not using **s2dv_cube** objects, we can use the standard versions without the prefix, which work with arrays (e.g. **Calibration** and **Anomaly** instead of **CST_Calibration** and **CST_Anomaly**).
+
+To calibrate the hindcast, we also need to use the observations and to specify some parameters, such as the adjustment specifications and the dimension names.
+
+#### Exercise 2:
+
+**Goal:** Calibrate the hindcast by completing the ensemble member and start date dimension names of the data.
+
+```r
+hcst_cal <- CST_Calibration(exp = hcst, obs = obs,
+                            cal.method = "evmos",
+                            eval.method = "leave-one-out",
+                            multi.model = FALSE,
+                            na.fill = TRUE,
+                            na.rm = TRUE,
+                            apply_to = NULL,
+                            alpha = NULL,
+                            memb_dim = "ensemble",
+                            sdate_dim = "syear",
+                            ncores = 10)
+```
+
+## 3. Compute Anomalies
+
+In this section we will compute the hindcast anomalies from the calibrated data in the previous step.
+
+Anomalies are deviations from the average weather conditions over a long period. A positive anomaly indicates that the conditions are higher than the average, while a negative one indicates that they are lower. Calculating anomalies is an important step in the model quality assessment for several reasons, such as removing seasonal variations, easing visualization, and supporting policy and decision-making.
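+
+As a minimal sketch of the idea behind this step (base R only; the toy array and dimension names below are illustrative assumptions, not the CSTools implementation):
+
+```r
+# Toy hindcast: 4 start dates x 3 ensemble members
+toy <- array(rnorm(12, mean = 280, sd = 2), dim = c(syear = 4, ensemble = 3))
+# Climatology: mean over the start date dimension (one value per member)
+clim <- apply(toy, 2, mean)
+# Anomalies: deviation of each start date from its member climatology
+anom <- sweep(toy, 2, clim, "-")
+```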
+ +We are going to use the function **CST_Anomaly** from **CSTools**. This function computes the anomalies relative to a climatology computed along the selected dimension (in our case starting dates). The computation is carried out independently for experimental and observational datasets. + +#### Exercise 3: +**Goal:** Calculate the hindcast anomalies from the calibrated hindcast and observations dataset. You can take a look on the [CSTools package documentation](https://cran.r-project.org/web/packages/CSTools/CSTools.pdf) on page 40 to find the missing parameters. + +```r +hcst_anom <- CST_Anomaly(exp = hcst_cal, + obs = obs, + cross = TRUE, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = 10) + +``` + + +## 4. Compute skill: RPSS + +To trust the climate models we need to evaluate its skill. To do it, we are going to use the **Ranked Probability Skill Score** (RPSS; Wilks, 2011). Is the skill score based on the **Ranked Probability Score** (RPS; Wilks, 2011). It can be used to assess whether a forecast presents an improvement or worsening with respect to a reference forecast. + +The **RPSS** ranges between minus infinite and 1. If the **RPSS is positive**, it indicates that the **forecast has higher skill than the reference forecast**, while a **negative** value means that it has a **lower skill**. It is computed as `RPSS = 1 - RPS_exp / RPS_ref`. The statistical significance is obtained based on a Random Walk test at the specified confidence level (DelSole and Tippett, 2016). + +Next, we compute the RPSS for anomalies: +```r +skill <- RPSS(exp = hcst_anom$exp$data, + obs = hcst_anom$obs$data, + time_dim = 'syear', + memb_dim = 'ensemble', + Fair = FALSE, + cross.val = TRUE, + ncores = 10) +``` + +The output of the **RPSS** function is a list of two elements. The first element is the RPSS; the second element, `sign` is a logical array of the statistical significance of the RPSS with the same dimensions as `rpss`. + +#### Exercise 4: +**Goal:** Compare the RPSS results with **calibrated** and **raw anomalies**. + +```r +hcst_anom_raw <- CST_Anomaly(exp = hcst, + obs = obs, + cross = TRUE, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = 10) +skill_raw <- RPSS(exp = hcst_anom_raw$exp$data, + obs = hcst_anom_raw$obs$data, + time_dim = 'syear', + memb_dim = 'ensemble', + Fair = FALSE, + cross.val = TRUE, + ncores = 10) +> summary(skill$rpss) + Min. 1st Qu. Median Mean 3rd Qu. Max. +-0.54005 -0.11225 -0.03170 -0.03722 0.04480 0.44376 +> summary(skill$sign) + Mode FALSE TRUE +logical 6798 402 +> summary(skill_raw$rpss) + Min. 1st Qu. Median Mean 3rd Qu. Max. +-0.59113 -0.11045 -0.03069 -0.03585 0.04637 0.44305 +> summary(skill_raw$sign) + Mode FALSE TRUE +logical 7055 387 +``` + +## 5. Additional Exercises: Visualization + +#### Exercise 5 +**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the raw and calibrated data. + +We are going to plot the **last year** of the hindcast period (**2016**) for the last timestep (**December**). Also, we are going to use the **last ensemble member** (arbirtrary choice). 
+ +```r +lat <- hcst$coords$lat +lon <- hcst$coords$lon + +PlotEquiMap(hcst_anom$exp$data[24, , 25, , 2, , ], lat = lat, lon = lon, + filled.continents = FALSE, + toptitle = "Calibrated Hindcast Anomalies") + +PlotEquiMap(hcst_anom_raw$exp$data[24, , 25, , 2, , ], lat = lat, lon = lon, + filled.continents = FALSE, + toptitle = "Raw Hindcast Anomalies") +``` +![](./Figures/hcst_anom_cal.png) +![](./Figures/hcst_anom_raw.png) + +#### Exercise 6 +**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the RPSS results with calibrated and raw data. + +```r +PlotEquiMap(skill$rpss[ , , 2, , ], lat = lat, lon = lon, + brks = seq(-1, 1, by = 0.1), + filled.continents = FALSE, + toptitle = "RPSS from Calibrated Hindcast Anomalies") +PlotEquiMap(skill_raw$rpss[ , , 2, , ], lat = lat, lon = lon, + brks = seq(-1, 1, by = 0.1), + filled.continents = FALSE, + toptitle = "RPSS from Raw Hindcast Anomalies") +``` +![](./Figures/skill_cal.png) +![](./Figures/skill_raw.png) \ No newline at end of file diff --git a/inst/doc/usecase.md b/inst/doc/usecase.md new file mode 100644 index 0000000000000000000000000000000000000000..272964abe77352b93a6691b8b4b544705cdb561c --- /dev/null +++ b/inst/doc/usecase.md @@ -0,0 +1,15 @@ +# Use case and example scripts + +In this document, you will find example scripts of the package. The first ones are use cases of cliimate data assessment. The second ones are example scripts on the use of the 's2dv_cube' object. + +1. **Use cases of climate data assesment and downscaling** + 1. [Bias adjustment for assessment of an extreme event](inst/doc/usecase/UseCase1_WindEvent_March2018.R) + 2. [Precipitation Downscaling with RainFARM RF 4](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF4.R) + 3. [Precipitation Downscaling with RainFARM RF 100](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF100.R) + 4. [Seasonal forecasts for a river flow](inst/doc/usecase/UseCase3_data_preparation_SCHEME_model.R) + +2. **Examples on how to use 's2dv_cube'** + 1. [Create an 's2dv_cube'](inst/doc/usecase/ex1_create.R) + 2. [Save 's2dv_cube'](inst/doc/usecase/ex2_save.R) + 3. [Modify any 's2dv_cube' dimension](inst/doc/usecase/ex3_modify_dims.R) + 4. [Subset any 's2dv_cube' dimension](inst/doc/usecase/ex4_subset.R) diff --git a/inst/doc/usecase/ex1_create.R b/inst/doc/usecase/ex1_create.R new file mode 100644 index 0000000000000000000000000000000000000000..3c07db4118167144b1bf633c28c334915838c21f --- /dev/null +++ b/inst/doc/usecase/ex1_create.R @@ -0,0 +1,212 @@ +#******************************************************************************* +# Title: Example script to create 's2dv_cube' objects +# Author: Eva Rifà Rovira +# Date: 16/01/2024 +#******************************************************************************* +# This example shows how to create an 's2dv_cube' object. +# There are two ways of creating an 's2dv_cube' object. +# (1) With the function s2dv_cube(): create it from scratch with any data. +# (2) With the function CST_Start(). This function returns an 's2dv_cube' +# from an 'startR_array'. + +# Needed packages +library(CSTools) +library(startR) +################################################################################ +#----------------------------------------------------- +# Example 1: Function s2dv_cube() from defined data +#----------------------------------------------------- +# Minimal use case, with s2dv_cube function. 
+
+# In this example, we use the function s2dv_cube() to create an object of class
+# 's2dv_cube' with the correct structure.
+
+# (1) We define the array with named dimensions:
+dat <- array(1:100, dim = c(time = 10, lat = 4, lon = 10))
+# (2) We define the coordinates as a list of vectors:
+coords <- list(time = 1:10, lat = 43:40, lon = 0:9)
+# (3) The metadata:
+metadata <- list(tas = list(level = '2m'),
+                 lon = list(cdo_grid_name = 'r360x181'),
+                 lat = list(cdo_grid_name = 'r360x181'))
+# (4) We create the Dates array.
+# First, the initial date:
+ini_date <- as.POSIXct('2010-01-01', format = '%Y-%m-%d')
+# Then, the sequence of dates:
+dates <- seq(ini_date, by = 'days', length.out = 10)
+# We define the dates dimensions:
+dim(dates) <- c(time = 10)
+# (5) We call the function s2dv_cube():
+dat_cube <- s2dv_cube(data = dat, coords = coords,
+                      varName = 'tas', metadata = metadata,
+                      Dates = dates,
+                      when = "2019-10-23 19:15:29 CET",
+                      source_files = c("/path/to/file1.nc", "/path/to/file2.nc"),
+                      Datasets = 'test_dataset')
+
+# We print the result to see the 's2dv_cube' structure:
+# > dat_cube
+# 's2dv_cube'
+# Data [ 1, 2, 3, 4, 5, 6, 7, 8 ... ]
+# Dimensions ( time = 10, lat = 4, lon = 10 )
+# Coordinates
+# * time : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
+# * lat : 43, 42, 41, 40
+# * lon : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+# Attributes
+# Dates : 2010-01-01 2010-01-02 2010-01-03 2010-01-04 2010-01-05 ...
+# varName : tas
+# metadata :
+# tas
+# other : level
+# lon
+# other : cdo_grid_name
+# lat
+# other : cdo_grid_name
+# Datasets : test_dataset
+# when : 2019-10-23 19:15:29 CET
+# source_files : /path/to/file1.nc ...
+
+#-----------------------------------------------------
+# Example 2: Function as.s2dv_cube()
+#-----------------------------------------------------
+# (1) Example using CST_Start
+
+# NOTE 1: CST_Start() is just a wrapper of the function Start() with the
+# transformation to an 's2dv_cube' object.
+# NOTE 2: In order for the auxiliary functions of the input arguments from
+# startR to work, we need to call them explicitly from the startR namespace
+# (e.g. startR::indices()).
+
+# We just need to define a CST_Start call with all the information:
+
+repos1 <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+
+res <- CST_Start(dat = list(list(name = 'system4_m1', path = repos2),
+                            list(name = 'system5_m1', path = repos1)),
+                 var = c('tas', 'sfcWind'),
+                 sdate = c('20160101', '20170101'),
+                 ensemble = startR::indices(1:2),
+                 time = startR::indices(1:2),
+                 lat = startR::indices(1:10),
+                 lon = startR::indices(1:10),
+                 synonims = list(lat = c('lat', 'latitude'),
+                                 lon = c('lon', 'longitude')),
+                 return_vars = list(time = 'sdate',
+                                    longitude = 'dat',
+                                    latitude = 'dat'),
+                 metadata_dims = c('dat', 'var'),
+                 retrieve = TRUE)
+
+
+# Now we can explore the object:
+
+# 1st level
+names(res)
+# "data"   "dims"   "coords" "attrs"
+
+dim(res$data)
+#      dat      var    sdate ensemble     time      lat      lon
+#        2        2        2        2        2       10       10
+
+res$coords$lon
+# [1] 0.000000 0.703125 1.406250 2.109375 2.812500 3.515625 4.218750 4.921875
+# [9] 5.625000 6.328125
+attr(res$coords$lon, 'indices')
+# [1] FALSE
+# NOTE: The attribute 'indices' is FALSE, which means that the longitude
+# elements are the actual values of the longitude coordinate.
+
+res$coords$ensemble
+# [1] 1 2
+# attr(,"indices")
+# [1] TRUE
+
+# Now we take a look at the Dates array. It must have the time dimensions
+# of the data.
+dim(res$attrs$Dates)
+# sdate  time
+#     2     2
+
+# To see the nested list structure of the object, we just need to use the
+# function str():
+str(res)
+
+#-----------------------------------------------------
+
+# (2) Example using the as.s2dv_cube() function
+
+# We'll load the data with Start() and then we'll transform the 'startR_array'
+# to an 's2dv_cube' object with the function as.s2dv_cube(). We are going
+# to load the same data as before, with the same call:
+
+repos1 <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+
+res <- Start(dat = list(list(name = 'system4_m1', path = repos2),
+                        list(name = 'system5_m1', path = repos1)),
+             var = c('tas', 'sfcWind'),
+             sdate = c('20160101', '20170101'),
+             ensemble = startR::indices(1:2),
+             time = startR::indices(1:2),
+             lat = startR::indices(1:10),
+             lon = startR::indices(1:10),
+             synonims = list(lat = c('lat', 'latitude'),
+                             lon = c('lon', 'longitude')),
+             return_vars = list(time = 'sdate',
+                                longitude = 'dat',
+                                latitude = 'dat'),
+             metadata_dims = c('dat', 'var'),
+             retrieve = TRUE)
+
+# Now, we use the function as.s2dv_cube() to transform the 'startR_array'
+# into an 's2dv_cube':
+res_cube <- as.s2dv_cube(res)
+
+# If we print the object directly in the terminal, we can see
+# all the elements nicely:
+
+# > res_cube
+# 's2dv_cube'
+# Data [ 248.241973876953, 247.365753173828, 6.80753087997437, 5.46453714370728, 247.256896972656, 248.500869750977, 6.25862503051758, 5.76889991760254 ... ]
+# Dimensions ( dat = 2, var = 2, sdate = 2, ensemble = 2, time = 2, lat = 10, lon = 10 )
+# Coordinates
+# * dat : system4_m1, system5_m1
+# * var : tas, sfcWind
+# * sdate : 20160101, 20170101
+# ensemble : 1, 2
+# time : 1, 2
+# * lat : 89.4628215685774, 88.7669513528422, 88.0669716474306, 87.366063433082, 86.6648030134408, 85.9633721608804, 85.2618460607126, 84.5602613830534, 83.8586381286076, 83.1569881285417
+# * lon : 0, 0.703125, 1.40625, 2.109375, 2.8125, 3.515625, 4.21875, 4.921875, 5.625, 6.328125
+# Attributes
+# Dates : 2016-02-01 2017-02-01 2016-03-01 2017-03-01
+# varName : tas sfcWind
+# metadata :
+# time
+# units : hours since 2016-01-01 00:00:00
+# other : ndims, size, standard_name, calendar
+# lon
+# units : degrees_east
+# long name : longitude
+# other : ndims, size, standard_name, axis
+# lat
+# units : degrees_north
+# long name : latitude
+# other : ndims, size, standard_name, axis
+# tas
+# units : K
+# long name : 2 metre temperature
+# other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table, grid_type
+# sfcWind
+# units : m s**-1
+# long name : 10 meter windspeed
+# other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table, grid_type
+# Datasets : system4_m1 ...
+# when : 2024-01-17 11:38:27
+# source_files : /esarchive/exp/ecmwf/system4_m1/monthly_mean/tas_f6h/tas_20160101.nc ...
+# load_parameters :
+# ( system4_m1 ) : dat = system4_m1, var = tas ..., sdate = 20160101 ...
+# ...
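+
+# NOTE (added check): a minimal sketch to verify the conversion, using base R
+# only. The 's2dv_cube' keeps the same data array, so its class and dimensions
+# can be compared with the original 'startR_array':
+class(res_cube)
+# Expected: "s2dv_cube"
+identical(dim(res_cube$data), dim(res))
+# Expected: TRUE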
+
+################################################################################
\ No newline at end of file
diff --git a/inst/doc/usecase/ex2_save.R b/inst/doc/usecase/ex2_save.R
new file mode 100644
index 0000000000000000000000000000000000000000..7fedd3492a6cde143b595995173406d0d1bb8c9e
--- /dev/null
+++ b/inst/doc/usecase/ex2_save.R
@@ -0,0 +1,463 @@
+#*******************************************************************************
+# Title: Example script to save 's2dv_cube' to NetCDF using CST_SaveExp
+# Author: Eva Rifà Rovira
+# Date: 29/11/2024
+#*******************************************************************************
+# In this script, we'll see multiple ways to store the 's2dv_cube' (CST_SaveExp)
+# or the multidimensional array (SaveExp) to NetCDF.
+
+# Needed packages:
+library(CSTools)
+library(CSIndicators)
+library(s2dv)
+library(startR)
+
+################################################################################
+#-----------------------------------------------------
+# Example 1: Multidimensional array and Dates, without metadata and coordinates
+#-----------------------------------------------------
+# (1.1) Minimal use case, without Dates
+data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4))
+SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, single_file = TRUE)
+SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = NULL, single_file = FALSE) # same result
+
+# (1.2) Forecast time dimension, without Dates
+data <- array(1:5, dim = c(ftime = 5, lon = 4, lat = 4))
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = NULL, single_file = TRUE)
+
+# (1.3) Start date dimension, without Dates
+data <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4))
+SaveExp(data, ftime_dim = NULL, memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = 'sdate', single_file = TRUE)
+
+# (1.4) Only forecast time dimension (no sdate), with Dates
+data <- array(1:5, dim = c(ftime = 5, lon = 4, lat = 4))
+dates <- c('20000101', '20010102', '20020103', '20030104', '20040105')
+dates <- as.Date(dates, format = "%Y%m%d", tz = "UTC")
+dim(dates) <- c(ftime = 5)
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = NULL, Dates = dates, single_file = FALSE)
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = NULL, Dates = dates, single_file = TRUE)
+# For this case, we get the same result using single_file = FALSE / TRUE.
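+
+# NOTE (added check): 'Dates' must be a named array whose time dimensions
+# also exist in the data; a quick sketch to verify this before saving:
+all(names(dim(dates)) %in% names(dim(data)))
+# Expected: TRUE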
+
+# (1.5) Forecast time and 1 sdate, with Dates
+data <- array(1:5, dim = c(sdate = 1, ftime = 5, lon = 4, lat = 4))
+dates <- c('20000101', '20010102', '20020103', '20030104', '20040105')
+dates <- as.Date(dates, format = "%Y%m%d", tz = "UTC")
+dim(dates) <- c(sdate = 1, ftime = 5)
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = FALSE)
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE)
+
+# (1.6) Test global attributes, in a single file
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = TRUE,
+        extra_string = 'test',
+        global_attrs = list(system = 'tes1', reference = 'test2'))
+# (1.7) Test global attributes, in multiple files
+SaveExp(data, ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL,
+        var_dim = NULL, sdate_dim = 'sdate', Dates = dates, single_file = FALSE,
+        extra_string = 'test',
+        global_attrs = list(system = 'tes1', reference = 'test2'))
+#-----------------------------------------------------
+# Example 2: Test sample data from Start and from Load
+#-----------------------------------------------------
+
+# (2.1) Test SaveExp
+exp <- CSTools::lonlat_prec_st
+data <- exp$data
+Dates <- exp$attrs$Dates
+coords <- exp$coords
+varname <- exp$attrs$Variable$varName
+metadata <- exp$attrs$Variable$metadata
+SaveExp(data = data, Dates = Dates, coords = coords, varname = varname,
+        metadata = metadata, ftime_dim = 'ftime', startdates = 1:4,
+        var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset',
+        sdate_dim = 'sdate', single_file = FALSE)
+SaveExp(data = data, Dates = Dates, coords = coords, varname = varname,
+        metadata = metadata, ftime_dim = 'ftime', startdates = 1:4,
+        var_dim = 'var', memb_dim = 'member', dat_dim = 'dataset',
+        sdate_dim = 'sdate', single_file = TRUE)
+
+# (2.2) lonlat_temp_st$exp in a single file with units 'hours since'
+# (2.2.1) We save the data
+data <- lonlat_temp_st$exp
+CST_SaveExp(data = data, ftime_dim = 'ftime',
+            var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate',
+            units_hours_since = TRUE, single_file = TRUE)
+
+# (2.2.2) Now we read the output with Start:
+sdate <- as.vector(lonlat_temp_st$exp$coords$sdate)
+path <- paste0(getwd(), "/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc")
+out <- Start(dat = path,
+             var = 'tas',
+             member = 'all',
+             sdate = 'all',
+             ftime = 'all',
+             lat = 'all',
+             lon = 'all',
+             return_vars = list(lon = 'dat',
+                                lat = 'dat',
+                                ftime = NULL,
+                                sdate = NULL),
+             retrieve = TRUE)
+
+attributes(out)$Variables$common$ftime
+out_cube <- as.s2dv_cube(out)
+out_cube <- CST_ChangeDimNames(out_cube,
+                               original_names = c("dat"),
+                               new_names = c("dataset"))
+all.equal(data$data, out_cube$data)
+identical(data$data, out_cube$data)
+
+# Plot the results and compare
+PlotEquiMap(out_cube$data[, , 1, 1, 1, , ], lon = out_cube$coords$lon,
+            lat = out_cube$coords$lat, filled.continents = FALSE)
+
+PlotEquiMap(lonlat_temp_st$exp$data[, , 1, 1, 1, , ], lon = out_cube$coords$lon,
+            lat = out_cube$coords$lat, filled.continents = FALSE)
+
+# (2.3) lonlat_temp_st$exp in a single file with units of time frequency
+# (2.3.1) We save the data
+data <- lonlat_temp_st$exp
+CST_SaveExp(data = data, ftime_dim = 'ftime',
+            var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = TRUE, units_hours_since = FALSE)
+dates <- lonlat_temp_st$exp$attrs$Dates
+# (2.3.2) Now we read the output with Start:
+sdate <-
as.vector(lonlat_temp_st$exp$coords$sdate)
+path <- paste0(getwd(), "/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc")
+out <- Start(dat = path,
+             var = 'tas',
+             lon = 'all',
+             lat = 'all',
+             ftime = 'all',
+             sdate = 'all',
+             member = 'all',
+             return_vars = list(
+               lon = 'dat',
+               lat = 'dat',
+               ftime = NULL,
+               sdate = NULL),
+             retrieve = TRUE)
+
+attributes(out)$Variables$common$ftime
+# [1] "1 months" "2 months" "3 months"
+out_cube2 <- as.s2dv_cube(out)
+
+# (2.4) lonlat_temp_st$exp in separated files with units of hours since
+# (2.4.1) We save the data
+data <- lonlat_temp_st$exp
+CST_SaveExp(data = data, ftime_dim = 'ftime',
+            var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = FALSE)
+# (2.4.2) We load the data
+sdate <- as.vector(lonlat_temp_st$exp$coords$sdate)
+path <- paste0(getwd(), "/dat1/$var$/$var$_$sdate$.nc")
+
+out <- Start(dat = path, var = 'tas',
+             sdate = sdate,
+             lon = 'all',
+             lat = 'all',
+             ftime = 'all',
+             member = 'all',
+             return_vars = list(lon = 'dat',
+                                lat = 'dat',
+                                ftime = 'sdate'),
+             retrieve = TRUE)
+out_cube1 <- as.s2dv_cube(out)
+# (2.5) lonlat_prec_st in a single file with units of time frequency
+# (2.5.1) We save the data
+data <- lonlat_prec_st
+CST_SaveExp(data = data, ftime_dim = 'ftime',
+            var_dim = 'var', dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = TRUE, units_hours_since = FALSE)
+
+# (2.5.2) We read the data
+sdate <- as.vector(data$coords$sdate)
+path <- paste0(getwd(), "/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc")
+out <- Start(dat = path,
+             var = 'prlr',
+             lon = 'all',
+             lat = 'all',
+             ftime = 'all',
+             sdate = 'all',
+             member = 'all',
+             return_vars = list(
+               lon = 'dat',
+               lat = 'dat',
+               ftime = NULL,
+               sdate = NULL),
+             retrieve = TRUE)
+
+attributes(out)$Variables$common$ftime
+#  [1] "1 days"  "2 days"  "3 days"  "4 days"  "5 days"  "6 days"  "7 days"
+#  [8] "8 days"  "9 days"  "10 days" "11 days" "12 days" "13 days" "14 days"
+# [15] "15 days" "16 days" "17 days" "18 days" "19 days" "20 days" "21 days"
+# [22] "22 days" "23 days" "24 days" "25 days" "26 days" "27 days" "28 days"
+# [29] "29 days" "30 days" "31 days"
+out_cube <- as.s2dv_cube(out)
+
+# (2.6) Test observations: lonlat_temp
+# (2.6.1) Save the data
+data <- lonlat_temp$obs
+CST_SaveExp(data = data, ftime_dim = 'ftime', memb_dim = NULL,
+            var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = TRUE, units_hours_since = FALSE)
+# (2.6.2) Now we read the output with Start:
+sdate <- c('20001101', '20051101')
+path <- paste0(getwd(), "/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc")
+out <- Start(dat = path,
+             var = 'tas',
+             lon = 'all',
+             lat = 'all',
+             ftime = 'all',
+             member = 1,
+             sdate = 'all',
+             return_vars = list(
+               lon = 'dat',
+               lat = 'dat',
+               ftime = NULL,
+               sdate = NULL),
+             retrieve = TRUE)
+dim(out)
+attributes(out)$Variables$common$ftime
+
+# (2.7) Test lonlat_prec
+# (2.7.1) Save the data
+data <- lonlat_prec
+CST_SaveExp(data = data, ftime_dim = 'ftime', memb_dim = NULL,
+            var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = TRUE, units_hours_since = FALSE)
+# (2.7.2) Now we read the output with Start:
+sdate <- as.vector(data$coords$sdate)
+path <- paste0(getwd(), "/$var$_", sdate[1], "_", sdate[length(sdate)], ".nc")
+out <- Start(dat = path,
+             var = 'prlr',
+             lon = 'all',
+             lat = 'all',
+             ftime = 'all',
+             sdate = 'all',
+             member = 'all',
+             return_vars = list(
+               lon = 'dat',
+               lat = 'dat',
+               ftime = NULL,
+               sdate = NULL),
+             retrieve = TRUE)
+dim(out)
+lonlat_prec$dims
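+
+# NOTE (added sketch): the saved NetCDF can also be inspected directly with
+# easyNCDF (assuming the single-file name pattern used above):
+library(easyNCDF)
+file_nc <- paste0(getwd(), "/prlr_", sdate[1], "_", sdate[length(sdate)], ".nc")
+NcReadDims(file_nc)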
+
+# (2.8) Test with ftime_dim NULL
+data <- lonlat_temp$exp
+data <- CST_Subset(data, along = 'ftime', indices = 1, drop = 'selected')
+
+CST_SaveExp(data = data, ftime_dim = NULL,
+            var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = FALSE, units_hours_since = FALSE)
+
+#-----------------------------------------------------
+# Example 3: Special cases
+#-----------------------------------------------------
+
+# (3.1) Two variables and two datasets in separated files
+# (3.1.1) We load the data from Start
+repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
+
+data3 <- Start(dat = list(list(name = 'system4_m1', path = repos2),
+                          list(name = 'system5_m1', path = repos)),
+               var = c('tas', 'sfcWind'),
+               sdate = c('20160101', '20170101'),
+               ensemble = indices(1),
+               time = indices(1:2),
+               lat = indices(1:10),
+               lon = indices(1:10),
+               synonims = list(lat = c('lat', 'latitude'),
+                               lon = c('lon', 'longitude')),
+               return_vars = list(time = 'sdate',
+                                  longitude = 'dat',
+                                  latitude = 'dat'),
+               metadata_dims = c('dat', 'var'),
+               retrieve = TRUE)
+cube3 <- as.s2dv_cube(data3)
+
+# (3.1.2) We save the data
+CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = 'ensemble', dat_dim = 'dat')
+
+# (3.1.3) We read the data again with Start
+repos <- paste0(getwd(), "/system5_m1/$var$/$var$_$sdate$.nc")
+repos2 <- paste0(getwd(), "/system4_m1/$var$/$var$_$sdate$.nc")
+
+data3out <- Start(dat = list(list(name = 'system4_m1', path = repos2),
+                             list(name = 'system5_m1', path = repos)),
+                  var = c('tas', 'sfcWind'),
+                  sdate = c('20160101', '20170101'),
+                  ensemble = indices(1),
+                  time = indices(1:2),
+                  lat = indices(1:10),
+                  lon = indices(1:10),
+                  synonims = list(lat = c('lat', 'latitude'),
+                                  lon = c('lon', 'longitude')),
+                  return_vars = list(time = 'sdate',
+                                     longitude = 'dat',
+                                     latitude = 'dat'),
+                  metadata_dims = c('dat', 'var'),
+                  retrieve = TRUE)
+
+summary(data3out)
+summary(data3)
+
+dim(data3)
+dim(data3out)
+
+# (3.2) Two variables and two datasets in the same file
+
+CST_SaveExp(data = cube3, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = 'ensemble', dat_dim = 'dat',
+            single_file = TRUE)
+# TODO: Read the output with Start
+
+# (3.3) Observations (from the startR use case)
+repos_exp <- paste0('/esarchive/exp/ecearth/a1tr/cmorfiles/CMIP/EC-Earth-Consortium/',
+                    'EC-Earth3/historical/r24i1p1f1/Amon/$var$/gr/v20190312/',
+                    '$var$_Amon_EC-Earth3_historical_r24i1p1f1_gr_$sdate$01-$sdate$12.nc')
+
+exp <- Start(dat = repos_exp,
+             var = 'tas',
+             sdate = as.character(c(2005:2008)),
+             time = indices(1:3),
+             lat = 1:10,
+             lat_reorder = Sort(),
+             lon = 1:10,
+             lon_reorder = CircularSort(0, 360),
+             synonims = list(lat = c('lat', 'latitude'),
+                             lon = c('lon', 'longitude')),
+             return_vars = list(lon = NULL,
+                                lat = NULL,
+                                time = 'sdate'),
+             retrieve = FALSE)
+dates <- attr(exp, 'Variables')$common$time
+repos_obs <- '/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$_f6h/$var$_$date$.nc'
+
+obs <- Start(dat = repos_obs,
+             var = 'tas',
+             date = unique(format(dates, '%Y%m')),
+             time = values(dates), # dim: [sdate = 4, time = 3]
+             lat = 1:10,
+             lat_reorder = Sort(),
+             lon = 1:10,
+             lon_reorder = CircularSort(0, 360),
+             time_across = 'date',
+             merge_across_dims = TRUE,
+             split_multiselected_dims = TRUE,
+             synonims = list(lat = c('lat', 'latitude'),
+                             lon = c('lon', 'longitude')),
+             return_vars = list(lon = NULL,
+                                lat = NULL,
+                                time = 'date'),
+             retrieve = TRUE)
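+
+# NOTE (added check): with 'merge_across_dims' and 'split_multiselected_dims',
+# the 'time' selector of dimensions [sdate = 4, time = 3] is split back, so
+# 'obs' is expected to have dimensions (dat, var, sdate, time, lat, lon):
+dim(obs)
+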
+obscube <- as.s2dv_cube(obs)
+CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = NULL, dat_dim = 'dat',
+            single_file = TRUE, extra_string = 'obs_tas')
+CST_SaveExp(data = obscube, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = NULL, dat_dim = 'dat',
+            single_file = FALSE, extra_string = 'obs_tas')
+
+#-----------------------------------------------------
+# Example 4: Time bounds
+#-----------------------------------------------------
+
+# Example file: /esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20231128.nc
+library(CSIndicators)
+exp <- CSTools::lonlat_prec_st
+exp$attrs$Dates <- Reorder(exp$attrs$Dates, c(2, 1))
+res <- CST_PeriodAccumulation(data = exp, time_dim = 'ftime',
+                              start = list(10, 03), end = list(20, 03))
+# > dim(res$attrs$Dates)
+# sdate
+#     3
+# (4.1) All data in a single file
+CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var',
+            memb_dim = 'member', dat_dim = 'dataset',
+            startdates = res$attrs$Dates, single_file = TRUE)
+# (4.1.1) The same with SaveExp
+SaveExp(data = res$data, coords = res$coords,
+        Dates = NULL, time_bounds = res$attrs$time_bounds,
+        ftime_dim = NULL, var_dim = 'var',
+        varname = res$attrs$Variable$varName,
+        metadata = res$attrs$Variable$metadata,
+        memb_dim = 'member', dat_dim = 'dataset',
+        startdates = res$attrs$Dates, single_file = TRUE)
+# (4.2) All data in separated files
+CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var',
+            memb_dim = 'member', dat_dim = 'dataset',
+            startdates = res$attrs$Dates, single_file = FALSE)
+# (4.2.1) The same with SaveExp
+SaveExp(data = res$data, coords = res$coords,
+        Dates = res$attrs$Dates, time_bounds = res$attrs$time_bounds,
+        ftime_dim = NULL, var_dim = 'var',
+        varname = res$attrs$Variable$varName,
+        metadata = res$attrs$Variable$metadata,
+        memb_dim = 'member', dat_dim = 'dataset',
+        startdates = res$attrs$Dates, single_file = FALSE)
+# (4.3) Numeric 'startdates'
+CST_SaveExp(data = res, ftime_dim = NULL, var_dim = 'var',
+            memb_dim = 'member', dat_dim = 'dataset',
+            startdates = 1:4, single_file = FALSE)
+
+# (4.4) We change the time dimension to 'time' and set sdate_dim = NULL
+dim(res$attrs$time_bounds[[1]]) <- c(time = 3)
+dim(res$attrs$time_bounds[[2]]) <- c(time = 3)
+dim(res$attrs$Dates) <- c(time = 3)
+dim(res$data) <- c(dataset = 1, var = 1, member = 6, time = 3, lat = 4, lon = 4)
+
+# (4.4.1) All data in a single file
+CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = 'member', dat_dim = 'dataset', sdate_dim = NULL,
+            startdates = res$attrs$Dates, single_file = TRUE)
+# (4.4.2) All data in separated files
+CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = 'member', dat_dim = 'dataset', sdate_dim = NULL,
+            startdates = res$attrs$Dates, single_file = FALSE)
+
+# (4.5) Forecast time units
+CST_SaveExp(data = res, ftime_dim = 'time', var_dim = 'var',
+            memb_dim = 'member', dat_dim = 'dataset', sdate_dim = NULL,
+            startdates = res$attrs$Dates, single_file = TRUE,
+            units_hours_since = FALSE)
+
+#-----------------------------------------------------
+# Example 5: Read data with Load
+#-----------------------------------------------------
+
+data <- lonlat_temp$exp
+# data <- lonlat_temp$obs
+# data <- lonlat_prec
+CST_SaveExp(data = data, ftime_dim = 'ftime',
+            var_dim = NULL, dat_dim = 'dataset', sdate_dim = 'sdate',
+            single_file = FALSE, units_hours_since = FALSE)
+# Now we read the output with Load:
+# startDates <- c('20001101', '20011101', '20021101',
+#                 '20031101', '20041101', '20051101')
+
+# infile <- list(path =
paste0(getwd(),
+#                '/system5c3s/$VAR_NAME$/$VAR_NAME$_$START_DATE$.nc'))
+# out_lonlat_temp <- CST_Load(var = 'tas', exp = list(infile), obs = NULL,
+#                             sdates = startDates,
+#                             nmember = 15,
+#                             leadtimemax = 3,
+#                             latmin = 27, latmax = 48,
+#                             lonmin = -12, lonmax = 40,
+#                             output = "lonlat")
+
+# NOTE: This case hasn't been developed further, since the function to load
+# data that will be maintained is CST_Start.
+################################################################################
\ No newline at end of file
diff --git a/inst/doc/usecase/ex3_modify_dims.R b/inst/doc/usecase/ex3_modify_dims.R
new file mode 100644
index 0000000000000000000000000000000000000000..1cf2984bed9cfd8598b1a283f980769d0646e589
--- /dev/null
+++ b/inst/doc/usecase/ex3_modify_dims.R
@@ -0,0 +1,166 @@
+#*******************************************************************************
+# Title: Script to modify the dimensions of the 's2dv_cube'
+# Author: Eva Rifà Rovira
+# Date: 18/01/2024
+#*******************************************************************************
+# In this example, we will explore different methods to modify the dimensions
+# of the 's2dv_cube':
+# (1) Changing dimension names
+# (2) Adding new dimensions
+# (3) Merging two dimensions
+# (4) Splitting a dimension
+
+# Needed packages:
+library(CSTools)
+
+################################################################################
+#-----------------------------------------------------
+# Example 1: Change dimension names with CST_ChangeDimNames
+#-----------------------------------------------------
+# With this function, we can change the dimension names in all elements
+# of the 's2dv_cube' object:
+
+# (1) Check original dimensions and coordinates
+lonlat_temp$exp$dims
+names(lonlat_temp$exp$coords)
+dim(lonlat_temp$exp$attrs$Dates)
+# (2) Change 'dataset' to 'dat', 'ftime' to 'time', 'lon' to 'longitude' and
+#     'lat' to 'latitude'
+exp <- CST_ChangeDimNames(lonlat_temp$exp,
+                          original_names = c("dataset", "ftime", "lon", "lat"),
+                          new_names = c("dat", "time", "longitude", "latitude"))
+# (3) Check new dimensions and coordinates
+exp$dims
+names(exp$coords)
+dim(exp$attrs$Dates)
+
+#-----------------------------------------------------
+# Example 2: Insert a new dimension with CST_InsertDim
+#-----------------------------------------------------
+# With this function, we can add a dimension into the 's2dv_cube'.
+# NOTE: When the dimension that we want to add has length greater than 1, the
+# values of the data are repeated for that new dimension.
+
+# (1) Check original dimensions and coordinates
+lonlat_temp$exp$dims
+names(lonlat_temp$exp$coords)
+# (2) Add the 'variable' dimension
+exp <- CST_InsertDim(lonlat_temp$exp,
+                     posdim = 2,
+                     lendim = 2,
+                     name = "variable",
+                     values = c("tas", "tos"))
+# (3) Check new dimensions and coordinates
+exp$dims
+exp$coords$variable
+# We see that the values are repeated along the new dimension:
+exp$data[, , 1, 1, 1, 1, 1]
+
+#-----------------------------------------------------
+# Example 3: Merge two dimensions with CST_MergeDims
+#-----------------------------------------------------
+# In this example, we will merge the dimensions corresponding to the latitude
+# and the longitude of the data. The new dimension will be named 'grid'.
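+# NOTE (added comment): merging 'lat' (length 22) and 'lon' (length 53)
+# yields a single 'grid' dimension of length 22 * 53 = 1166, as can be
+# checked in the dimensions below.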
+
+# (1) Call the function:
+new_data <- CST_MergeDims(lonlat_temp$exp, merge_dims = c('lat', 'lon'),
+                          rename_dim = 'grid')
+
+# (2) Check the dimensions of the data:
+dim(new_data$data)
+# dataset  member   sdate   ftime    grid
+#       1      15       6       3    1166
+
+# (3) Check the names of the coordinates:
+names(new_data$coords)
+# [1] "dataset" "member"  "sdate"   "ftime"   "grid"
+
+# (4) Explore the object by printing it in the terminal:
+new_data
+# NOTE: Be aware that, when we print the object, the name of the new dimension
+# appears in the "Coordinates" field without the asterisk (*) at its left. This
+# means that the values of that coordinate are indices, not the actual values.
+# We can also find this information with the attribute "indices":
+attributes(new_data$coords$grid)
+# $indices
+# [1] TRUE
+
+# (5) Now, we want to merge the time dimensions 'sdate' and 'ftime':
+new_data <- CST_MergeDims(data = lonlat_temp_st$exp, merge_dims = c('sdate', 'ftime'))
+# In this case, the Dates dimensions will be merged too.
+# (6) Check the dimensions of Dates:
+dim(new_data$attrs$Dates)
+# sdate
+#    18
+
+# NOTE: When we want to merge a temporal dimension with a dimension of a
+# different nature, the Dates dimensions are kept as in the original. In this
+# case, the function returns a warning message, so we must pay attention!
+new_data <- CST_MergeDims(data = lonlat_temp$exp,
+                          merge_dims = c('lat', 'ftime'),
+                          rename_dim = 'newdim')
+
+#-----------------------------------------------------
+# Example 4: Split a dimension with SplitDim and CST_SplitDim
+#-----------------------------------------------------
+# In this example, we will start working with the function SplitDim,
+# which can be used to split the dimensions of an array.
+
+# NOTE: Take into account that time dimensions are treated differently from
+# other dimensions:
+
+# (1) Decadal example: We define an array of consecutive days of different years:
+dates <- seq(as.Date("01-01-2000", "%d-%m-%Y", tz = 'UTC'),
+             as.Date("31-12-2005", "%d-%m-%Y", tz = 'UTC'), "day")
+dim(dates) <- c(time = 2192)
+
+# (2) Now, we will split the array in a new 'year' dimension:
+dates_year <- SplitDim(dates, indices = dates,
+                       split_dim = 'time', freq = 'year')
+dim(dates_year)
+# time year
+#  366    6
+
+# (3) Now, we can try freq = 'month' and freq = 'day':
+dates_month <- SplitDim(dates, indices = dates,
+                        split_dim = 'time', freq = 'month')
+
+dates_day <- SplitDim(dates, indices = dates,
+                      split_dim = 'time', freq = 'day')
+
+# (4) Finally, we need to convert them back from numeric to 'POSIXct':
+dates_year <- as.POSIXct(dates_year * 24 * 3600, origin = '1970-01-01', tz = 'UTC')
+dates_month <- as.POSIXct(dates_month * 24 * 3600, origin = '1970-01-01', tz = 'UTC')
+dates_day <- as.POSIXct(dates_day * 24 * 3600, origin = '1970-01-01', tz = 'UTC')
+
+#-----------------------------------------------------
+
+# In the following example, we will use the sample data of the package.
We
+# will use lonlat_prec_st because it is daily data:
+
+# NOTE: As of January 2024, further development is needed to update other
+# fields of the 's2dv_cube'.
+
+# (1) Call the function CST_SplitDim, adding the 'day' dimension:
+data_day <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[1, ],
+                         split_dim = 'ftime', freq = 'day')
+# (2) Explore the dimensions of the data array
+dim(data_day$data)
+# dataset     var  member   sdate   ftime     lat     lon     day
+#       1       1       6       3       1       4       4      31
+
+# (3) Call the function CST_SplitDim, adding the 'month' dimension:
+data_month <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[1, ],
+                           split_dim = 'ftime', freq = 'month')
+
+dim(data_month$data)
+# dataset     var  member   sdate   ftime     lat     lon   month
+#       1       1       6       3      31       4       4       1
+
+# (4) Call the function CST_SplitDim, adding the 'year' dimension:
+data_year <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[, 1],
+                          split_dim = 'sdate', freq = 'year')
+dim(data_year$data)
+# dataset     var  member   sdate   ftime     lat     lon    year
+#       1       1       6       1      31       4       4       3
+
+################################################################################
\ No newline at end of file
diff --git a/inst/doc/usecase/ex4_subset.R b/inst/doc/usecase/ex4_subset.R
new file mode 100644
index 0000000000000000000000000000000000000000..360ca08fc479f42b12d421ce8ffa5d82b8a12e47
--- /dev/null
+++ b/inst/doc/usecase/ex4_subset.R
@@ -0,0 +1,140 @@
+#*******************************************************************************
+# Title: Example script to subset any dimension of an 's2dv_cube'
+# Author: Eva Rifà Rovira
+# Date: 16/11/2024
+#*******************************************************************************
+# This example shows how to subset any dimension of an 's2dv_cube'. To do it,
+# we will use the function CST_Subset. This function is the 's2dv_cube' method
+# version of Subset from the package ClimProjDiags.
+# (1) First, we will see how Subset works.
+# (2) Then, we will use CST_Subset with an 's2dv_cube'.
+
+# Needed packages:
+library(CSTools)
+library(ClimProjDiags)
+
+################################################################################
+#-----------------------------------------------------
+# Example 1: Subset an example array
+#-----------------------------------------------------
+# This is a minimal use case about subsetting spatial coordinates.
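+# NOTE (added comment): Subset() takes the array in 'x', the dimension names
+# in 'along', a list with the 'indices' to keep for each of those dimensions,
+# and a 'drop' option controlling whether length-1 dimensions are removed.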
+
+# (1) We create the array and we print it:
+dat <- array(1:100, dim = c(lat = 10, lon = 10))
+dat
+#       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
+#  [1,]    1   11   21   31   41   51   61   71   81    91
+#  [2,]    2   12   22   32   42   52   62   72   82    92
+#  [3,]    3   13   23   33   43   53   63   73   83    93
+#  [4,]    4   14   24   34   44   54   64   74   84    94
+#  [5,]    5   15   25   35   45   55   65   75   85    95
+#  [6,]    6   16   26   36   46   56   66   76   86    96
+#  [7,]    7   17   27   37   47   57   67   77   87    97
+#  [8,]    8   18   28   38   48   58   68   78   88    98
+#  [9,]    9   19   29   39   49   59   69   79   89    99
+# [10,]   10   20   30   40   50   60   70   80   90   100
+
+# (2) We call the function Subset from ClimProjDiags and we see the result:
+dat_subset <- Subset(x = dat, along = c('lat', 'lon'), indices = list(1:5, 1:7),
+                     drop = 'all')
+dat_subset
+#      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
+# [1,]    1   11   21   31   41   51   61
+# [2,]    2   12   22   32   42   52   62
+# [3,]    3   13   23   33   43   53   63
+# [4,]    4   14   24   34   44   54   64
+# [5,]    5   15   25   35   45   55   65
+
+#-----------------------------------------------------
+# Example 2: Subset an 's2dv_cube' using sample data
+#-----------------------------------------------------
+# In this example, we will not drop any dimension; we will select only the first
+# member, the first and the second start dates, and also subset the longitude to
+# keep only the values from [0, 21]:
+
+# (1) Explore the sample data:
+dat <- lonlat_temp_st$exp
+
+dat$dims
+# dataset     var  member   sdate   ftime     lat     lon
+#       1       1      15       6       3      22      53
+
+dat
+# 's2dv_cube'
+# Data [ 279.994110107422, 280.337463378906, 279.450866699219, ... ]
+# Dimensions ( dataset = 1, var = 1, member = 15, sdate = 6, ftime = 3,
+# lat = 22, lon = 53 )
+# Coordinates
+# * dataset : dat1
+# * var : tas
+# member : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+# * sdate : 20001101, 20011101, 20021101, 20031101, 20041101, 20051101
+# ftime : 1, 2, 3
+# * lat : 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, ...
+# * lon : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
+# Attributes
+# Dates : 2000-11-01 2001-11-01 2002-11-01 2003-11-01 2004-11-01 ...
+# varName : tas
+# metadata :
+# lat
+# units : degrees_north
+# long name : latitude
+# lon
+# units : degrees_east
+# long name : longitude
+# ftime
+# units : hours since 2000-11-01 00:00:00
+# tas
+# units : K
+# long name : 2 metre temperature
+# Datasets : dat1
+# when : 2023-10-02 10:11:06
+# source_files : /monthly_mean/tas_f6h/tas_20001101.nc ...
+# load_parameters :
+# ( dat1 ) : dataset = dat1, var = tas, sdate = 20001101 ...
+# ...
+
+# (2) Call the function CST_Subset:
+dat_subset <- CST_Subset(x = dat, along = c('member', 'sdate', 'lon'),
+                         indices = list(1, 1:2, 1:22), drop = 'none')
+
+# (3) Explore the 's2dv_cube'
+dat_subset
+# 's2dv_cube'
+# Data [ 279.994110107422, 277.161102294922, 278.825836181641, 276.8271484375, 276.052703857422, 276.950805664062, 280.677215576172, 277.285247802734 ... ]
+# Dimensions ( dataset = 1, var = 1, member = 1, sdate = 2, ftime = 3, lat = 22, lon = 22 )
+# Coordinates
+# * dataset : dat1
+# * var : tas
+# member : 1
+# * sdate : 20001101, 20011101
+# ftime : 1, 2, 3
+# * lat : 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27
+# * lon : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+# Attributes
+# Dates : 2000-11-01 2001-11-01 2000-12-01 2001-12-01 2001-01-01 ...
+# varName : tas +# metadata : +# ftime +# units : hours since 2000-11-01 00:00:00 +# other : ndims, size, standard_name, calendar +# lat +# units : degrees_north +# long name : latitude +# other : ndims, size, standard_name, axis +# lon +# units : degrees_east +# long name : longitude +# other : ndims, size, standard_name, axis +# tas +# units : K +# long name : 2 metre temperature +# other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table +# Datasets : dat1 +# when : 2023-10-02 10:11:06 +# source_files : /esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20001101.nc ... +# load_parameters : +# ( dat1 ) : dataset = dat1, var = tas, sdate = 20001101 ... +# ... + +################################################################################ \ No newline at end of file diff --git a/man/CST_ChangeDimNames.Rd b/man/CST_ChangeDimNames.Rd new file mode 100644 index 0000000000000000000000000000000000000000..86806be048819a4cfec10e20b8c82e7986a4be68 --- /dev/null +++ b/man/CST_ChangeDimNames.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_ChangeDimNames.R +\name{CST_ChangeDimNames} +\alias{CST_ChangeDimNames} +\title{Change the name of one or more dimensions for an object of class s2dv_cube} +\usage{ +CST_ChangeDimNames(data, original_names, new_names) +} +\arguments{ +\item{data}{An object of class \code{s2dv_cube} whose dimension names +should be changed.} + +\item{original_names}{A single character string or a vector indicating the +dimensions to be renamed.} + +\item{new_names}{A single character string or a vector indicating the new +dimension names, in the same order as the dimensions in 'original_names'.} +} +\value{ +An object of class \code{s2dv_cube} with similar data, coordinates and +attributes as the \code{data} input, but with modified dimension names. +} +\description{ +Change the names of the dimensions specified in 'original_names' to the names +in 'new_names'. The coordinate names and the dimensions of any attributes +are also modified accordingly. +} +\examples{ +# Example with sample data: +# Check original dimensions and coordinates +lonlat_temp$exp$dims +names(lonlat_temp$exp$coords) +dim(lonlat_temp$exp$attrs$Dates) +# Change 'dataset' to 'dat' and 'ftime' to 'time' +exp <- CST_ChangeDimNames(lonlat_temp$exp, + original_names = c("dataset", "ftime"), + new_names = c("dat", "time")) +# Check new dimensions and coordinates +exp$dims +names(exp$coords) +dim(exp$attrs$Dates) + +} +\author{ +Agudetse Roures Victoria, \email{victoria.agudetse@bsc.es} +} diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 9352e03604b75409efd9764194e12d43a11e7169..c7976bccf0f5e4c69e81cebdae0484a951c8fb25 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -7,15 +7,17 @@ CST_SaveExp( data, destination = "./", + startdates = NULL, sdate_dim = "sdate", ftime_dim = "time", + memb_dim = "member", dat_dim = "dataset", var_dim = "var", - memb_dim = "member", - startdates = NULL, drop_dims = NULL, single_file = FALSE, - extra_string = NULL + extra_string = NULL, + global_attrs = NULL, + units_hours_since = FALSE ) } \arguments{ @@ -23,85 +25,107 @@ CST_SaveExp( \item{destination}{A character string containing the directory name in which to save the data. NetCDF file for each starting date are saved into the -folder tree: \cr -destination/Dataset/variable/. By default the function -creates and saves the data into the working directory.} +folder tree: 'destination/Dataset/variable/'. 
By default the function
+saves the data into the working directory.}
+
+\item{startdates}{A vector of dates that will be used for the filenames
+when saving the data in multiple files (single_file = FALSE). It must be a
+vector of the same length as the start date dimension of data. It must be a
+vector of class \code{Dates}, \code{'POSIXct'} or character with lengths
+between 1 and 10. If it is NULL, the coordinate corresponding to the start
+date dimension or the first Date of each time step will be used as the name
+of the files. It is NULL by default.}
+
 \item{sdate_dim}{A character string indicating the name of the start date
 dimension. By default, it is set to 'sdate'. It can be NULL if there is no
 start date dimension.}
 
 \item{ftime_dim}{A character string indicating the name of the forecast time
-dimension. By default, it is set to 'time'. It can be NULL if there is no
-forecast time dimension.}
+dimension. If 'Dates' are used, it can't be NULL. If there is no forecast
+time dimension, 'Dates' will be set to NULL and will not be used. By
+default, it is set to 'time'.}
+
+\item{memb_dim}{A character string indicating the name of the member
+dimension. It can be NULL if there is no member dimension. By default, it is
+set to 'member'.}
 
 \item{dat_dim}{A character string indicating the name of dataset dimension.
-By default, it is set to 'dataset'. It can be NULL if there is no dataset
-dimension.}
+It can be NULL if there is no dataset dimension. By default, it is set to
+'dataset'.}
 
 \item{var_dim}{A character string indicating the name of variable dimension.
-By default, it is set to 'var'. It can be NULL if there is no variable
-dimension.}
-
-\item{memb_dim}{A character string indicating the name of the member dimension.
-By default, it is set to 'member'. It can be NULL if there is no member
-dimension.}
-
-\item{startdates}{A vector of dates that will be used for the filenames
-when saving the data in multiple files. It must be a vector of the same
-length as the start date dimension of data. It must be a vector of class
-\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10.
-If it is NULL, the coordinate corresponding the the start date dimension or
-the first Date of each time step will be used as the name of the files.
-It is NULL by default.}
+It can be NULL if there is no variable dimension. By default, it is set to
+'var'.}
 
-\item{drop_dims}{A vector of character strings indicating the dimension names
-of length 1 that need to be dropped in order that they don't appear in the
-netCDF file. It is NULL by default (optional).}
+\item{drop_dims}{(optional) A vector of character strings indicating the
+dimension names of length 1 that need to be dropped in order that they don't
+appear in the netCDF file. It is only allowed to drop dimensions that are not
+used in the computation. The dimensions used in the computation are the ones
+specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is
+NULL by default.}
 
 \item{single_file}{A logical value indicating if all object is saved in a
 single file (TRUE) or in multiple files (FALSE). When it is FALSE,
-the array is separated for Datasets, variable and start date. It is FALSE
-by default.}
+the array is separated for datasets, variable and start date. When there are
+no specified time dimensions, the data will be saved in a single file by
+default. The output file name when 'single_file' is TRUE is a character
+string containing: '<var>_<first_sdate>_<last_sdate>.nc'; when it is FALSE,
+it is '<var>_<sdate>.nc'.
It is FALSE by default.}
+
+\item{extra_string}{(Optional) A character string to be included as part of
+the file name, for instance, to identify member or realization. When
+single_file is TRUE, the 'extra_string' will substitute all the default
+file name; when single_file is FALSE, the 'extra_string' will be added
+in the file name as: '<extra_string>_<var>_<sdate>.nc'. It is NULL by
+default.}
+
+\item{global_attrs}{(Optional) A list with elements containing the global
+attributes to be saved in the NetCDF.}
 
-\item{extra_string}{A character string to be include as part of the file name,
-for instance, to identify member or realization. It would be added to the
-file name between underscore characters.}
+\item{units_hours_since}{(Optional) A logical value only available for the
+case: 'Dates' have forecast time and start date dimension, 'single_file' is
+TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast
+time with units of 'hours since'; if it is FALSE, the time units will be a
+number of time steps with its corresponding frequency (e.g. n days, n months
+or n hours). It is FALSE by default.}
 }
 \value{
 Multiple or single NetCDF files containing the data array.\cr
-\item{\code{single_file = TRUE}}{
+\item{\code{single_file is TRUE}}{
   All data is saved in a single file located in the specified destination
-  path with the following name:
-  ___.nc. Multiple
-  variables are saved separately in the same file. The forecast time units
-  is extracted from the frequency of the time steps (hours, days, months).
-  The first value of forecast time is 1. If no frequency is found, the units
-  will be 'hours since' each start date and the time steps are assumed to be
-  equally spaced.
+  path with the following name (by default):
+  '<var>_<first_sdate>_<last_sdate>.nc'. Multiple variables
+  are saved separately in the same file. The forecast time units
+  are calculated from each start date (if sdate_dim is not NULL) or from
+  the time step. If 'units_hours_since' is TRUE, the forecast time units
+  will be 'hours since <date>'. If 'units_hours_since' is FALSE,
+  the forecast time units are extracted from the frequency of the time steps
+  (hours, days, months); if no frequency is found, the units will be 'hours
+  since'. When the time units are 'hours since', the time steps are assumed to
+  be equally spaced.
 }
-\item{\code{single_file = FALSE}}{
+\item{\code{single_file is FALSE}}{
   The data array is subset and stored into multiple files. Each file
   contains the data subset for each start date, variable and dataset. Files
-  with different variables and Datasets are stored in separated directories
-  within the following directory tree: destination/Dataset/variable/.
-  The name of each file will be:
-  __.nc.
+  with different variables and datasets are stored in separated directories
+  within the following directory tree: 'destination/Dataset/variable/'.
+  The name of each file will be by default: '<var>_<sdate>.nc'.
+  The forecast time units are calculated from each start date (if sdate_dim
+  is not NULL) or from the time step. The forecast time units will be 'hours
+  since <date>'.
 }
}
\description{
This function allows to divide and save a object of class 's2dv_cube' into a
NetCDF file, allowing to reload the saved data using
-\code{Start} function from StartR package. If the original 's2dv_cube' object
-has been created from \code{CST_Load()}, then it can be reloaded with
-\code{Load()}.
+\code{CST_Start} or \code{CST_Load} functions. It also allows saving any
+'s2dv_cube' object that follows the NetCDF attribute conventions.
}
\examples{
\dontrun{
data <- lonlat_temp_st$exp
-destination <- "./"
-CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime',
-            var_dim = 'var', dat_dim = 'dataset')
+CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var',
+            dat_dim = 'dataset', sdate_dim = 'sdate')
}

}
diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd
index b07d9897ceac08db2b876ffa67d30322c87d34b5..4b55d6da3b2a761be6deee2519c26bff146cc785 100644
--- a/man/CST_SplitDim.Rd
+++ b/man/CST_SplitDim.Rd
@@ -10,14 +10,17 @@ CST_SplitDim(
   indices = NULL,
   freq = "monthly",
   new_dim_name = NULL,
-  insert_ftime = NULL
+  insert_ftime = NULL,
+  ftime_dim = "time",
+  sdate_dim = "sdate",
+  return_indices = FALSE
 )
}
\arguments{
\item{data}{A 's2dv_cube' object}

\item{split_dim}{A character string indicating the name of the dimension to
-split.}
+split. It is set as 'time' by default.}

\item{indices}{A vector of numeric indices or dates. If left at NULL, the
dates provided in the s2dv_cube object (element Dates) will be used.}
@@ -32,6 +35,15 @@ dimension.}

\item{insert_ftime}{An integer indicating the number of time steps to add at
the begining of the time series.}
+
+\item{ftime_dim}{A character string indicating the name of the forecast time
+dimension. It is set as 'time' by default.}
+
+\item{sdate_dim}{A character string indicating the name of the start date
+dimension. It is set as 'sdate' by default.}
+
+\item{return_indices}{A logical value indicating whether to return the
+indices used in splitting the dimension. It is FALSE by default.}
}
\description{
This function split a dimension in two. The user can select the
diff --git a/man/SaveExp.Rd b/man/SaveExp.Rd
index c690d97edf7d4aab44d38b529cf0fcaf86aeb8db..6ec767a06471eb85ddafd07c2b5f9e5fc73a84fa 100644
--- a/man/SaveExp.Rd
+++ b/man/SaveExp.Rd
@@ -7,20 +7,23 @@
 SaveExp(
   data,
   destination = "./",
-  Dates = NULL,
   coords = NULL,
+  Dates = NULL,
+  time_bounds = NULL,
+  startdates = NULL,
   varname = NULL,
   metadata = NULL,
   Datasets = NULL,
-  startdates = NULL,
-  dat_dim = "dataset",
   sdate_dim = "sdate",
   ftime_dim = "time",
-  var_dim = "var",
   memb_dim = "member",
+  dat_dim = "dataset",
+  var_dim = "var",
   drop_dims = NULL,
   single_file = FALSE,
-  extra_string = NULL
+  extra_string = NULL,
+  global_attrs = NULL,
+  units_hours_since = FALSE
 )
}
\arguments{
@@ -29,15 +32,30 @@ SaveExp(
 \item{destination}{A character string indicating the path where to store the
 NetCDF files.}
 
-\item{Dates}{A named array of dates with the corresponding sdate and forecast
-time dimension.}
-
 \item{coords}{A named list with elements of the coordinates corresponding to
 the dimensions of the data parameter. The names and length of each element
 must correspond to the names of the dimensions. If any coordinate is not
 provided, it is set as an index vector with the values from 1 to the length
 of the corresponding dimension.}
 
+\item{Dates}{A named array of dates with the corresponding sdate and forecast
+time dimension. If there is no sdate_dim, you can set it to NULL.
+It must have the ftime_dim dimension.}
+
+\item{time_bounds}{(Optional) A list of two arrays of dates containing
+the lower (first array) and the upper (second array) time bounds
+corresponding to Dates. Each array must have the same dimensions as Dates.
+If the 'Dates' parameter is NULL, 'time_bounds' is not used. It is NULL by
+default.}
+
+\item{startdates}{A vector of dates that will be used for the filenames
+when saving the data in multiple files (single_file = FALSE).
It must be a
+vector of the same length as the start date dimension of data. It must be a
+vector of class \code{Dates}, \code{'POSIXct'} or character with lengths
+between 1 and 10. If it is NULL, the coordinate corresponding to the start
+date dimension or the first Date of each time step will be used as the name
+of the files. It is NULL by default.}
+
 \item{varname}{A character string indicating the name of the variable to be
 saved.}
 
@@ -48,17 +66,6 @@ lists for each variable.}
 \item{Datasets}{A vector of character string indicating the names of the
 datasets.}
 
-\item{startdates}{A vector of dates that will be used for the filenames
-when saving the data in multiple files. It must be a vector of the same
-length as the start date dimension of data. It must be a vector of class
-\code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10.
-If it is NULL, the first Date of each time step will be used as the name of
-the files. It is NULL by default.}
-
-\item{dat_dim}{A character string indicating the name of dataset dimension.
-By default, it is set to 'dataset'. It can be NULL if there is no dataset
-dimension.}
-
 \item{sdate_dim}{A character string indicating the name of the start date
 dimension. By default, it is set to 'sdate'. It can be NULL if there is no
 start date dimension.}
@@ -67,46 +74,74 @@ start date dimension.}
 dimension. By default, it is set to 'time'. It can be NULL if there is no
 forecast time dimension.}
 
-\item{var_dim}{A character string indicating the name of variable dimension.
-By default, it is set to 'var'. It can be NULL if there is no variable
+\item{memb_dim}{A character string indicating the name of the member
+dimension. By default, it is set to 'member'. It can be NULL if there is no
+member dimension.}
+
+\item{dat_dim}{A character string indicating the name of dataset dimension.
+By default, it is set to 'dataset'. It can be NULL if there is no dataset
 dimension.}
 
-\item{memb_dim}{A character string indicating the name of the member dimension.
-By default, it is set to 'member'. It can be NULL if there is no member
+\item{var_dim}{A character string indicating the name of variable dimension.
+By default, it is set to 'var'. It can be NULL if there is no variable
 dimension.}
 
-\item{drop_dims}{A vector of character strings indicating the dimension names
-of length 1 that need to be dropped in order that they don't appear in the
-netCDF file. It is NULL by default (optional).}
+\item{drop_dims}{(optional) A vector of character strings indicating the
+dimension names of length 1 that need to be dropped in order that they don't
+appear in the netCDF file. It is only allowed to drop dimensions that are not
+used in the computation. The dimensions used in the computation are the ones
+specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is
+NULL by default.}
 
 \item{single_file}{A logical value indicating if all object is saved in a
-unique file (TRUE) or in separated directories (FALSE). When it is FALSE,
-the array is separated for Datasets, variable and start date. It is FALSE
-by default (optional).}
-
-\item{extra_string}{A character string to be include as part of the file name,
-for instance, to identify member or realization. It would be added to the
-file name between underscore characters (optional).}
+single file (TRUE) or in multiple files (FALSE). When it is FALSE,
+the array is separated for datasets, variable and start date.
When there are
+no specified time dimensions, the data will be saved in a single file by
+default. The output file name when 'single_file' is TRUE is a character
+string containing: '<var>_<first_sdate>_<last_sdate>.nc'; when it is FALSE,
+it is '<var>_<sdate>.nc'. It is FALSE by default.}
+
+\item{extra_string}{(Optional) A character string to be included as part of
+the file name, for instance, to identify member or realization. When
+single_file is TRUE, the 'extra_string' will substitute all the default
+file name; when single_file is FALSE, the 'extra_string' will be added
+in the file name as: '<extra_string>_<var>_<sdate>.nc'. It is NULL by
+default.}
+
+\item{global_attrs}{(Optional) A list with elements containing the global
+attributes to be saved in the NetCDF.}
+
+\item{units_hours_since}{(Optional) A logical value only available for the
+case: Dates have forecast time and start date dimension, single_file is
+TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time
+with units of 'hours since'; if it is FALSE, the time units will be a number
+of time steps with its corresponding frequency (e.g. n days, n months or n
+hours). It is FALSE by default.}
 }
 \value{
 Multiple or single NetCDF files containing the data array.\cr
-\item{\code{single_file = TRUE}}{
+\item{\code{single_file is TRUE}}{
   All data is saved in a single file located in the specified destination
-  path with the following name:
-  ___.nc. Multiple
-  variables are saved separately in the same file. The forecast time units
-  is extracted from the frequency of the time steps (hours, days, months).
-  The first value of forecast time is 1. If no frequency is found, the units
-  will be 'hours since' each start date and the time steps are assumed to be
-  equally spaced.
+  path with the following name (by default):
+  '<var>_<first_sdate>_<last_sdate>.nc'. Multiple variables
+  are saved separately in the same file. The forecast time units
+  are calculated from each start date (if sdate_dim is not NULL) or from
+  the time step. If 'units_hours_since' is TRUE, the forecast time units
+  will be 'hours since <date>'. If 'units_hours_since' is FALSE,
+  the forecast time units are extracted from the frequency of the time steps
+  (hours, days, months); if no frequency is found, the units will be 'hours
+  since'. When the time units are 'hours since', the time steps are assumed to
+  be equally spaced.
 }
-\item{\code{single_file = FALSE}}{
+\item{\code{single_file is FALSE}}{
   The data array is subset and stored into multiple files. Each file
   contains the data subset for each start date, variable and dataset. Files
-  with different variables and Datasets are stored in separated directories
-  within the following directory tree: destination/Dataset/variable/.
-  The name of each file will be:
-  __.nc.
+  with different variables and datasets are stored in separated directories
+  within the following directory tree: 'destination/Dataset/variable/'.
+  The name of each file will be by default: '<var>_<sdate>.nc'.
+  The forecast time units are calculated from each start date (if sdate_dim
+  is not NULL) or from the time step. The forecast time units will be 'hours
+  since <date>'.
}
}
\description{
@@ -124,12 +159,10 @@ coords <- list(lon = lon, lat = lat)
 Datasets <- lonlat_temp_st$exp$attrs$Datasets
 varname <- 'tas'
 Dates <- lonlat_temp_st$exp$attrs$Dates
-destination = './'
 metadata <- lonlat_temp_st$exp$attrs$Variable$metadata
-SaveExp(data = data, destination = destination, coords = coords,
-        Datasets = Datasets, varname = varname, Dates = Dates,
-        metadata = metadata, single_file = TRUE, ftime_dim = 'ftime',
-        var_dim = 'var', dat_dim = 'dataset')
+SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname,
+        Dates = Dates, metadata = metadata, single_file = TRUE,
+        ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset')
 }
 
 }
diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd
index a0dc8bc69bc65b0e729491a0e27c132f2854ee76..b0785b58a584cacfa67b9ca5ab4388c1fad0d92e 100644
--- a/man/SplitDim.Rd
+++ b/man/SplitDim.Rd
@@ -9,7 +9,9 @@ SplitDim(
   split_dim = "time",
   indices,
   freq = "monthly",
-  new_dim_name = NULL
+  new_dim_name = NULL,
+  dates = NULL,
+  return_indices = FALSE
 )
}
\arguments{
@@ -28,6 +30,13 @@ the length in which to subset the dimension.}

\item{new_dim_name}{A character string indicating the name of the new
dimension.}
+
+\item{dates}{An optional parameter containing an array of dates of class
+'POSIXct' with the corresponding time dimensions of 'data'. It is NULL
+by default.}
+
+\item{return_indices}{A logical value indicating whether to return the
+indices used in splitting the dimension. It is FALSE by default.}
}
\description{
This function split a dimension in two. The user can select the
diff --git a/tests/testthat/test-CST_ChangeDimNames.R b/tests/testthat/test-CST_ChangeDimNames.R
new file mode 100644
index 0000000000000000000000000000000000000000..ceef7375df0fc3ba4f427de326076ee2bab7cb5c
--- /dev/null
+++ b/tests/testthat/test-CST_ChangeDimNames.R
@@ -0,0 +1,95 @@
+##############################################
+test_that("1. Input checks", {
+  expect_error(
+    CST_ChangeDimNames(1),
+    "Parameter 'data' must be an object of class 's2dv_cube'."
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 1, 'bbb'),
+    paste0("Parameter 'original_names' must be a character string or a ",
+           "vector of character strings.")
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 'aaa', 1),
+    paste0("Parameter 'new_names' must be a character string or a ",
+           "vector of character strings.")
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 'aaa', c('aaa', 'bbb')),
+    paste0("The number of dimension names in 'new_names' must be the same ",
+           "as in 'original_names'.")
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 'aaa', 'bbb'),
+    paste0("Some of the dimensions in 'original_names' could not be found in ",
+           "'data'.")
+  )
+})
+##############################################
+test_that("2. 
diff --git a/tests/testthat/test-CST_ChangeDimNames.R b/tests/testthat/test-CST_ChangeDimNames.R
new file mode 100644
index 0000000000000000000000000000000000000000..ceef7375df0fc3ba4f427de326076ee2bab7cb5c
--- /dev/null
+++ b/tests/testthat/test-CST_ChangeDimNames.R
@@ -0,0 +1,95 @@
+##############################################
+test_that("1. Input checks", {
+  expect_error(
+    CST_ChangeDimNames(1),
+    "Parameter 'data' must be an object of class 's2dv_cube'."
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 1, 'bbb'),
+    paste0("Parameter 'original_names' must be a character string or a ",
+           "vector of character strings.")
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 'aaa', 1),
+    paste0("Parameter 'new_names' must be a character string or a ",
+           "vector of character strings.")
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 'aaa', c('aaa', 'bbb')),
+    paste0("The number of dimension names in 'new_names' must be the same ",
+           "as in 'original_names'.")
+  )
+  expect_error(
+    CST_ChangeDimNames(lonlat_prec_st, 'aaa', 'bbb'),
+    paste0("Some of the dimensions in 'original_names' could not be found in ",
+           "'data'.")
+  )
+})
+##############################################
+test_that("2. Output checks", {
+  exp <- CST_ChangeDimNames(lonlat_temp_st$exp,
+                            original_names = c("lon", 'ftime', 'sdate'),
+                            new_names = c("lons", 'ftimes', 'sdates'))
+  # dims
+  expect_equal(
+    dim(exp$data),
+    c(dataset = 1, var = 1, member = 15, sdates = 6, ftimes = 3, lat = 22, lons = 53)
+  )
+  expect_equal(
+    exp$dims,
+    dim(exp$data)
+  )
+  expect_equal(
+    as.vector(exp$data),
+    as.vector(lonlat_temp_st$exp$data)
+  )
+  # coords
+  expect_equal(
+    names(exp$coords),
+    c("dataset", "var", "member", "sdates", "ftimes", "lat", "lons")
+  )
+  # dim Dates
+  expect_equal(
+    dim(exp$attrs$Dates),
+    c(sdates = 6, ftimes = 3)
+  )
+  # variable metadata
+  expect_equal(
+    names(exp$attrs$Variable$metadata),
+    c("lat", "lons", "ftimes", "tas")
+  )
+  # source_files
+  expect_equal(
+    dim(exp$attrs$source_files),
+    c(dataset = 1, var = 1, sdates = 6)
+  )
+  # Dates 'dim' attribute
+  dat <- CST_ChangeDimNames(lonlat_prec,
+                            original_names = c("lon", 'ftime', 'sdate', 'member'),
+                            new_names = c("lons", 'ftimes', 'sdates', 'members'))
+  expect_equal(
+    as.vector(lonlat_prec$data),
+    as.vector(dat$data)
+  )
+  expect_equal(
+    attributes(dat$attrs$Dates)$dim,
+    c(ftimes = 31, sdates = 3)
+  )
+  expect_equal(
+    attributes(exp$attrs$Dates)$dim,
+    c(sdates = 6, ftimes = 3)
+  )
+  expect_equal(
+    as.vector(dat$attrs$Dates),
+    as.vector(lonlat_prec$attrs$Dates)
+  )
+  # attribute dimensions
+  expect_equal(
+    attributes(dat$data)$dimensions,
+    names(dim(dat$data))
+  )
+  expect_equal(
+    attributes(exp$data)$dimensions,
+    NULL
+  )
+})
\ No newline at end of file
diff --git a/tests/testthat/test-CST_MergeDims.R b/tests/testthat/test-CST_MergeDims.R
index f7eac6acf5369988d1bdff0f0a38921810b8f215..a99d5eba65e56aec072727dd582230f5685d34f3 100644
--- a/tests/testthat/test-CST_MergeDims.R
+++ b/tests/testthat/test-CST_MergeDims.R
@@ -1,65 +1,96 @@
 ##############################################
-test_that("Sanity checks", {
+# data1
+data1 <- list(data = 1:10)
+class(data1) <- 's2dv_cube'
+
+# data2
+data <- 1 : 20
+dim(data) <- c(time = 20)
+data2 <- list(data = data)
+data2$dims <- dim(data)
+data2$coords <- list(time = 1:20)
+attr(data2$coords$time, 'indices') <- TRUE
+class(data2) <- 's2dv_cube'
+
+# exp
+exp <- 1 : 20
+dim(exp) <- c(time = 10, lat = 2)
+exp <- list(data = exp)
+class(exp) <- 's2dv_cube'
+
+# data3
+data3 <- data2
+names(dim(data3$data)) <- 'Dim1'
+data3$dims <- dim(data3$data)
+names(data3$coords) <- 'Dim1'
+
+##############################################
+
+test_that("1. Sanity checks", {
   expect_error(
     CST_MergeDims(data = 1),
-    paste0("Parameter 'data' must be of the class 's2dv_cube'."))
-data <- list(data = 1:10)
-class(data) <- 's2dv_cube'
+    paste0("Parameter 'data' must be of the class 's2dv_cube'.")
+  )
   expect_error(
-    CST_MergeDims(data = data),
-    paste0("Parameter 'data' must have dimensions."))
+    CST_MergeDims(data = data1),
+    paste0("Parameter 'data' must have dimensions.")
+  )
 
-  data <- 1 : 20
-  dim(data) <- c(time = 20)
-  data <- list(data = data)
-  class(data) <- 's2dv_cube'
   expect_error(
-    CST_MergeDims(data = data),
-    "Parameter 'merge_dims' must match with dimension names in parameter 'data'.")
+    CST_MergeDims(data = data2),
+    "Parameter 'merge_dims' must match with dimension names in parameter 'data'."
+ ) expect_error( - CST_MergeDims(data = data, merge_dims = 1), - paste0("Parameter 'merge_dims' must be a character vector indicating the names", - " of the dimensions to be merged.")) + CST_MergeDims(data = data2, merge_dims = 1), + paste0("Parameter 'merge_dims' must be a character vector indicating the ", + "names of the dimensions to be merged.") + ) expect_error( - CST_MergeDims(data = data, merge_dims = 'time'), - "Parameter 'merge_dims' must be of length two.") + CST_MergeDims(data = data2, merge_dims = 'time'), + "Parameter 'merge_dims' must be of length two." + ) expect_error( - CST_MergeDims(data = data, merge_dims = c('time', 'sdates')), + CST_MergeDims(data = data2, merge_dims = c('time', 'sdates')), paste0("Parameter 'merge_dims' must match with dimension ", - "names in parameter 'data'.")) + "names in parameter 'data'.") + ) +}) + +############################################## - exp <- 1 : 20 - dim(exp) <- c(time = 10, lat = 2) - exp <- list(data = exp) - class(exp) <- 's2dv_cube' +test_that("2. Output checks", { expect_equal( - CST_MergeDims(data = exp, merge_dims = c('time', 'lat')), data) - + CST_MergeDims(data = exp, merge_dims = c('time', 'lat')), + data2 + ) expect_warning( CST_MergeDims(data = exp, merge_dims = c('time', 'lat', 'lon')), paste0("Only two dimensions can be merge, only the first two dimension", " will be used. To merge further dimensions consider to use this ", - "function multiple times.")) + "function multiple times.") + ) expect_warning( - CST_MergeDims(data = exp, merge_dims = c('time', 'lat'), rename_dim = c('lat', 'lon')), + CST_MergeDims(data = exp, merge_dims = c('time', 'lat'), + rename_dim = c('lat', 'lon')), paste0("Parameter 'rename_dim' has length greater than 1 and only the ", - "first element will be used.")) - names(dim(data$data)) <- 'Dim1' + "first element will be used.") + ) expect_equal( - CST_MergeDims(data = exp, merge_dims = c('time', 'lat'), rename_dim = 'Dim1'), - data) - + CST_MergeDims(data = exp, merge_dims = c('time', 'lat'), + rename_dim = 'Dim1'), + data3 + ) expect_equal( CST_MergeDims(data = exp, merge_dims = c('time', 'lat'), - rename_dim = 'Dim1', na.rm = TRUE), data) - - exp$data[1,] <- NA - data <- c(2 : 10, 12 : 20) - dim(data) <- c(Dim1 = 18) - data <- list(data = data) - class(data) <- 's2dv_cube' + rename_dim = 'Dim1', na.rm = TRUE), + data3 + ) expect_equal( CST_MergeDims(data = exp, merge_dims = c('time', 'lat'), - rename_dim = 'Dim1', na.rm = TRUE), data) + rename_dim = 'Dim1', na.rm = TRUE), + data3 + ) }) + +############################################## \ No newline at end of file diff --git a/tests/testthat/test-CST_SaveExp.R b/tests/testthat/test-CST_SaveExp.R index f39dffe9e147739b101725af556378572cd8db71..385d2793e9821c43b14f5c1d8e898778d6dca865 100644 --- a/tests/testthat/test-CST_SaveExp.R +++ b/tests/testthat/test-CST_SaveExp.R @@ -31,8 +31,10 @@ cube3 <- cube1 # dat0 dates0 <- as.Date('2022-02-01', format = "%Y-%m-%d") dim(dates0) <- c(sdate = 1) + # dat1 dat1 <- array(1, dim = c(test = 1)) + # dat2 dat2 <- array(1:5, dim = c(sdate = 5, lon = 4, lat = 4, ftime = 1)) coords2 <- list(sdate = c('20000101', '20010102', '20020103', '20030104', '20040105'), @@ -43,6 +45,31 @@ dates2 <- c('20000101', '20010102', '20020103', '20030104', '20040105') dates2 <- as.Date(dates2, format = "%Y%m%d", tz = "UTC") dim(dates2) <- c(sdate = 5, ftime = 1) +# dat3 (without sdate dim) +dat3 <- array(1:5, dim = c(lon = 4, lat = 4, ftime = 2)) +coords3 <- list(sdate = c('20000101', '20010102'), + var = 'tas', + lon = 1.:4., 
+ lat = 1.:4.) +dates3 <- c('20000101', '20010102') +dates3 <- as.Date(dates3, format = "%Y%m%d", tz = "UTC") +dim(dates3) <- c(ftime = 2) + +# dat4 (without ftime dim) +dat4 <- array(1:5, dim = c(sdate = 2, lon = 4, lat = 4)) +coords4 <- list(sdate = c('20000101', '20010102'), + var = 'tas', + lon = 1.:4., + lat = 1.:4.) +dates4 <- c('20000101', '20010102') +dates4 <- as.Date(dates4, format = "%Y%m%d", tz = "UTC") +dim(dates4) <- c(sdate = 2) + +# dates5 (Dates with extra dimensions) +dates5 <- c('20000101', '20010102', '20010102', '20010102') +dates5 <- as.Date(dates5, format = "%Y%m%d", tz = "UTC") +dim(dates5) <- c(ftime = 2, test = 1, test2 = 2) + ############################################## test_that("1. Input checks: CST_SaveExp", { @@ -63,14 +90,6 @@ test_that("1. Input checks: CST_SaveExp", { CST_SaveExp(data = cube0), paste0("Level 'attrs' must be a list with at least 'Dates' element.") ) - # cube0$attrs <- NULL - # cube0$attrs$Dates <- dates2 - # expect_warning( - # CST_SaveExp(data = cube0, sdate_dim = c('sdate', 'sweek'), - # ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - # var_dim = NULL, single_file = FALSE), - # paste0("Element 'coords' not found. No coordinates will be used.") - # ) # sdate_dim suppressWarnings( @@ -79,24 +98,18 @@ test_that("1. Input checks: CST_SaveExp", { paste0("Parameter 'sdate_dim' must be a character string.") ) ) - # expect_warning( - # CST_SaveExp(data = cube1, sdate_dim = c('sdate', 'sweek'), - # ftime_dim = 'ftime', memb_dim = NULL, dat_dim = NULL, - # var_dim = NULL), - # paste0("Parameter 'sdate_dim' has length greater than 1 and ", - # "only the first element will be used.") - # ) suppressWarnings( expect_error( CST_SaveExp(data = cube1, sdate_dim = 'a', ftime_dim = 'ftime'), paste0("Parameter 'sdate_dim' is not found in 'data' dimension.") ) ) - # # startdates + # startdates # expect_warning( # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, # dat_dim = NULL, var_dim = NULL, startdates = 1), - # "Parameter 'startdates' is not a character string, it will not be used." + # paste0("Parameter 'startdates' doesn't have the same length ", + # "as dimension 'sdate', it will not be used.") # ) # expect_warning( # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, @@ -104,30 +117,6 @@ test_that("1. Input checks: CST_SaveExp", { # paste0("Parameter 'startdates' doesn't have the same length ", # "as dimension '", 'sdate',"', it will not be used.") # ) - # # metadata - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("No metadata found in element Variable from attrs.") - # ) - cube1$attrs$Variable$metadata <- 'metadata' - expect_error( - CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - dat_dim = NULL, var_dim = NULL), - paste0("Element metadata from Variable element in attrs must be a list.") - ) - cube1$attrs$Variable$metadata <- list(test = 'var') - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("Metadata is not found for any coordinate.") - # ) - cube1$attrs$Variable$metadata <- list(var = 'var') - # expect_warning( - # CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("Metadata is not found for any variable.") - # ) # memb_dim suppressWarnings( expect_error( @@ -142,6 +131,13 @@ test_that("1. 
Input checks: CST_SaveExp", { "as NULL if there is no member dimension.") ) ) + # metadata + cube1$attrs$Variable$metadata <- 'metadata' + expect_error( + CST_SaveExp(data = cube1, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Element metadata from Variable element in attrs must be a list.") + ) }) ############################################## @@ -166,15 +162,17 @@ test_that("1. Input checks", { ) # Dates expect_error( - SaveExp(data = array(1, dim = c(a = 1)), Dates = 'a'), + SaveExp(data = array(1, dim = c(a = 1)), Dates = 'a', sdate_dim = NULL, + memb_dim = NULL, ftime_dim = 'a', dat_dim = NULL, var_dim = NULL), paste0("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") ) expect_error( - SaveExp(data = array(1, dim = c(a = 1)), + SaveExp(data = array(1, dim = c(time = 1, sdate = 1, member = 1)), + dat_dim = NULL, var_dim = NULL, Dates = as.Date('2022-02-01', format = "%Y-%m-%d")), paste0("Parameter 'Dates' must have dimension names.") ) - # # drop_dims + # drop_dims # expect_warning( # SaveExp(data = dat2, coords = coords2, # metadata = list(tas = list(level = '2m')), @@ -199,14 +197,15 @@ test_that("1. Input checks", { # paste0("Parameter 'drop_dims' can only contain dimension names ", # "that are of length 1. It will not be used.") # ) - # # varname # expect_warning( # SaveExp(data = dat2, coords = coords2, # metadata = list(tas = list(level = '2m')), # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # paste0("Parameter 'varname' is NULL. It will be assigned to 'X'.") + # dat_dim = NULL, var_dim = NULL, drop_dims = 'ftime'), + # paste0("Parameter 'drop_dims' contains dimensions used in the ", + # "computation. It will not be used.") # ) + # varname suppressWarnings( expect_error( SaveExp(data = dat2, coords = coords2, varname = 1, @@ -215,30 +214,67 @@ test_that("1. Input checks", { "Parameter 'varname' must be a character." ) ) - # # coords - # expect_warning( - # SaveExp(data = dat2, coords = list(sdate = coords2[[1]]), - # varname = 'tas', metadata = list(tas = list(level = '2m')), - # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL), - # "Coordinate 'lon' is not provided and it will be set as index in element coords.", - # "Coordinate 'lat' is not provided and it will be set as index in element coords.", - # "Coordinate 'ftime' is not provided and it will be set as index in element coords." - # ) - # # varname, metadata, spatial coords, unknown dim - # expect_warning( - # SaveExp(data = dat1, ftime_dim = NULL, sdate_dim = NULL, memb_dim = NULL, - # dat_dim = NULL, var_dim = NULL, single_file = TRUE), - # "Parameter 'varname' is NULL. 
It will be assigned to 'X'.", - # "Parameter 'metadata' is not provided so the metadata saved will be incomplete.", - # paste0("Spatial coordinates not found.") - # ) + # varname, metadata, spatial coords, unknown dim expect_error( SaveExp(data = dat1, varname = 1, ftime_dim = NULL, sdate_dim = NULL, memb_dim = NULL, dat_dim = NULL, var_dim = NULL), paste0("Parameter 'varname' must be a character string with the ", "variable names.") ) + # ftime_dim + expect_error( + SaveExp(data = dat4, coords = coords4, + metadata = list(tas = list(level = '2m')), + Dates = dates4, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL), + paste0("Parameter 'ftime_dim' is not found in 'data' dimension.") + ) + # Dates dimension check + # expect_warning( + # SaveExp(data = dat4, coords = coords4, + # metadata = list(tas = list(level = '2m')), + # Dates = NULL, ftime_dim = NULL, memb_dim = NULL, + # dat_dim = NULL, var_dim = NULL), + # paste0("Dates must be provided if 'data' must be saved in separated files. ", + # "All data will be saved in a single file.") + # ) + # Without ftime and sdate + expect_error( + SaveExp(data = dat3, coords = coords3, + metadata = list(tas = list(level = '2m')), + Dates = dates5, ftime_dim = 'ftime', memb_dim = NULL, + dat_dim = NULL, var_dim = NULL, sdate_dim = NULL), + paste0("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", + "dimensions of length greater than 1.") + ) + # expect_warning( + # SaveExp(data = dat2, coords = coords2, + # metadata = list(tas = list(level = '2m')), + # startdates = c(paste(1:11, collapse = '')), + # Dates = dates2, ftime_dim = 'ftime', memb_dim = NULL, + # dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), + # paste0("Parameter 'startdates' should be a character string containing ", + # "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + # "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + # ) + # expect_warning( + # SaveExp(data = dat2, coords = coords2, + # metadata = list(tas = list(level = '2m')), + # Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, + # dat_dim = NULL, var_dim = NULL, sdate_dim = 'sdate'), + # paste0("Dates must be provided if 'data' must be saved in separated files. ", + # "All data will be saved in a single file.") + # ) + # (dat3) Without sdate_dim + # expect_warning( + # SaveExp(data = dat3, coords = coords3, + # metadata = list(tas = list(level = '2m')), + # Dates = NULL, ftime_dim = 'ftime', memb_dim = NULL, + # dat_dim = NULL, var_dim = NULL, sdate_dim = NULL, + # extra_string = 'nosdate3.nc', single_file = FALSE), + # paste0("Dates must be provided if 'data' must be saved in separated files. 
", + # "All data will be saved in a single file.") + # ) }) ############################################## diff --git a/tests/testthat/test-CST_SplitDim.R b/tests/testthat/test-CST_SplitDim.R index 45e2b1a89cf9895241f2181ea3ea324a2700fc23..f8ad88ee09329e6106b630359643641d79b65f63 100644 --- a/tests/testthat/test-CST_SplitDim.R +++ b/tests/testthat/test-CST_SplitDim.R @@ -10,6 +10,7 @@ indices1 <- c(rep(1,5), rep(2,5), rep (3, 5), rep(4, 5)) output1 <- matrix(data1$data, nrow = 5, ncol = 4) names(dim(output1)) <- c('time', 'monthly') output1 <- list(data = output1) +output1$dims <- dim(output1$data) class(output1) <- 's2dv_cube' exp_cor <- 1 : 20 @@ -21,6 +22,7 @@ class(exp_cor) <- 's2dv_cube' output2 <- matrix(data1$data, nrow = 5, ncol = 4) names(dim(output2)) <- c('time', 'index') output2 <- list(data = output2) +output2$dims <- dim(output2$data) class(output2) <- 's2dv_cube' time2 <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), @@ -41,6 +43,7 @@ output3 <- c(data3$data, rep(NA, 4)) dim(output3) <- c(time = 8, monthly = 3) result3 <- data3 result3$data <- output3 +result3$dims <- dim(result3$data) # dat4 data4 <- list(data = array(rnorm(10), dim = c(sdate = 2, lon = 5))) @@ -92,38 +95,62 @@ test_that("2. Output checks", { ############################################## -# test_that("3. Output checks: sample data", { -# output <- lonlat_temp$exp$data -# output <- abind(output[, , , 1, ,], output[, , , 2, ,], output[, , , 3, ,], along = 5) -# dim(output) <- c(dataset = 1, member = 15, sdate = 6, ftime = 1, -# lat = 22, lon = 53, monthly = 3) -# result <- lonlat_temp$exp -# result$data <- output -# expect_equal( -# CST_SplitDim(data = lonlat_temp$exp, split_dim = 'ftime'), -# result -# ) -# expect_equal( -# dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', -# freq = 5)$data), -# c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22, -# lon = 53, index = 3) -# ) -# expect_warning( -# CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5, -# new_dim_name = c('a', 'b')), -# paste0("Parameter 'new_dim_name' has length greater than 1 ", -# "and only the first elemenst is used.") -# ) -# expect_error( -# CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5, -# new_dim_name = 3), -# "Parameter 'new_dim_name' must be character string" -# ) -# expect_equal( -# dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', -# freq = 5, new_dim_name = 'wt')$data), -# c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22, -# lon = 53, wt = 3) -# ) -# }) +test_that("3. 
Output checks: sample data", {
+  output <- lonlat_temp$exp$data
+  output <- abind(output[, , , 1, ,], output[, , , 2, ,], output[, , , 3, ,], along = 5)
+  dim(output) <- c(dataset = 1, member = 15, sdate = 6, ftime = 1,
+                   lat = 22, lon = 53, monthly = 3)
+  result <- lonlat_temp$exp
+  result$data <- output
+  result$attrs$Dates <- s2dv::Reorder(result$attrs$Dates, c('sdate', 'ftime'))
+  dim(result$attrs$Dates) <- c(ftime = 1, sdate = 6, monthly = 3)
+  result$dims <- c(dataset = 1, member = 15, sdate = 6, ftime = 1, lat = 22,
+                   lon = 53, monthly = 3)
+  attributes(result$attrs$Dates)$end <- NULL
+  expect_equal(
+    CST_SplitDim(data = lonlat_temp$exp, split_dim = 'ftime', ftime_dim = 'ftime'),
+    result
+  )
+  expect_equal(
+    dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member',
+                     freq = 5)$data),
+    c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22,
+      lon = 53, index = 3)
+  )
+  expect_warning(
+    CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5,
+                 new_dim_name = c('a', 'b')),
+    paste0("Parameter 'new_dim_name' has length greater than 1 ",
+           "and only the first elemenst is used.")
+  )
+  expect_error(
+    CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member', freq = 5,
+                 new_dim_name = 3),
+    "Parameter 'new_dim_name' must be character string"
+  )
+  expect_equal(
+    dim(CST_SplitDim(data = lonlat_temp$exp, split_dim = 'member',
+                     freq = 5, new_dim_name = 'wt')$data),
+    c(dataset = 1, member = 5, sdate = 6, ftime = 3, lat = 22,
+      lon = 53, wt = 3)
+  )
+})
+
+##############################################
+
+test_that("4. Output checks II", {
+  res <- CST_SplitDim(lonlat_prec_st, indices = lonlat_prec_st$attrs$Dates[,1],
+                      split_dim = 'sdate', freq = 'year', return_indices = T)
+  expect_equal(
+    names(res),
+    c('data', 'indices')
+  )
+  expect_equal(
+    res$dims,
+    dim(res$data)
+  )
+  expect_equal(
+    all(names(dim(res$data$attrs$Dates)) %in% names(res$data$dims)),
+    TRUE
+  )
+})
diff --git a/tests/testthat/test-CST_Subset.R b/tests/testthat/test-CST_Subset.R
index 9fc04b48408c654e40c13f3537a5daff387261f9..2a4fdd1f329f91ea3491b2f0f48e184ec635607f 100644
--- a/tests/testthat/test-CST_Subset.R
+++ b/tests/testthat/test-CST_Subset.R
@@ -50,6 +50,26 @@ test_that("2. Output checks: CST_Subset", {
     names(res1$coords),
     c("member", "ftime", "lon")
   )
+  ## lat
+  expect_equal(
+    res1$coords$lat,
+    NULL
+  )
+  ## lon
+  expect_equal(
+    as.vector(res1$coords$lon),
+    c(6, 7)
+  )
+  ## sdate
+  expect_equal(
+    res1$coords$sdate,
+    NULL
+  )
+  ## member
+  expect_equal(
+    as.vector(res1$coords$member),
+    c(1, 2)
+  )
   # Check attrs
   expect_equal(
     names(res1$attrs),
diff --git a/vignettes/RainFARM_vignette.Rmd b/vignettes/RainFARM_vignette.Rmd
index a51d75cb9160e14b5b1247ee1a4039486a2f1279..27d7423320eed8442a0b0111869b8f8e67bec9be 100644
--- a/vignettes/RainFARM_vignette.Rmd
+++ b/vignettes/RainFARM_vignette.Rmd
@@ -180,21 +180,21 @@ slopes <- CST_RFSlope(exp, time_dim = c("member", "ftime"))
 dim(slopes)
 # dataset     var   sdate
 #       1       1       3
-slopes
-, , 1
+# slopes
+# , , 1
 
-     [,1]
-[1,] 1.09957
+#      [,1]
+# [1,] 1.09957
 
-, , 2
+# , , 2
 
-     [,1]
-[1,] 1.768861
+#      [,1]
+# [1,] 1.768861
 
-, , 3
+# , , 3
 
-     [,1]
-[1,] 1.190176
+#      [,1]
+# [1,] 1.190176
 ```
which returns an array of spectral slopes, one for each "dataset" and starting date "sdate".
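The fitted slopes can then be reused in the downscaling step. As a hedged sketch (not part of the vignette diff above; `mean_slope`, `exp_down`, `nf = 20` and `nens = 3` are illustrative choices), one option is to condense the slopes into a single representative value and pass it to the `slope` argument of `CST_RainFARM`:

```r
# Use the mean fitted spectral slope as a fixed slope for RainFARM downscaling
mean_slope <- mean(slopes)
exp_down <- CST_RainFARM(exp, nf = 20, kmin = 1, nens = 3,
                         slope = mean_slope,
                         time_dim = c("member", "ftime"))
dim(exp_down$data)
```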