From 845daf33714570d590f8fab32ad103433ac90703 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 30 Jan 2025 10:14:45 +0100 Subject: [PATCH 1/9] added CST_summary.R --- R/CST_summary.R | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 R/CST_summary.R diff --git a/R/CST_summary.R b/R/CST_summary.R new file mode 100644 index 00000000..520d3806 --- /dev/null +++ b/R/CST_summary.R @@ -0,0 +1,125 @@ +#' Generate a Summary of the data in the s2dv_cube +#' +#'This function prints the summary of an object of class \code{s2dv_cube}, +#'loaded using \code{CST_Start}. +#' +#' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} +#' +#' @param data_cube An \code{s2dv_cube} object containing: +#' - `data`: N-dimensional array with named dimensions +#' - `dims`: Dimensions, including `var` (variables). +#' - `attrs`: Attributes such as `VarName` and `Metadata`. +#' - `coords`: Named list with coordinates of dimensions. +#' +#' @return A printed summary of the \code{s2dv_cube} object, including: +#' - Months that have been loaded. +#' - Range of the dates that have been loaded. +#' - Object dimensions. +#' - Basic statistical summary of the data. +#' - Missing files. +#' - Number of NAs per time dimension and latitude/longitude dimensions +#' - Variables that have been loaded, along with their units +#' +#' @details The function uses the metadata from the s2dv cube to extract +#' variable names and units. It prints a detailed summary of all the variables loaded into the +#' \code{s2dv_cube} object. +#' +#' @examples +#' # Example s2dv cube paths +#' repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +#' repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +#' +#' # Create data cube +#' data_cube <- CST_Start(dat = list( +#' list(name = 'system4_m1', path = repos2), +#' list(name = 'system5_m1', path = repos)), +#' var = c('tas', 'sfcWind'), +#' sdate = '20170101', +#' ensemble = indices(1), +#' time = indices(1), +#' lat = indices(1:5), +#' lon = indices(1:5), +#' synonims = list(lat = c('lat', 'latitude'), +#' lon = c('lon', 'longitude')), +#' return_vars = list(time = 'sdate', +#' longitude = 'dat', +#' latitude = 'dat'), +#' metadata_dims = c('dat', 'var'), +#' retrieve = TRUE) +#' +#' # Generate summary +#' CST_summary(data_cube) +#' +#' @seealso \link[CSTools]{CST_start} for creating an s2dv cube object. +#' @importFrom StartR Start +#' @export +#' +#' +CST_summary <- function(data_cube) { + # Get name, leadtime months and date range + object_name <- deparse(substitute(data_cube)) + + date_format <- "%b %d %Y" + months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) + months <- paste(as.character(months), collapse=", ") + sdate_min <- format(min(as.Date(data_cube$attrs$Dates), na.rm = TRUE), + format = date_format) + sdate_max <- format(max(as.Date(data_cube$attrs$Dates), na.rm = TRUE), + format = date_format) + # Log the summary + print(paste(Sys.time(), "DATA SUMMARY:")) + + print(paste(Sys.time(),object_name, "months:", months)) + print(paste(Sys.time(), object_name, "range:", sdate_min, "to", sdate_max)) + print(paste(Sys.time(), object_name, "dimensions:")) + + # Use capture.output() and for loop to display results neatly + output_string <- capture.output(dim(data_cube$data)) + for (i in output_string) { + print(paste(Sys.time(), i)) + } + + # Print statistical summary of the data for every variable + print(paste(Sys.time(),"Statistical summary of the data in ", object_name, ":")) + + for (var_index in 1:data_cube$dims[['var']]) { + variable_name <- data_cube$attrs$Variable$varName[var_index] + variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units + print(paste(Sys.time(), "Variable: ", variable_name, + " (units: ", variable_units, ")")) + output_string <- capture.output(summary(Subset(data_cube$data, + along = 'var', + indices = list(var_index)))) + for (i in output_string) { + print(paste(Sys.time(), i)) + } + } + + # Number of NAs per time dimension and latitude/longitude dimensions + + list_na <- lapply(seq_along(dim(data_cube$data)), function(dim) { + apply(data_cube$data, dim, function(x) sum(is.na(x))) + }) + + # Identify dimensions with NAs + na_list <- sapply(list_na, function(x) which(x != 0) %>% paste(collapse = ",")) %>% unlist() + names(na_list) <- names(dim(data_cube$data)) + + # Count the number of NAs per identified dimension + num_nas <- sapply(list_na, function(x) x[which(x != 0)] %>% paste(collapse = ",")) %>% unlist() + names(num_nas) <- names(dim(data_cube$data)) + + # Generate output strings + output_na_list <- paste(names(na_list), na_list, sep = ": ", collapse = " ") + output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") + + print(paste(Sys.time(), "Dimensions with NA values")) + print(paste(Sys.time(), output_na_list)) + + print(paste(Sys.time(), "Number of NAs per dimension")) + print(paste(Sys.time(), output_num_nas)) + + print(paste(Sys.time(), "---------------------------------------------")) + invisible(gc()) +} + -- GitLab From 622e5a231db3e1796431d734832a21b2470ee8e9 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 30 Jan 2025 15:53:18 +0100 Subject: [PATCH 2/9] show loaded files, added display switch --- R/CST_summary.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/CST_summary.R b/R/CST_summary.R index 520d3806..3afd7330 100644 --- a/R/CST_summary.R +++ b/R/CST_summary.R @@ -55,7 +55,7 @@ #' @export #' #' -CST_summary <- function(data_cube) { +CST_summary <- function(data_cube,loaded_files = TRUE, dimension = TRUE) { # Get name, leadtime months and date range object_name <- deparse(substitute(data_cube)) @@ -113,12 +113,20 @@ CST_summary <- function(data_cube) { output_na_list <- paste(names(na_list), na_list, sep = ": ", collapse = " ") output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") + if(dimension == TRUE){ print(paste(Sys.time(), "Dimensions with NA values")) print(paste(Sys.time(), output_na_list)) print(paste(Sys.time(), "Number of NAs per dimension")) print(paste(Sys.time(), output_num_nas)) - + } + # Loaded files + if(loaded_files == TRUE){ + all_files <- lapply(data_cube$attrs$source_files, unlist) %>% unlist() + loaded_files <- all_files[!is.na(all_files)] + print(paste(Sys.time(), "Loaded files:")) + print(paste(Sys.time(), loaded_files)) + } print(paste(Sys.time(), "---------------------------------------------")) invisible(gc()) } -- GitLab From 665b9f01e420132ec95695e5dc4d244f5d3661c4 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Wed, 5 Feb 2025 14:24:41 +0100 Subject: [PATCH 3/9] renamed switch to na_dim --- R/CST_summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CST_summary.R b/R/CST_summary.R index 3afd7330..19ba7e94 100644 --- a/R/CST_summary.R +++ b/R/CST_summary.R @@ -55,7 +55,7 @@ #' @export #' #' -CST_summary <- function(data_cube,loaded_files = TRUE, dimension = TRUE) { +CST_summary <- function(data_cube,loaded_files = TRUE, na_dim = TRUE) { # Get name, leadtime months and date range object_name <- deparse(substitute(data_cube)) @@ -113,7 +113,7 @@ CST_summary <- function(data_cube,loaded_files = TRUE, dimension = TRUE) { output_na_list <- paste(names(na_list), na_list, sep = ": ", collapse = " ") output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") - if(dimension == TRUE){ + if(na_dim == TRUE){ print(paste(Sys.time(), "Dimensions with NA values")) print(paste(Sys.time(), output_na_list)) -- GitLab From 1b962d4b8f4bc2c3645dc6ea666cb487b72568d1 Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Wed, 12 Feb 2025 10:04:51 +0100 Subject: [PATCH 4/9] modified CST_summary,added unit-test --- R/CST_summary.R | 79 +++++++++++++++++-------------- tests/testthat/test-CST_summary.R | 26 ++++++++++ 2 files changed, 69 insertions(+), 36 deletions(-) create mode 100644 tests/testthat/test-CST_summary.R diff --git a/R/CST_summary.R b/R/CST_summary.R index 19ba7e94..50e062b1 100644 --- a/R/CST_summary.R +++ b/R/CST_summary.R @@ -1,7 +1,7 @@ -#' Generate a Summary of the data in the s2dv_cube +#' Generate a Summary of the data and metadata in the s2dv_cube object #' -#'This function prints the summary of an object of class \code{s2dv_cube}, -#'loaded using \code{CST_Start}. +#'This function prints the summary of the data and metadata of an object of class \code{s2dv_cube}, +#'loaded using \code{CST_Start} or \code{SUNSET}. #' #' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} #' @@ -10,21 +10,28 @@ #' - `dims`: Dimensions, including `var` (variables). #' - `attrs`: Attributes such as `VarName` and `Metadata`. #' - `coords`: Named list with coordinates of dimensions. +#' @param show_NA Logical. If \code{TRUE}, details of NA values in the loaded object will be displayed in the output. +#' @param loaded_files Logical. If \code{TRUE}, the names of the loaded files will be displayed. #' -#' @return A printed summary of the \code{s2dv_cube} object, including: -#' - Months that have been loaded. -#' - Range of the dates that have been loaded. -#' - Object dimensions. -#' - Basic statistical summary of the data. -#' - Missing files. -#' - Number of NAs per time dimension and latitude/longitude dimensions -#' - Variables that have been loaded, along with their units +#' @return A printed summary of the data and metadata of the \code{s2dv_cube} object, including: +#' - months: Months that have been loaded. +#' - range: Range of the dates that have been loaded. +#' - dimensions: Object dimensions. +#' - Statistical summary of the data in data_cube: Basic statistical summary of the data. +#' - Variable: Variables that have been loaded, along with their units (units:) +#' - NA-Indices per Dimension: Index with NA values per dimension +#' - Number of NAs in NA-Indices per Dimensions: Number of NAs, in the Indices with NA values per dimension +#' - Loaded files: Successfully loaded Files #' -#' @details The function uses the metadata from the s2dv cube to extract -#' variable names and units. It prints a detailed summary of all the variables loaded into the -#' \code{s2dv_cube} object. -#' +#' @details The function uses the data and metadata from the loaded \code{s2dv_cube} object to generate a summary of the object. +#' The summary includes the return parameters mentioned above. +#' The \code{show_NA} and \code{loaded_files} parameters allow the user to choose whether to display: +#' - Details of NA values in the object (if \code{show_NA = TRUE}). +#' - Information about successfully loaded files (if \code{loaded_files = TRUE}), allows users to verify that the desired files were loaded. +#' #' @examples +#' CST_summary(lonlat_prec_st) +#' #' # Example s2dv cube paths #' repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" #' repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" @@ -50,12 +57,12 @@ #' # Generate summary #' CST_summary(data_cube) #' -#' @seealso \link[CSTools]{CST_start} for creating an s2dv cube object. +#' @seealso \link[CSTools]{CST_start} or \link[SUNSET] for creating an s2dv cube object. #' @importFrom StartR Start #' @export #' #' -CST_summary <- function(data_cube,loaded_files = TRUE, na_dim = TRUE) { +CST_summary <- function(data_cube,loaded_files = TRUE, show_NA = TRUE) { # Get name, leadtime months and date range object_name <- deparse(substitute(data_cube)) @@ -67,34 +74,34 @@ CST_summary <- function(data_cube,loaded_files = TRUE, na_dim = TRUE) { sdate_max <- format(max(as.Date(data_cube$attrs$Dates), na.rm = TRUE), format = date_format) # Log the summary - print(paste(Sys.time(), "DATA SUMMARY:")) + cat("DATA SUMMARY:\n") - print(paste(Sys.time(),object_name, "months:", months)) - print(paste(Sys.time(), object_name, "range:", sdate_min, "to", sdate_max)) - print(paste(Sys.time(), object_name, "dimensions:")) + cat(paste(object_name, "months:", months),"\n") + cat(paste(object_name, "range:", sdate_min, "to", sdate_max),"\n") + cat(paste(object_name, "dimensions:"),"\n") # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) for (i in output_string) { - print(paste(Sys.time(), i)) + cat(i,"\n") } # Print statistical summary of the data for every variable - print(paste(Sys.time(),"Statistical summary of the data in ", object_name, ":")) - + cat(paste("Statistical summary of the data in ", object_name, ":"),"\n") + if(any(names(data_cube$dims) == "var")){ for (var_index in 1:data_cube$dims[['var']]) { variable_name <- data_cube$attrs$Variable$varName[var_index] variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units - print(paste(Sys.time(), "Variable: ", variable_name, - " (units: ", variable_units, ")")) + cat(paste("Variable: ", variable_name, + " (units: ", variable_units, ")"),"\n") output_string <- capture.output(summary(Subset(data_cube$data, along = 'var', indices = list(var_index)))) for (i in output_string) { - print(paste(Sys.time(), i)) + cat(i,"\n") } } - + } # Number of NAs per time dimension and latitude/longitude dimensions list_na <- lapply(seq_along(dim(data_cube$data)), function(dim) { @@ -113,21 +120,21 @@ CST_summary <- function(data_cube,loaded_files = TRUE, na_dim = TRUE) { output_na_list <- paste(names(na_list), na_list, sep = ": ", collapse = " ") output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") - if(na_dim == TRUE){ - print(paste(Sys.time(), "Dimensions with NA values")) - print(paste(Sys.time(), output_na_list)) + if(show_NA == TRUE){ + cat("NA-Indices per Dimension\n") + cat(output_na_list,"\n") - print(paste(Sys.time(), "Number of NAs per dimension")) - print(paste(Sys.time(), output_num_nas)) + cat("Number of NAs in NA-Indices per Dimensions\n") + cat(output_num_nas,"\n") } # Loaded files if(loaded_files == TRUE){ all_files <- lapply(data_cube$attrs$source_files, unlist) %>% unlist() loaded_files <- all_files[!is.na(all_files)] - print(paste(Sys.time(), "Loaded files:")) - print(paste(Sys.time(), loaded_files)) + cat("Loaded files:\n") + cat(paste(loaded_files, collapse = "\n"), "\n") } - print(paste(Sys.time(), "---------------------------------------------")) + cat("---------------------------------------------","\n") invisible(gc()) } diff --git a/tests/testthat/test-CST_summary.R b/tests/testthat/test-CST_summary.R new file mode 100644 index 00000000..dcb8e7d5 --- /dev/null +++ b/tests/testthat/test-CST_summary.R @@ -0,0 +1,26 @@ +############################################## + +output <- "DATA SUMMARY: +lonlat_prec months: March, February +lonlat_prec range: Mar 01 2011 to Mar 31 2013 +lonlat_prec dimensions: +dataset member sdate ftime lat lon + 1 6 3 31 4 4 +Statistical summary of the data in lonlat_prec : +NA-Indices per Dimension +dataset: member: sdate: ftime: lat: lon: +Number of NAs in NA-Indices per Dimensions +dataset: member: sdate: ftime: lat: lon: +Loaded files: +/esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20101101.nc +/esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20111101.nc +/esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20121101.nc +--------------------------------------------- " + +############################################## + +test_that("1. Output checks", { + expect_equal(capture.output(CST_summary(lonlat_prec)), + capture.output(cat(output))) +}) + -- GitLab From aad3239d9e7385bc1bf92be4b3e448564fc61c3e Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Wed, 12 Feb 2025 10:22:26 +0100 Subject: [PATCH 5/9] modified unit test and cst_summary --- R/CST_summary.R | 4 +--- tests/testthat/test-CST_summary.R | 32 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/R/CST_summary.R b/R/CST_summary.R index 50e062b1..e4605ae0 100644 --- a/R/CST_summary.R +++ b/R/CST_summary.R @@ -58,10 +58,8 @@ #' CST_summary(data_cube) #' #' @seealso \link[CSTools]{CST_start} or \link[SUNSET] for creating an s2dv cube object. -#' @importFrom StartR Start +#' @import startR #' @export -#' -#' CST_summary <- function(data_cube,loaded_files = TRUE, show_NA = TRUE) { # Get name, leadtime months and date range object_name <- deparse(substitute(data_cube)) diff --git a/tests/testthat/test-CST_summary.R b/tests/testthat/test-CST_summary.R index dcb8e7d5..94c6d1af 100644 --- a/tests/testthat/test-CST_summary.R +++ b/tests/testthat/test-CST_summary.R @@ -1,6 +1,6 @@ ############################################## - -output <- "DATA SUMMARY: +# Default output +output_default <- "DATA SUMMARY: lonlat_prec months: March, February lonlat_prec range: Mar 01 2011 to Mar 31 2013 lonlat_prec dimensions: @@ -17,10 +17,34 @@ Loaded files: /esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20121101.nc --------------------------------------------- " +# Conditional output +output_conditional <- "DATA SUMMARY: +lonlat_prec months: March, February +lonlat_prec range: Mar 01 2011 to Mar 31 2013 +lonlat_prec dimensions: +dataset member sdate ftime lat lon + 1 6 3 31 4 4 +Statistical summary of the data in lonlat_prec : +--------------------------------------------- " + ############################################## test_that("1. Output checks", { - expect_equal(capture.output(CST_summary(lonlat_prec)), - capture.output(cat(output))) + # Default output + actual_out <- capture.output(CST_summary(lonlat_prec)) + expect_out <- capture.output(cat(output_default)) + expect_equal( + actual_out, + expect_out + ) + + # Conditional output + actual_out <- capture.output(CST_summary(lonlat_prec, loaded_files = FALSE, show_NA = FALSE)) + expect_out <- capture.output(cat(output_conditional)) + + expect_equal( + actual_out, + expect_out + ) }) -- GitLab From 6fd9e4f43c93b6e6bfb5244f8c52a82344b1199a Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Wed, 12 Feb 2025 10:28:09 +0100 Subject: [PATCH 6/9] doc update cst_summary --- R/CST_summary.R | 1 - tests/testthat/test-CST_summary.R | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/CST_summary.R b/R/CST_summary.R index e4605ae0..73732c01 100644 --- a/R/CST_summary.R +++ b/R/CST_summary.R @@ -58,7 +58,6 @@ #' CST_summary(data_cube) #' #' @seealso \link[CSTools]{CST_start} or \link[SUNSET] for creating an s2dv cube object. -#' @import startR #' @export CST_summary <- function(data_cube,loaded_files = TRUE, show_NA = TRUE) { # Get name, leadtime months and date range diff --git a/tests/testthat/test-CST_summary.R b/tests/testthat/test-CST_summary.R index 94c6d1af..69881800 100644 --- a/tests/testthat/test-CST_summary.R +++ b/tests/testthat/test-CST_summary.R @@ -1,5 +1,7 @@ ############################################## -# Default output + +# Default output + output_default <- "DATA SUMMARY: lonlat_prec months: March, February lonlat_prec range: Mar 01 2011 to Mar 31 2013 @@ -18,6 +20,7 @@ Loaded files: --------------------------------------------- " # Conditional output + output_conditional <- "DATA SUMMARY: lonlat_prec months: March, February lonlat_prec range: Mar 01 2011 to Mar 31 2013 -- GitLab From 82b47a328b1323f730e75f9c9a35b144c579588e Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Thu, 13 Feb 2025 09:50:47 +0100 Subject: [PATCH 7/9] renamed to CST_Summary --- R/{CST_summary.R => CST_Summary.R} | 10 ++++++---- .../{test-CST_summary.R => test-CST_Summary.R} | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) rename R/{CST_summary.R => CST_Summary.R} (97%) rename tests/testthat/{test-CST_summary.R => test-CST_Summary.R} (92%) diff --git a/R/CST_summary.R b/R/CST_Summary.R similarity index 97% rename from R/CST_summary.R rename to R/CST_Summary.R index 73732c01..857b0526 100644 --- a/R/CST_summary.R +++ b/R/CST_Summary.R @@ -30,9 +30,11 @@ #' - Information about successfully loaded files (if \code{loaded_files = TRUE}), allows users to verify that the desired files were loaded. #' #' @examples -#' CST_summary(lonlat_prec_st) +#' # Example 1: +#' CST_Summary(lonlat_prec_st) #' -#' # Example s2dv cube paths +#' # Example 2: +#' # s2dv cube paths #' repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" #' repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" #' @@ -55,11 +57,11 @@ #' retrieve = TRUE) #' #' # Generate summary -#' CST_summary(data_cube) +#' CST_Summary(data_cube) #' #' @seealso \link[CSTools]{CST_start} or \link[SUNSET] for creating an s2dv cube object. #' @export -CST_summary <- function(data_cube,loaded_files = TRUE, show_NA = TRUE) { +CST_Summary <- function(data_cube,loaded_files = TRUE, show_NA = TRUE) { # Get name, leadtime months and date range object_name <- deparse(substitute(data_cube)) diff --git a/tests/testthat/test-CST_summary.R b/tests/testthat/test-CST_Summary.R similarity index 92% rename from tests/testthat/test-CST_summary.R rename to tests/testthat/test-CST_Summary.R index 69881800..1674cd2c 100644 --- a/tests/testthat/test-CST_summary.R +++ b/tests/testthat/test-CST_Summary.R @@ -34,7 +34,7 @@ Statistical summary of the data in lonlat_prec : test_that("1. Output checks", { # Default output - actual_out <- capture.output(CST_summary(lonlat_prec)) + actual_out <- capture.output(CST_Summary(lonlat_prec)) expect_out <- capture.output(cat(output_default)) expect_equal( actual_out, @@ -42,7 +42,7 @@ test_that("1. Output checks", { ) # Conditional output - actual_out <- capture.output(CST_summary(lonlat_prec, loaded_files = FALSE, show_NA = FALSE)) + actual_out <- capture.output(CST_Summary(lonlat_prec, loaded_files = FALSE, show_NA = FALSE)) expect_out <- capture.output(cat(output_conditional)) expect_equal( -- GitLab From 950be81c492463c775fb33095e732fb616e7edd1 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 14 Feb 2025 12:50:45 +0100 Subject: [PATCH 8/9] Format fixes; modify parameter names and default values; improvements in logic --- R/CST_Summary.R | 163 ++++++++++++++++++++++++------------------------ 1 file changed, 83 insertions(+), 80 deletions(-) diff --git a/R/CST_Summary.R b/R/CST_Summary.R index 857b0526..84e0fe97 100644 --- a/R/CST_Summary.R +++ b/R/CST_Summary.R @@ -5,19 +5,19 @@ #' #' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} #' -#' @param data_cube An \code{s2dv_cube} object containing: +#' @param data An \code{s2dv_cube} object containing: #' - `data`: N-dimensional array with named dimensions #' - `dims`: Dimensions, including `var` (variables). #' - `attrs`: Attributes such as `VarName` and `Metadata`. #' - `coords`: Named list with coordinates of dimensions. #' @param show_NA Logical. If \code{TRUE}, details of NA values in the loaded object will be displayed in the output. -#' @param loaded_files Logical. If \code{TRUE}, the names of the loaded files will be displayed. +#' @param show_loaded_files Logical. If \code{TRUE}, the names of the loaded files will be displayed. #' #' @return A printed summary of the data and metadata of the \code{s2dv_cube} object, including: #' - months: Months that have been loaded. #' - range: Range of the dates that have been loaded. #' - dimensions: Object dimensions. -#' - Statistical summary of the data in data_cube: Basic statistical summary of the data. +#' - Statistical summary of the data in data: Basic statistical summary of the data. #' - Variable: Variables that have been loaded, along with their units (units:) #' - NA-Indices per Dimension: Index with NA values per dimension #' - Number of NAs in NA-Indices per Dimensions: Number of NAs, in the Indices with NA values per dimension @@ -25,115 +25,118 @@ #' #' @details The function uses the data and metadata from the loaded \code{s2dv_cube} object to generate a summary of the object. #' The summary includes the return parameters mentioned above. -#' The \code{show_NA} and \code{loaded_files} parameters allow the user to choose whether to display: +#' The \code{show_NA} and \code{show_loaded_files} parameters allow the user to choose whether to display: #' - Details of NA values in the object (if \code{show_NA = TRUE}). -#' - Information about successfully loaded files (if \code{loaded_files = TRUE}), allows users to verify that the desired files were loaded. +#' - Information about successfully loaded files (if \code{show_loaded_files = TRUE}), allows users to verify that the desired files were loaded. #' #' @examples #' # Example 1: -#' CST_Summary(lonlat_prec_st) +#' CST_Summary(data = lonlat_temp_st$exp) #' #' # Example 2: +#' \dontrun{ #' # s2dv cube paths -#' repos <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" -#' repos2 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +#' repos1 <- "/esarchive/exp/ecmwf/system4_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" +#' repos2 <- "/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc" #' #' # Create data cube -#' data_cube <- CST_Start(dat = list( -#' list(name = 'system4_m1', path = repos2), -#' list(name = 'system5_m1', path = repos)), -#' var = c('tas', 'sfcWind'), -#' sdate = '20170101', -#' ensemble = indices(1), -#' time = indices(1), -#' lat = indices(1:5), -#' lon = indices(1:5), -#' synonims = list(lat = c('lat', 'latitude'), -#' lon = c('lon', 'longitude')), -#' return_vars = list(time = 'sdate', -#' longitude = 'dat', -#' latitude = 'dat'), -#' metadata_dims = c('dat', 'var'), -#' retrieve = TRUE) +#' data <- CST_Start(dat = list( +#' list(name = 'system4_m1', path = repos1), +#' list(name = 'system5_m1', path = repos2)), +#' var = c('tas', 'sfcWind'), +#' sdate = '20170101', +#' ensemble = indices(1), +#' time = indices(1:3), +#' lat = indices(1:5), +#' lon = indices(1:5), +#' synonims = list(lat = c('lat', 'latitude'), +#' lon = c('lon', 'longitude')), +#' return_vars = list(time = 'sdate', +#' longitude = 'dat', +#' latitude = 'dat'), +#' metadata_dims = c('dat', 'var'), +#' retrieve = TRUE) #' #' # Generate summary -#' CST_Summary(data_cube) +#' CST_Summary(data) +#' } #' -#' @seealso \link[CSTools]{CST_start} or \link[SUNSET] for creating an s2dv cube object. +#' @seealso \link[CSTools]{CST_start} or \link[CSTools]{s2dv_cube} for creating +#' an s2dv cube object. #' @export -CST_Summary <- function(data_cube,loaded_files = TRUE, show_NA = TRUE) { + +CST_Summary <- function(data, show_loaded_files = FALSE, show_NA = FALSE) { # Get name, leadtime months and date range - object_name <- deparse(substitute(data_cube)) - + object_name <- deparse(substitute(data)) date_format <- "%b %d %Y" - months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) + months <- unique(format(as.Date(data$attrs$Dates), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(data_cube$attrs$Dates), na.rm = TRUE), + sdate_min <- format(min(as.Date(data$attrs$Dates), na.rm = TRUE), format = date_format) - sdate_max <- format(max(as.Date(data_cube$attrs$Dates), na.rm = TRUE), + sdate_max <- format(max(as.Date(data$attrs$Dates), na.rm = TRUE), format = date_format) # Log the summary cat("DATA SUMMARY:\n") - cat(paste(object_name, "months:", months),"\n") - cat(paste(object_name, "range:", sdate_min, "to", sdate_max),"\n") - cat(paste(object_name, "dimensions:"),"\n") + cat(paste(object_name, "months:", months), "\n") + cat(paste(object_name, "range:", sdate_min, "to", sdate_max), "\n") + cat(paste(object_name, "dimensions:"), "\n") # Use capture.output() and for loop to display results neatly - output_string <- capture.output(dim(data_cube$data)) + output_string <- capture.output(dim(data$data)) for (i in output_string) { cat(i,"\n") } # Print statistical summary of the data for every variable - cat(paste("Statistical summary of the data in ", object_name, ":"),"\n") - if(any(names(data_cube$dims) == "var")){ - for (var_index in 1:data_cube$dims[['var']]) { - variable_name <- data_cube$attrs$Variable$varName[var_index] - variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units - cat(paste("Variable: ", variable_name, - " (units: ", variable_units, ")"),"\n") - output_string <- capture.output(summary(Subset(data_cube$data, - along = 'var', - indices = list(var_index)))) - for (i in output_string) { - cat(i,"\n") + cat(paste("Statistical summary of the data in ", object_name, ":"), "\n") + ## TODO: Add parameter var_dim + if (any(names(data$dims) == "var")) { + for (var_index in 1:data$dims[['var']]) { + variable_name <- data$attrs$Variable$varName[var_index] + variable_units <- data$attrs$Variable$metadata[[variable_name]]$units + cat(paste0("Variable: ", variable_name, + " (units: ", variable_units, ")"), "\n") + output_string <- capture.output(summary(Subset(data$data, + along = 'var', + indices = list(var_index)))) + for (i in output_string) { + cat(i, "\n") + } } } - } - # Number of NAs per time dimension and latitude/longitude dimensions - - list_na <- lapply(seq_along(dim(data_cube$data)), function(dim) { - apply(data_cube$data, dim, function(x) sum(is.na(x))) - }) - - # Identify dimensions with NAs - na_list <- sapply(list_na, function(x) which(x != 0) %>% paste(collapse = ",")) %>% unlist() - names(na_list) <- names(dim(data_cube$data)) - - # Count the number of NAs per identified dimension - num_nas <- sapply(list_na, function(x) x[which(x != 0)] %>% paste(collapse = ",")) %>% unlist() - names(num_nas) <- names(dim(data_cube$data)) - - # Generate output strings - output_na_list <- paste(names(na_list), na_list, sep = ": ", collapse = " ") - output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") - - if(show_NA == TRUE){ - cat("NA-Indices per Dimension\n") - cat(output_na_list,"\n") - - cat("Number of NAs in NA-Indices per Dimensions\n") - cat(output_num_nas,"\n") + + if (show_NA) { + # Number of NAs per time dimension and latitude/longitude dimensions + list_na <- lapply(seq_along(dim(data$data)), function(dim) { + apply(data$data, dim, function(x) sum(is.na(x))) + }) + + # Identify dimensions with NAs + na_list <- sapply(list_na, function(x) which(x != 0) %>% paste(collapse = ",")) %>% unlist() + names(na_list) <- names(dim(data$data)) + + # Count the number of NAs per identified dimension + num_nas <- sapply(list_na, function(x) x[which(x != 0)] %>% paste(collapse = ",")) %>% unlist() + names(num_nas) <- names(dim(data$data)) + + # Generate output strings + output_na_list <- paste(names(na_list), na_list, sep = ": ", collapse = " ") + output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") + + cat("NA-Indices per Dimension\n") + cat(output_na_list,"\n") + + cat("Number of NAs in NA-Indices per Dimensions\n") + cat(output_num_nas,"\n") } # Loaded files - if(loaded_files == TRUE){ - all_files <- lapply(data_cube$attrs$source_files, unlist) %>% unlist() - loaded_files <- all_files[!is.na(all_files)] - cat("Loaded files:\n") - cat(paste(loaded_files, collapse = "\n"), "\n") + if (show_loaded_files) { + all_files <- lapply(data$attrs$source_files, unlist) %>% unlist() + loaded_files <- all_files[!is.na(all_files)] + cat("Loaded files:\n") + cat(paste(loaded_files, collapse = "\n"), "\n") } - cat("---------------------------------------------","\n") - invisible(gc()) + cat("---------------------------------------------", "\n") } -- GitLab From 00e089e3f1bb42262c2c044bb8b3262b411c18bb Mon Sep 17 00:00:00 2001 From: THEERTHA KARIYATHAN Date: Tue, 18 Feb 2025 16:01:53 +0100 Subject: [PATCH 9/9] fixes structure,doc,unit-test --- R/CST_Summary.R | 91 ++++++++++++++++++++----------- tests/testthat/test-CST_Summary.R | 75 ++++++++++++++++++------- 2 files changed, 115 insertions(+), 51 deletions(-) diff --git a/R/CST_Summary.R b/R/CST_Summary.R index 84e0fe97..07b56b94 100644 --- a/R/CST_Summary.R +++ b/R/CST_Summary.R @@ -1,34 +1,36 @@ #' Generate a Summary of the data and metadata in the s2dv_cube object #' -#'This function prints the summary of the data and metadata of an object of class \code{s2dv_cube}, -#'loaded using \code{CST_Start} or \code{SUNSET}. +#'This function prints the summary of the data and metadata of an object of +#'class \code{s2dv_cube}. #' #' @author Theertha Kariyathan, \email{theertha.kariyathan@bsc.es} #' #' @param data An \code{s2dv_cube} object containing: -#' - `data`: N-dimensional array with named dimensions -#' - `dims`: Dimensions, including `var` (variables). -#' - `attrs`: Attributes such as `VarName` and `Metadata`. -#' - `coords`: Named list with coordinates of dimensions. -#' @param show_NA Logical. If \code{TRUE}, details of NA values in the loaded object will be displayed in the output. -#' @param show_loaded_files Logical. If \code{TRUE}, the names of the loaded files will be displayed. +#' - \code{data}: N-dimensional array with named dimensions +#' - \code{dim}: Dimensions, including \code{var} (variables). +#' - \code{attrs}: Attributes such as \code{VarName} and \code{Metadata}. +#' - \code{coords}: Named list with coordinates of dimensions. +#' @param show_NA A logical value indicating if details of NA values in the +#' loaded object will be displayed in the output or not. Default = FALSE. +#' @param show_loaded_files A logical value indicating if the names of the +#' loaded files will be displayed in the output or not. Default = FALSE. +#' @param var_dim A character string indicating the name of the variable +#' dimension. Default = "var". #' -#' @return A printed summary of the data and metadata of the \code{s2dv_cube} object, including: +#' @details The function uses the data and metadata from the loaded +#' \code{s2dv_cube} object to generate a summary of the object.The summary +#' includes : #' - months: Months that have been loaded. #' - range: Range of the dates that have been loaded. #' - dimensions: Object dimensions. -#' - Statistical summary of the data in data: Basic statistical summary of the data. -#' - Variable: Variables that have been loaded, along with their units (units:) +#' - Statistical summary of the data in data: Basic statistical +#' summary of the data. +#' - Variable: Loaded Variables, along with their units (units:) #' - NA-Indices per Dimension: Index with NA values per dimension -#' - Number of NAs in NA-Indices per Dimensions: Number of NAs, in the Indices with NA values per dimension +#' - Number of NAs in NA-Indices per Dimensions: Number of NAs, +#' in the Indices with NA values per dimension #' - Loaded files: Successfully loaded Files -#' -#' @details The function uses the data and metadata from the loaded \code{s2dv_cube} object to generate a summary of the object. -#' The summary includes the return parameters mentioned above. -#' The \code{show_NA} and \code{show_loaded_files} parameters allow the user to choose whether to display: -#' - Details of NA values in the object (if \code{show_NA = TRUE}). -#' - Information about successfully loaded files (if \code{show_loaded_files = TRUE}), allows users to verify that the desired files were loaded. -#' +#' #' @examples #' # Example 1: #' CST_Summary(data = lonlat_temp_st$exp) @@ -65,12 +67,32 @@ #' an s2dv cube object. #' @export -CST_Summary <- function(data, show_loaded_files = FALSE, show_NA = FALSE) { +CST_Summary <- function(data, show_loaded_files = FALSE, show_NA = FALSE, + var_dim = "var") { + # Check 's2dv_cube' + if (!inherits(data, "s2dv_cube")) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + if (!is.logical(show_loaded_files)) { + stop("Parameter 'show_loaded_files' must be logical.") + } + if (!is.logical(show_NA)) { + stop("Parameter 'show_NA' must be logical.") + } + + if (!is.character(var_dim) || length(var_dim) != 1) { + stop("'var_dim' must be a single character string.") + } + + if (!(var_dim %in% names(data$dims))) { + warning(paste("Dimension", var_dim, "not found. Summary is not be split by variable.")) + } + # Get name, leadtime months and date range object_name <- deparse(substitute(data)) date_format <- "%b %d %Y" - months <- unique(format(as.Date(data$attrs$Dates), format = '%B')) - months <- paste(as.character(months), collapse=", ") + months <- unique(format(as.Date(data$attrs$Dates), format = "%B")) + months <- paste(as.character(months), collapse = ", ") sdate_min <- format(min(as.Date(data$attrs$Dates), na.rm = TRUE), format = date_format) sdate_max <- format(max(as.Date(data$attrs$Dates), na.rm = TRUE), @@ -85,27 +107,33 @@ CST_Summary <- function(data, show_loaded_files = FALSE, show_NA = FALSE) { # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data$data)) for (i in output_string) { - cat(i,"\n") + cat(i, "\n") } # Print statistical summary of the data for every variable cat(paste("Statistical summary of the data in ", object_name, ":"), "\n") - ## TODO: Add parameter var_dim - if (any(names(data$dims) == "var")) { - for (var_index in 1:data$dims[['var']]) { + + if (!(var_dim %in% names(data$dims))) { + output_string <- capture.output(summary(data$data)) + for (i in output_string) { + cat(i, "\n") + } + + } else { + for (var_index in 1:data$dims[[var_dim]]) { variable_name <- data$attrs$Variable$varName[var_index] variable_units <- data$attrs$Variable$metadata[[variable_name]]$units cat(paste0("Variable: ", variable_name, - " (units: ", variable_units, ")"), "\n") + " (units: ", variable_units, ")"), "\n") output_string <- capture.output(summary(Subset(data$data, - along = 'var', + along = var_dim, indices = list(var_index)))) for (i in output_string) { cat(i, "\n") } } } - + if (show_NA) { # Number of NAs per time dimension and latitude/longitude dimensions list_na <- lapply(seq_along(dim(data$data)), function(dim) { @@ -125,11 +153,12 @@ CST_Summary <- function(data, show_loaded_files = FALSE, show_NA = FALSE) { output_num_nas <- paste(names(num_nas), num_nas, sep = ": ", collapse = " ") cat("NA-Indices per Dimension\n") - cat(output_na_list,"\n") + cat(output_na_list, "\n") cat("Number of NAs in NA-Indices per Dimensions\n") - cat(output_num_nas,"\n") + cat(output_num_nas, "\n") } + # Loaded files if (show_loaded_files) { all_files <- lapply(data$attrs$source_files, unlist) %>% unlist() diff --git a/tests/testthat/test-CST_Summary.R b/tests/testthat/test-CST_Summary.R index 1674cd2c..bdcc4be9 100644 --- a/tests/testthat/test-CST_Summary.R +++ b/tests/testthat/test-CST_Summary.R @@ -1,7 +1,6 @@ ############################################## # Default output - output_default <- "DATA SUMMARY: lonlat_prec months: March, February lonlat_prec range: Mar 01 2011 to Mar 31 2013 @@ -9,45 +8,81 @@ lonlat_prec dimensions: dataset member sdate ftime lat lon 1 6 3 31 4 4 Statistical summary of the data in lonlat_prec : + Min. 1st Qu. Median Mean 3rd Qu. Max. +-7.064e-10 0.000e+00 3.532e-09 3.501e-08 4.027e-08 7.516e-07 +--------------------------------------------- " + +# Conditional output +output_conditional <- "DATA SUMMARY: +lonlat_prec_st months: March, February +lonlat_prec_st range: Mar 01 2011 to Mar 31 2013 +lonlat_prec_st dimensions: +dataset var member sdate ftime lat lon + 1 1 6 3 31 4 4 +Statistical summary of the data in lonlat_prec_st : +Variable: prlr (units: m s-1) + Min. 1st Qu. Median Mean 3rd Qu. Max. +-7.064e-10 0.000e+00 3.532e-09 3.501e-08 4.027e-08 7.516e-07 NA-Indices per Dimension -dataset: member: sdate: ftime: lat: lon: +dataset: var: member: sdate: ftime: lat: lon: Number of NAs in NA-Indices per Dimensions -dataset: member: sdate: ftime: lat: lon: +dataset: var: member: sdate: ftime: lat: lon: Loaded files: /esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20101101.nc /esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20111101.nc /esarchive/exp/ecmwf/system5c3s/daily_mean/prlr_s0-24h/prlr_20121101.nc --------------------------------------------- " -# Conditional output - -output_conditional <- "DATA SUMMARY: -lonlat_prec months: March, February -lonlat_prec range: Mar 01 2011 to Mar 31 2013 -lonlat_prec dimensions: -dataset member sdate ftime lat lon - 1 6 3 31 4 4 -Statistical summary of the data in lonlat_prec : ---------------------------------------------- " - ############################################## +test_that("Input checks: CST_Summary", { + # Check that data is s2dv_cube + expect_error( + CST_Summary(array(10)), + "Parameter 'data' must be of the class 's2dv_cube'." + ) + # Check show_NA + expect_error( + CST_Summary(lonlat_prec, show_NA = 1.5), + "Parameter 'show_NA' must be logical." + ) + # Check show_loaded_files + expect_error( + CST_Summary(lonlat_prec, show_loaded_files = 1.5), + "Parameter 'show_loaded_files' must be logical." + ) + # Check var_dim + expect_error( + CST_Summary(lonlat_prec, var_dim = 1), + "'var_dim' must be a single character string." + ) + expect_error( + CST_Summary(lonlat_prec, var_dim = c('tas', 'psl')), + "'var_dim' must be a single character string." + ) + expect_warning( + CST_Summary(lonlat_prec, var_dim = "var"), + "Dimension var not found. Summary is not be split by variable." + ) +}) -test_that("1. Output checks", { +test_that("2. Output checks: CST_Summary", { # Default output - actual_out <- capture.output(CST_Summary(lonlat_prec)) + actual_out <- capture.output(suppressWarnings(CST_Summary(lonlat_prec))) expect_out <- capture.output(cat(output_default)) expect_equal( actual_out, expect_out - ) + ) # Conditional output - actual_out <- capture.output(CST_Summary(lonlat_prec, loaded_files = FALSE, show_NA = FALSE)) + actual_out <- capture.output( + CST_Summary(lonlat_prec_st, show_loaded_files = TRUE, + show_NA = TRUE, var_dim = "var") + ) expect_out <- capture.output(cat(output_conditional)) - expect_equal( actual_out, expect_out - ) + ) }) -- GitLab