From 85e638450ad95d2ecaa66b0d99d8674ecbb441ef Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Wed, 24 Apr 2024 11:01:48 +0200 Subject: [PATCH 01/16] Implement unit test for detecting hard-coded absolute paths in the code --- tests/testthat/test-hardcoded_paths.R | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/testthat/test-hardcoded_paths.R diff --git a/tests/testthat/test-hardcoded_paths.R b/tests/testthat/test-hardcoded_paths.R new file mode 100644 index 00000000..9caeae6e --- /dev/null +++ b/tests/testthat/test-hardcoded_paths.R @@ -0,0 +1,49 @@ + +# Unit test: detect hard-coded absolute paths in the code + +library(lintr) + +detect_hardcoded_paths <- function() { + + # Define the directories to check + directories <- c("modules", "tools") + + # Define custom lintr to detect the string "/esarchive/" + esarchive_linter <- function(source_file) { + lines <- readLines(soure_file, warn = FALSE) + # Remove comments: everything from '#' to the end of line + code_lines <- gsub("#.*$", "", lines) + # Check if any cleaned line contains "/esarchive/" + if (any(grepl("/esarchive/", code_lines))) { + lint <- lintr::Lint( + filename = source_file, + line_number = which(grepl("/esarchive/", code_lines)), + column_number = NULL, + type = "error", + message = "Hard-coded path '/esarchive/' found.", + linter = "esarchive_linter" + ) + return(list(lint)) # Returns list of lint objects + } + return(list()) # Returns empty list if "/esarchive/" not found + } + + # Apply the custom linter to each directory and save results + files_with_issues <- unlist(lapply(directories, function(dir) { + lint_dir(path = dir, linters = list(esarchive = esarchive_linter)) + }), recursive = TRUE) + + # Check results and print out the test outcome + if (length(files_with_issues) > 0) { + cat("Test FAILED. '/esarchive/' found in the following files:\n") + issues_report <- vapply(files_with_issues, function(l) { + paste(l$filename, "Line:", l$line_number) # Print file name and line number + }, character(1)) + print(issues_report) + return(FALSE) + } else { + cat("Test PASSED. No '/esarchive/' found in any file.\n") + return(TRUE) + } +} + -- GitLab From f7c240fb478b6dcd9b92e9c13b473ee2e9157ae9 Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Thu, 25 Apr 2024 13:56:31 +0200 Subject: [PATCH 02/16] Replace custom function with absolute_path_linter(lax = TRUE) --- tests/testthat/test-hardcoded_paths.R | 31 +++++---------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/tests/testthat/test-hardcoded_paths.R b/tests/testthat/test-hardcoded_paths.R index 9caeae6e..72bf8c40 100644 --- a/tests/testthat/test-hardcoded_paths.R +++ b/tests/testthat/test-hardcoded_paths.R @@ -8,42 +8,21 @@ detect_hardcoded_paths <- function() { # Define the directories to check directories <- c("modules", "tools") - # Define custom lintr to detect the string "/esarchive/" - esarchive_linter <- function(source_file) { - lines <- readLines(soure_file, warn = FALSE) - # Remove comments: everything from '#' to the end of line - code_lines <- gsub("#.*$", "", lines) - # Check if any cleaned line contains "/esarchive/" - if (any(grepl("/esarchive/", code_lines))) { - lint <- lintr::Lint( - filename = source_file, - line_number = which(grepl("/esarchive/", code_lines)), - column_number = NULL, - type = "error", - message = "Hard-coded path '/esarchive/' found.", - linter = "esarchive_linter" - ) - return(list(lint)) # Returns list of lint objects - } - return(list()) # Returns empty list if "/esarchive/" not found - } - - # Apply the custom linter to each directory and save results + # Apply absolute_path_linter() to each directory and save results files_with_issues <- unlist(lapply(directories, function(dir) { - lint_dir(path = dir, linters = list(esarchive = esarchive_linter)) + lint_dir(path = dir, linters = absolute_path_linter(lax = TRUE)) }), recursive = TRUE) # Check results and print out the test outcome if (length(files_with_issues) > 0) { - cat("Test FAILED. '/esarchive/' found in the following files:\n") + cat("Test FAILED. Absolute paths found in the following files:\n") issues_report <- vapply(files_with_issues, function(l) { - paste(l$filename, "Line:", l$line_number) # Print file name and line number + paste(l$filename, "Line:", l$line_number, "Message:", l$message) }, character(1)) print(issues_report) return(FALSE) } else { - cat("Test PASSED. No '/esarchive/' found in any file.\n") + cat("Test PASSED. No absolute paths found in any file.\n") return(TRUE) } } - -- GitLab From d6381f13cf7f6a6e82307986f6817972046ab6ab Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Thu, 25 Apr 2024 17:43:21 +0200 Subject: [PATCH 03/16] Include call to function within the script --- tests/testthat/test-hardcoded_paths.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-hardcoded_paths.R b/tests/testthat/test-hardcoded_paths.R index 72bf8c40..d8b60962 100644 --- a/tests/testthat/test-hardcoded_paths.R +++ b/tests/testthat/test-hardcoded_paths.R @@ -15,14 +15,16 @@ detect_hardcoded_paths <- function() { # Check results and print out the test outcome if (length(files_with_issues) > 0) { - cat("Test FAILED. Absolute paths found in the following files:\n") + cat("Test FAILED. Hard-coded paths found in the following files:\n") issues_report <- vapply(files_with_issues, function(l) { paste(l$filename, "Line:", l$line_number, "Message:", l$message) }, character(1)) print(issues_report) return(FALSE) } else { - cat("Test PASSED. No absolute paths found in any file.\n") + cat("Test PASSED😊 No hard-coded paths found in any file.\n") return(TRUE) } } + +detect_hardcoded_paths() -- GitLab From 9d01111460955cf173a2c651cadf7f39a8c81424 Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Fri, 26 Apr 2024 10:59:14 +0200 Subject: [PATCH 04/16] Create call to unit test: test/test_paths.R --- tests/test_paths.R | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/test_paths.R diff --git a/tests/test_paths.R b/tests/test_paths.R new file mode 100644 index 00000000..87dbed4a --- /dev/null +++ b/tests/test_paths.R @@ -0,0 +1,9 @@ +library(testthat) + +path_testthat <- file.path('./tests/testthat/') +files_testthat <- list.files('./tests/testthat/', pattern = 'paths') + +for (i_file in 1:length(files_testthat)) { + source(paste0('./tests/testthat/', files_testthat[i_file])) +} + -- GitLab From 16c3cb1a2a7b81962484944e1f6b6ddfdc2eefd2 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 30 Apr 2024 16:52:19 +0200 Subject: [PATCH 05/16] Remove hardcoded paths in load_sample(), test_check_number_of_independent_varifications.R and write_autosubmit_conf() --- modules/Loading/R/load_sample.R | 3 +-- tools/test_check_number_of_independent_verifications.R | 4 ++-- tools/write_autosubmit_conf.R | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/Loading/R/load_sample.R b/modules/Loading/R/load_sample.R index e0d906d3..e1a8fe65 100644 --- a/modules/Loading/R/load_sample.R +++ b/modules/Loading/R/load_sample.R @@ -1,8 +1,7 @@ -## TODO: remove paths to personal scratchs -source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs source("tools/CST_ChangeDimName.R") source("modules/Loading/R/compare_exp_obs_grids.R") +source("modules/Loading/R/get_regrid_params.R") load_sample <- function(recipe) { # Hindcast: diff --git a/tools/test_check_number_of_independent_verifications.R b/tools/test_check_number_of_independent_verifications.R index 846dc5be..0ddc6bbb 100644 --- a/tools/test_check_number_of_independent_verifications.R +++ b/tools/test_check_number_of_independent_verifications.R @@ -1,12 +1,12 @@ library(testthat) test_that("A few combinations", { - source("/esarchive/scratch/nperez/git/auto-s2s/tools/check_recipe.R") + source("tools/check_recipe.R") recipe <- list(Analysis = list(Variables = list(ECVs = list( list(name = 'tas', freq = 'daily_mean')), Indicators = list(list(name = 'gdd'))), Workflow = list(Calibration = 'SBC', Indicators = TRUE))) - conf <- list(code_dir = "/esarchive/scratch/nperez/git/auto-s2s/") + conf <- list(code_dir = "./") expect_equal(check_number_of_dependent_verifications(recipe, conf), list(independent = NULL, dependent = list(list(list(name = 'tas', freq = 'daily_mean'), diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index 5f398197..0ccd8543 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -21,8 +21,8 @@ write_autosubmit_conf <- function(recipe, nchunks, # Read autosubmit info for the specific filesystem (e.g. esarchive) auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] # Output directory - dest_dir <- paste0(auto_specs$experiment_dir, expid, "/conf/") - proj_dir <- paste0(auto_specs$experiment_dir, expid, "/proj/auto-s2s/") + dest_dir <- file.path(auto_specs$experiment_dir, expid, "conf") + proj_dir <- file.path(auto_specs$experiment_dir, expid, "proj", "auto-s2s") # Create project directory if it does not exist yet so that chunk_to_recipe # and split_to_recipe files can be created if (!dir.exists(proj_dir)) { @@ -146,7 +146,7 @@ write_autosubmit_conf <- function(recipe, nchunks, paste("##### AUTOSUBMIT CONFIGURATION WRITTEN FOR", expid, "#####")) info(recipe$Run$logger, paste0("You can check your experiment configuration at: ", - "/esarchive/autosubmit/", expid, "/conf/")) + dest_dir)) # Print instructions/commands for user if (recipe$Run$Terminal) { ## TODO: Change SSH message for other environments (outside BSC) -- GitLab From 5f68baabe8c127cdd6f788cbdd656df37bb9d2d3 Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Tue, 30 Apr 2024 17:42:27 +0200 Subject: [PATCH 06/16] Flagged paths rewritten using file.path() instead of paste0() --- modules/Anomalies/Anomalies.R | 4 +- modules/Calibration/Calibration.R | 4 +- modules/Downscaling/Downscaling.R | 4 +- modules/Indices/R/compute_nao.R | 86 ++++++++++++----------- modules/Indices/R/compute_nino.R | 99 ++++++++++++++------------- modules/Scorecards/Scorecards.R | 6 +- modules/Skill/Skill.R | 8 +-- modules/Statistics/Statistics.R | 4 +- tests/testthat/test-hardcoded_paths.R | 13 ++-- 9 files changed, 117 insertions(+), 111 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index d5f11228..a6411e6c 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -122,8 +122,8 @@ Anomalies <- function(recipe, data) { if (recipe$Analysis$Workflow$Anomalies$save != 'none') { info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Anomalies") # Save forecast if ((recipe$Analysis$Workflow$Anomalies$save %in% c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index f1362a22..e6afb210 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -169,8 +169,8 @@ Calibration <- function(recipe, data) { info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") ## TODO: What do we do with the full values? - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Calibration/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Calibration") if ((recipe$Analysis$Workflow$Calibration$save %in% c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 59233dc2..65d654cd 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -278,8 +278,8 @@ Downscaling <- function(recipe, data) { info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") } ## TODO: What do we do with the full values? - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Downscaling/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Downscaling") # if ((recipe$Analysis$Workflow$Downscaling$save %in% # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index bdf2bb85..0e24b5fe 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -117,7 +117,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Dates = data$obs$attrs$Dates, Dataset = recipe$Analysis$Datasets$Reference$name)) if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { - file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") + file_dest <- file.path(recipe$Run$output_dir, "outputs", "Indices") if (tolower(recipe$Analysis$Horizon) == "seasonal") { # Use startdates param from SaveExp to correctly name the files: if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { @@ -159,7 +159,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Variables$name)]]$long if (plot_ts) { - dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + dir.create(file.path(recipe$Run$output_dir, "plots", "Indices"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") for (tstep in 1:dim(nao$hcst$data)['time']) { @@ -179,10 +179,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", - system, "_", recipe$Analysis$Datasets$Reference$name, - "_s", recipe$Analysis$Time$sdate, "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -196,10 +197,10 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, "Lead time", fmonth, " / Start dates", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", - system, "_", recipe$Analysis$Datasets$Reference$name, - "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -229,7 +230,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, source("modules/Visualization/R/tmp/PlotRobinson.R") source("modules/Indices/R/correlation_eno.R") source("modules/Visualization/R/get_proj_code.R") - dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + dir.create(file.path(recipe$Run$output_dir, "plots", "Indices"), showWarnings = F, recursive = T) # Get correct code for stereographic projection projection_code <- get_proj_code(proj_name = "stereographic") @@ -273,11 +274,12 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, " Correlation /", month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", - recipe$Analysis$Variable$name, "_", - recipe$Analysis$Datasets$Reference$name, - "_s", recipe$Analysis$Time$sdate, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -292,10 +294,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, " Correlation / Start dates ", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", - recipe$Analysis$Variable$name, "_", - recipe$Analysis$Datasets$Reference$name, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -331,11 +334,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, " Correlation /", month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", - recipe$Analysis$Variable$name, "_ensmean_", - system, - "_s", recipe$Analysis$Time$sdate, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -351,11 +354,12 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, " Correlation / Start dates ", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", - recipe$Analysis$Variable$name, "_ensmean_", - system, - recipe$Analysis$Datasets$Reference$name, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -388,11 +392,12 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, " Correlation /", month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", - recipe$Analysis$Variable$name, "_member_", - system, - "_s", recipe$Analysis$Time$sdate, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -408,11 +413,12 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, " Correlation / Start dates ", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", - recipe$Analysis$Variable$name, "_member_", - system, - recipe$Analysis$Datasets$Reference$name, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0("NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf")) caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", diff --git a/modules/Indices/R/compute_nino.R b/modules/Indices/R/compute_nino.R index b47d9ee2..f251ce44 100644 --- a/modules/Indices/R/compute_nino.R +++ b/modules/Indices/R/compute_nino.R @@ -104,7 +104,7 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, Dataset = recipe$Analysis$Datasets$Reference$name)) if (save == 'all') { - file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") + file_dest <- file.path(recipe$Run$output_dir, "outputs", "Indices") # Use startdates param from SaveExp to correctly name the files: if (tolower(recipe$Analysis$Horizon) == "seasonal") { if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { @@ -145,7 +145,7 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, var_name <- conf$vars[[which(names(conf$vars) == recipe$Analysis$Variables$name)]]$long if (plot_ts) { - dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + dir.create(file.path(recipe$Run$output_dir, "plots", "Indices"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") for (tstep in 1:dim(nino$hcst$data)['time']) { @@ -165,12 +165,12 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", - nino_name, "_", - system, "_", recipe$Analysis$Datasets$Reference$name, - "_s", recipe$Analysis$Time$sdate, "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), - ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), + ".pdf")) caption <- paste0("Nominal start date: 1st of ", month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], "\n", @@ -181,10 +181,12 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, "Lead time", fmonth, "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", nino_name, "_", - system, "_", recipe$Analysis$Datasets$Reference$name, - "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), + ".pdf")) caption <- paste0("Start date month: ", month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], "\n", @@ -217,7 +219,7 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, # Get code for Robinson projection depending on GEOS/GDAL/PROJ version projection_code <- get_proj_code("robinson") # Avoid rewriten longitudes a shift is neeced: - dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + dir.create(file.path(recipe$Run$output_dir, "plots", "Indices"), showWarnings = F, recursive = T) correl_obs <- Apply(list(data$obs$data, nino$obs$data), target_dims = 'syear', @@ -267,12 +269,13 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", nino_name, - "_correlation_", - recipe$Analysis$Variable$name, "_", - recipe$Analysis$Datasets$Reference$name, - "_s", recipe$Analysis$Time$sdate, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, + "_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf")) caption <- paste0("Nominal start date: 1st of ", month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], "\n", @@ -285,11 +288,11 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, "Correlation / Start dates", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", - nino_name, "_correlation_", - recipe$Analysis$Variable$name, "_", - recipe$Analysis$Datasets$Reference$name, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf")) caption <- paste0("Start date: month ", month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], "\n", @@ -325,12 +328,12 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", - nino_name, "_correlation_", - recipe$Analysis$Variable$name, "_ensmean_", - system, - "_s", recipe$Analysis$Time$sdate, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf")) caption <- paste0("Ensemble mean\n", "Nominal start date: 1st of ", month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], @@ -343,12 +346,12 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, "Correlation / Start dates", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", - nino_name, "_correlation_", - recipe$Analysis$Variable$name, "_ensmean_", - system, - recipe$Analysis$Datasets$Reference$name, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf")) caption <- paste0("Correlation ensemble mean\n", "Start date month: ", month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], @@ -379,12 +382,12 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, month.abb[mes], "/", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", - nino_name, "_correlation_", - recipe$Analysis$Variable$name, "_member_", - system, - "_s", recipe$Analysis$Time$sdate, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf")) caption <- paste0("Individual members\n", "Nominal start date: 1st of ", month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], @@ -397,12 +400,12 @@ compute_nino <- function(data, recipe, region, standardised = TRUE, "Correlation / Start dates", recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) - plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/", - nino_name, "_correlation_", - recipe$Analysis$Variable$name, "_ensmean_", - system, - recipe$Analysis$Datasets$Reference$name, - "_ftime", fmonth, ".pdf") + plotfile <- file.path(recipe$Run$output_dir, "plots", "Indices", + paste0(nino_name, "_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf")) caption <- paste0("Correlation ensemble mean\n", "Start date month: ", month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 37aa421c..02e4f72c 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -22,9 +22,9 @@ source('modules/Scorecards/R/tmp/ClimPalette.R') Scorecards <- function(recipe) { ## Parameters for loading data files - skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") - output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") + skill.input.path <- file.path(recipe$Run$output_dir, "outputs", "Skill") + stats.input.path <- file.path(recipe$Run$output_dir, "outputs", "Statistics") + output.path <- file.path(recipe$Run$output_dir, "plots", "Scorecards") dir.create(output.path, recursive = T, showWarnings = F) system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index aa22838a..77cf2af4 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -357,8 +357,8 @@ Skill <- function(recipe, data, agg = 'global') { if (recipe$Analysis$Workflow$Skill$save != 'none') { info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Skill") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim ## TODO: merge save_metrics() and save_metrics_scorecards() if (recipe$Analysis$Workflow$Skill$save == 'all') { @@ -486,8 +486,8 @@ Probabilities <- function(recipe, data) { info(recipe$Run$logger, "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Skill") # Save percentiles if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'percentiles_only')) { diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 085bcdc5..462791e6 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -84,8 +84,8 @@ Statistics <- function(recipe, data, agg = 'global') { if (recipe$Analysis$Workflow$Skill$save != 'none') { info(recipe$Run$logger, "##### START SAVING STATISTICS #####") } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Statistics/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Statistics") if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics diff --git a/tests/testthat/test-hardcoded_paths.R b/tests/testthat/test-hardcoded_paths.R index d8b60962..1345e013 100644 --- a/tests/testthat/test-hardcoded_paths.R +++ b/tests/testthat/test-hardcoded_paths.R @@ -9,17 +9,14 @@ detect_hardcoded_paths <- function() { directories <- c("modules", "tools") # Apply absolute_path_linter() to each directory and save results - files_with_issues <- unlist(lapply(directories, function(dir) { + files_with_issues <- lapply(directories, function(dir) { lint_dir(path = dir, linters = absolute_path_linter(lax = TRUE)) - }), recursive = TRUE) - + }) + # Check results and print out the test outcome if (length(files_with_issues) > 0) { - cat("Test FAILED. Hard-coded paths found in the following files:\n") - issues_report <- vapply(files_with_issues, function(l) { - paste(l$filename, "Line:", l$line_number, "Message:", l$message) - }, character(1)) - print(issues_report) + cat("Test FAILED. Hard-coded paths found:\n") + print(files_with_issues) return(FALSE) } else { cat("Test PASSED😊 No hard-coded paths found in any file.\n") -- GitLab From c648a71104f6293ce5405b07cbad54c97194da5e Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Tue, 30 Apr 2024 17:46:04 +0200 Subject: [PATCH 07/16] More rewritten flagged paths --- tools/divide_recipe.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 3b2e6eee..1f83d63c 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -51,7 +51,7 @@ divide_recipe <- function(recipe) { # Modify the saving of the individual models in case multimodel is yes or both if (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both')) { # Create directory for multimodel recipes - dir.create(paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/"), + dir.create(file.path(recipe$Run$output_dir, "logs", "recipes", "multimodel"), recursive = TRUE) n_models <- length(recipe$Analysis$Datasets$System) + 1 mm <- tolower(recipe$Analysis$Datasets$Multimodel$approach) @@ -189,11 +189,11 @@ divide_recipe <- function(recipe) { if (all_recipes[[reci]]$Analysis$Datasets$System$name == 'Multimodel') { - recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/") + recipe_dir <- file.path(recipe$Run$output_dir, "logs", "recipes", "multimodel") split_to_recipe[split] <- recipe_split split <- split + 1 } else { - recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/") + recipe_dir <- file.path(recipe$Run$output_dir, "logs", "recipes") chunk_to_recipe[chunk] <- recipe_name chunk <- chunk + 1 } @@ -206,8 +206,8 @@ divide_recipe <- function(recipe) { paste("The main recipe has been divided into", length(chunk_to_recipe), "single model atomic recipes, plus", length(split_to_recipe), "multi-model atomic recipes.")) - text <- paste0("Check output directory ", recipe$Run$output_dir, - "/logs/recipes/ to see all the individual atomic recipes.") + text <- paste0("Check output directory ", recipe$Run$output_dir, "/", + file.path("logs", "recipes"), "/", " to see all the individual atomic recipes.") info(recipe$Run$logger, text) ## TODO: Change returns? return(list(n_atomic_recipes = length(chunk_to_recipe), # length(all_recipes) -- GitLab From 3c8d79cb6e32b4c7e2e1655cc76f351b351ad847 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 2 May 2024 09:49:24 +0200 Subject: [PATCH 08/16] Fix bugs related to use of paste0() instead of file.path() --- conf/autosubmit.yml | 4 ++-- recipes/recipe_multimodel_seasonal.yml | 2 +- tools/check_recipe.R | 6 +++--- tools/write_autosubmit_conf.R | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml index 3e3f1220..db2f503e 100644 --- a/conf/autosubmit.yml +++ b/conf/autosubmit.yml @@ -3,12 +3,12 @@ esarchive: module_version: autosubmit/4.0.98-foss-2015a-Python-3.7.3 auto_version: 4.0.98 conf_format: yaml - experiment_dir: /esarchive/autosubmit/ + experiment_dir: /esarchive/autosubmit userID: bsc32 mars: platform: NORD3 ## TO BE CHANGED module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 ## TO BE CHANGED auto_version: 4.0.0 conf_format: yaml - experiment_dir: /esarchive/autosubmit/ ## TO BE CHANGED + experiment_dir: /esarchive/autosubmit ## TO BE CHANGED userID: bsc32 ## TO BE CHANGED diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index f974fc48..f7ff4b72 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -67,7 +67,7 @@ Run: # fill only if using autosubmit auto_conf: script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script - expid: a6wq # replace with your EXPID + expid: a6v7 # replace with your EXPID hpc_user: bsc32762 # replace with your hpc username wallclock: 01:00 # hh:mm wallclock_multimodel: 02:00 diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 42cb832c..57415018 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -787,11 +787,11 @@ check_recipe <- function(recipe) { paste("autosubmit expid -H", auto_specs$platform, "-d ")) error_status <- TRUE - } else if (!dir.exists(paste0(auto_specs$experiment_dir, - recipe$Run$auto_conf$expid))) { + } else if (!dir.exists(file.path(auto_specs$experiment_dir, + recipe$Run$auto_conf$expid))) { error(recipe$Run$logger, paste0("No folder in ", auto_specs$experiment_dir, - " for the EXPID", recipe$Run$auto_conf$expid, + " for the EXPID ", recipe$Run$auto_conf$expid, ". Please make sure it is correct.")) error_status <- TRUE } diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index 0ccd8543..f1989642 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -132,15 +132,15 @@ write_autosubmit_conf <- function(recipe, nchunks, recipe$Run$auto_conf$hpc_user } else if (conf_type == "proj") { # Section 5: proj - ## modules? Info that goes on script, e.g. output directory + ## modules? Info that goes on script, e.g. directory conf$common$OUTDIR <- recipe$Run$output_dir conf$common$SCRIPT <- recipe$Run$auto_conf$script conf$common$RECIPE <- paste0(recipe$name, ".yml") } # Write config file inside autosubmit dir - write.config(conf, paste0(dest_dir, dest_file), + write.config(conf, file.path(dest_dir, dest_file), write.type = auto_specs$conf_format) - Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) + Sys.chmod(file.path(dest_dir, dest_file), mode = "755", use_umask = F) } info(recipe$Run$logger, paste("##### AUTOSUBMIT CONFIGURATION WRITTEN FOR", expid, "#####")) -- GitLab From 8c5dca529dfb6aeb3f1d090b07a92961acf9ce08 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 2 May 2024 11:03:48 +0200 Subject: [PATCH 09/16] Change paste0() to file.path() in get_filename() --- modules/Saving/R/get_filename.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index b2345691..4c2dd726 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -76,6 +76,6 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "probs" = {filename <- paste0(var, gg, "-probs_", date)}, "bias" = {filename <- paste0(var, gg, "-bias_", date)}) } - return(paste0(dir, filename, ".nc")) + return(file.path(dir, paste0(filename, ".nc"))) } -- GitLab From d6bed5c99fbcc4cd15583667ac7909f25a539677 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 3 May 2024 12:28:43 +0200 Subject: [PATCH 10/16] Change pending paste0() to file.path() to fix pipeline --- modules/Saving/R/get_dir.R | 2 +- modules/Saving/R/save_metrics.R | 3 ++- modules/Visualization/R/plot_ensemble_mean.R | 18 ++++++++---------- modules/Visualization/R/plot_metrics.R | 6 +++--- .../R/plot_most_likely_terciles_map.R | 3 ++- modules/Visualization/Visualization.R | 2 +- recipes/recipe_multimodel_seasonal.yml | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R index 35bede6f..a0b054c6 100644 --- a/modules/Saving/R/get_dir.R +++ b/modules/Saving/R/get_dir.R @@ -17,7 +17,7 @@ get_dir <- function(recipe, variable, agg = "global") { # Define output dir name accordint to Scorecards format dict <- read_yaml("conf/output_dictionaries/scorecards.yml") # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, system, "/", reference, "/", calib.method, "/", variable, "/") + dir <- file.path(outdir, system, reference, calib.method, variable) } else { # Default generic output format based on FOCUS # Get startdate or hindcast period diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 57063a0c..b3d9fb97 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -101,8 +101,9 @@ save_metrics <- function(recipe, } ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code - extra_string <- get_filename(NULL, recipe, variable, + extra_string <- get_filename("", recipe, variable, fcst.sdate, agg, names(subset_metric)[[i]]) + browser() SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, coords = c(data_cube$coords['longitude'], diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 59881864..8186838d 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -81,18 +81,16 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } for (i_syear in start_date) { + i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean, + along = c("syear"), + indices = which(start_date == i_syear), + drop = 'selected') if (length(start_date) == 1) { - i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean, - along = c("syear"), - indices = which(start_date == i_syear), - drop = 'selected') - outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", start_date) + outfile <- file.path(outdir[[var]], + paste0("forecast_ensemble_mean-", start_date)) } else { - i_var_ens_mean <- ClimProjDiags::Subset(var_ens_mean, - along = c("syear"), - indices = which(start_date == i_syear), - drop = 'selected') - outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", i_syear) + outfile <- file.path(outdir[[var]], + paste0("forecast_ensemble_mean-", i_syear)) } # Mask if (!is.null(mask)) { diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index c00e8164..141987d0 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,7 +1,7 @@ library(stringr) plot_metrics <- function(recipe, data_cube, metrics, - outdir, significance = F, output_conf) { + outdir, significance = F, output_conf) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive # data_cube: s2dv_cube object with the corresponding hindcast data @@ -194,9 +194,9 @@ plot_metrics <- function(recipe, data_cube, metrics, } # Define output file name and titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir[var], name, "-", month_label) + outfile <- file.path(outdir[var], paste0(name, "-", month_label)) } else { - outfile <- paste0(outdir[var], name) + outfile <- file.path(outdir[var], name) } # Get variable name and long name var_name <- data_cube$attrs$Variable$varName[[var]] diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index c9ce70f2..0e6e27c5 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -87,7 +87,8 @@ plot_most_likely_terciles <- function(recipe, along = c("syear"), indices = which(start_date == i_syear), drop = 'selected') - outfile <- paste0(outdir[[var]], "forecast_most_likely_tercile-", i_syear) + outfile <- file.path(outdir[[var]], + paste0("forecast_most_likely_tercile-", i_syear)) # Mask if (!is.null(mask)) { outfile <- paste0(outfile, "_rpssmask") diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 3750baea..109251c9 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -55,7 +55,7 @@ Visualization <- function(recipe, # Get plot types and create output directories plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] ## TODO: Do not modify output dir here - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "plots") outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) for (directory in outdir) { diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index f7ff4b72..3b14e09b 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -68,7 +68,7 @@ Run: auto_conf: script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script expid: a6v7 # replace with your EXPID - hpc_user: bsc32762 # replace with your hpc username + hpc_user: bsc032762 # replace with your hpc username wallclock: 01:00 # hh:mm wallclock_multimodel: 02:00 processors_per_job: 4 -- GitLab From 16218459b2442d7ed3a3a1b04a26a3fe9003e215 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 3 May 2024 14:26:49 +0200 Subject: [PATCH 11/16] Move lintr tests and add to git pipeline --- .gitlab-ci.yml | 13 +++++++++++++ tests/{test_paths.R => test_lintr.R} | 2 +- ...rdcoded_paths.R => test-lintr_hardcoded_paths.R} | 0 3 files changed, 14 insertions(+), 1 deletion(-) rename tests/{test_paths.R => test_lintr.R} (71%) rename tests/testthat/{test-hardcoded_paths.R => test-lintr_hardcoded_paths.R} (100%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 997870c8..90f49c1f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,3 +26,16 @@ unit-test-decadal: # This job runs in the test stage. - module list - echo "Running decadal unit tests..." - Rscript ./tests/test_decadal.R + +unit-test-lintr: # This job runs in the test stage. + stage: test + script: + - echo "Loading modules..." + - module load R/4.1.2-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - module load GEOS/3.7.2-foss-2015a-Python-3.7.3 + - module load GDAL/2.2.1-foss-2015a + - module load PROJ/4.8.0-foss-2015a + - module list + - echo "Running lintr unit tests..." + - Rscript ./tests/test_lintr.R diff --git a/tests/test_paths.R b/tests/test_lintr.R similarity index 71% rename from tests/test_paths.R rename to tests/test_lintr.R index 87dbed4a..7d0d1349 100644 --- a/tests/test_paths.R +++ b/tests/test_lintr.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'paths') +files_testthat <- list.files('./tests/testthat/', pattern = 'lintr') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) diff --git a/tests/testthat/test-hardcoded_paths.R b/tests/testthat/test-lintr_hardcoded_paths.R similarity index 100% rename from tests/testthat/test-hardcoded_paths.R rename to tests/testthat/test-lintr_hardcoded_paths.R -- GitLab From ee9268f348d89c0bfb0e2c64b1e6bc45060c2b19 Mon Sep 17 00:00:00 2001 From: ARIADNA BATALLA FERRES Date: Fri, 3 May 2024 16:11:19 +0200 Subject: [PATCH 12/16] Fixed if() clause and added directory names to output --- tests/testthat/test-hardcoded_paths.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-hardcoded_paths.R b/tests/testthat/test-hardcoded_paths.R index 1345e013..6616e747 100644 --- a/tests/testthat/test-hardcoded_paths.R +++ b/tests/testthat/test-hardcoded_paths.R @@ -13,13 +13,16 @@ detect_hardcoded_paths <- function() { lint_dir(path = dir, linters = absolute_path_linter(lax = TRUE)) }) + # Assign names to the list elements + names(files_with_issues) <- directories + # Check results and print out the test outcome - if (length(files_with_issues) > 0) { + if (any(sapply(files_with_issues, function(x) length(x) > 0))) { cat("Test FAILED. Hard-coded paths found:\n") print(files_with_issues) return(FALSE) } else { - cat("Test PASSED😊 No hard-coded paths found in any file.\n") + cat("Test PASSED🥳 No hard-coded paths found in any file.\n") return(TRUE) } } -- GitLab From 3dd18f10b357f1570adada38fc2d9d34b0efb04e Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 6 May 2024 16:09:33 +0200 Subject: [PATCH 13/16] Add test_that() clause --- .gitlab-ci.yml | 2 +- tests/testthat/test-lintr_hardcoded_paths.R | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 90f49c1f..fe4abf17 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -37,5 +37,5 @@ unit-test-lintr: # This job runs in the test stage. - module load GDAL/2.2.1-foss-2015a - module load PROJ/4.8.0-foss-2015a - module list - - echo "Running lintr unit tests..." + - echo "Running lint tests..." - Rscript ./tests/test_lintr.R diff --git a/tests/testthat/test-lintr_hardcoded_paths.R b/tests/testthat/test-lintr_hardcoded_paths.R index 6616e747..f5a62cef 100644 --- a/tests/testthat/test-lintr_hardcoded_paths.R +++ b/tests/testthat/test-lintr_hardcoded_paths.R @@ -18,13 +18,15 @@ detect_hardcoded_paths <- function() { # Check results and print out the test outcome if (any(sapply(files_with_issues, function(x) length(x) > 0))) { - cat("Test FAILED. Hard-coded paths found:\n") + cat("Test FAILED. Absolute paths found:\n") print(files_with_issues) return(FALSE) } else { - cat("Test PASSED🥳 No hard-coded paths found in any file.\n") + cat("No absolute paths found in any file.\n") return(TRUE) } } -detect_hardcoded_paths() +test_that("Check code for absolute paths", { + expect_true(detect_hardcoded_paths()) +}) -- GitLab From 904288e4bf592d08d9dc0c680dbc6b1759758494 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 8 May 2024 09:34:17 +0200 Subject: [PATCH 14/16] Fix atomic recipe file path --- tools/divide_recipe.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 1f83d63c..ddaff215 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -198,7 +198,8 @@ divide_recipe <- function(recipe) { chunk <- chunk + 1 } write_yaml(all_recipes[[reci]], - paste0(recipe_dir, "atomic_recipe_", recipe_name, ".yml")) + file.path(recipe_dir, + paste0("atomic_recipe_", recipe_name, ".yml"))) } # Print information for user -- GitLab From c496496f298084a08a399c15b82f7690a39031bd Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 24 May 2024 10:41:39 +0200 Subject: [PATCH 15/16] Remove local copy of CST_SaveExp() --- modules/Saving/R/tmp/CST_SaveExp.R | 915 ----------------------------- 1 file changed, 915 deletions(-) delete mode 100644 modules/Saving/R/tmp/CST_SaveExp.R diff --git a/modules/Saving/R/tmp/CST_SaveExp.R b/modules/Saving/R/tmp/CST_SaveExp.R deleted file mode 100644 index 2ffd8fa8..00000000 --- a/modules/Saving/R/tmp/CST_SaveExp.R +++ /dev/null @@ -1,915 +0,0 @@ -#'Save objects of class 's2dv_cube' to data in NetCDF format -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@description This function allows to divide and save a object of class -#''s2dv_cube' into a NetCDF file, allowing to reload the saved data using -#'\code{CST_Start} or \code{CST_Load} functions. It also allows to save any -#''s2dv_cube' object that follows the NetCDF attributes conventions. -#' -#'@param data An object of class \code{s2dv_cube}. -#'@param destination A character string containing the directory name in which -#' to save the data. NetCDF file for each starting date are saved into the -#' folder tree: 'destination/Dataset/variable/'. By default the function -#' saves the data into the working directory. -#'@param sdate_dim A character string indicating the name of the start date -#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no -#' start date dimension. -#'@param ftime_dim A character string indicating the name of the forecast time -#' dimension. If 'Dates' are used, it can't be NULL. If there is no forecast -#' time dimension, 'Dates' will be set to NULL and will not be used. By -#' default, it is set to 'time'. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' It can be NULL if there is no dataset dimension. By default, it is set to -#' 'dataset'. -#'@param var_dim A character string indicating the name of variable dimension. -#' It can be NULL if there is no variable dimension. By default, it is set to -#' 'var'. -#'@param memb_dim A character string indicating the name of the member -#' dimension. It can be NULL if there is no member dimension. By default, it is -#' set to 'member'. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files (single_file = FALSE). It must be a -#' vector of the same length as the start date dimension of data. It must be a -#' vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts -#' between 1 and 10. If it is NULL, the coordinate corresponding the the start -#' date dimension or the first Date of each time step will be used as the name -#' of the files. It is NULL by default. -#'@param single_file A logical value indicating if all object is saved in a -#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, -#' the array is separated for datasets, variable and start date. When there are -#' no specified time dimensions, the data will be saved in a single file by -#' default. The output file name when 'single_file' is TRUE is a character -#' string containing: '__.nc'; when it is FALSE, -#' it is '_.nc'. It is FALSE by default. -#'@param drop_dims (optional) A vector of character strings indicating the -#' dimension names of length 1 that need to be dropped in order that they don't -#' appear in the netCDF file. Only is allowed to drop dimensions that are not -#' used in the computation. The dimensions used in the computation are the ones -#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is -#' NULL by default. -#'@param extra_string (Optional) A character string to be included as part of -#' the file name, for instance, to identify member or realization. When -#' single_file is TRUE, the 'extra_string' will substitute all the default -#' file name; when single_file is FALSE, the 'extra_string' will be added -#' in the file name as: '__.nc'. It is NULL by -#' default. -#'@param units_hours_since (Optional) A logical value only available for the -#' case: 'Dates' have forecast time and start date dimension, 'single_file' is -#' TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast -#' time with units of 'hours since'; if it is FALSE, the time units will be a -#' number of time steps with its corresponding frequency (e.g. n days, n months -#' or n hours). It is FALSE by default. -#'@param global_attrs (Optional) A list with elements containing the global -#' attributes to be saved in the NetCDF. -#' -#'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file is TRUE}}{ -#' All data is saved in a single file located in the specified destination -#' path with the following name (by default): -#' '__.nc'. Multiple variables -#' are saved separately in the same file. The forecast time units -#' are calculated from each start date (if sdate_dim is not NULL) or from -#' the time step. If 'units_hours_since' is TRUE, the forecast time units -#' will be 'hours since '. If 'units_hours_since' is FALSE, -#' the forecast time units are extracted from the frequency of the time steps -#' (hours, days, months); if no frequency is found, the units will be ’hours -#' since’. When the time units are 'hours since' the time ateps are assumed to -#' be equally spaced. -#'} -#'\item{\code{single_file is FALSE}}{ -#' The data array is subset and stored into multiple files. Each file -#' contains the data subset for each start date, variable and dataset. Files -#' with different variables and datasets are stored in separated directories -#' within the following directory tree: 'destination/Dataset/variable/'. -#' The name of each file will be by default: '_.nc'. -#' The forecast time units are calculated from each start date (if sdate_dim -#' is not NULL) or from the time step. The forecast time units will be 'hours -#' since '. -#'} -#' -#'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and -#'\code{\link{s2dv_cube}} -#' -#'@examples -#'data <- lonlat_temp_st$exp -#'CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', -#' dat_dim = 'dataset', sdate_dim = 'sdate') -#' -#'@export -CST_SaveExp <- function(data, destination = "./", startdates = NULL, - sdate_dim = 'sdate', ftime_dim = 'time', - memb_dim = 'member', dat_dim = 'dataset', - var_dim = 'var', drop_dims = NULL, - single_file = FALSE, extra_string = NULL, - global_attrs = NULL, units_hours_since = FALSE) { - # Check 's2dv_cube' - if (!inherits(data, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube'.") - } - # Check object structure - if (!all(c('data', 'attrs') %in% names(data))) { - stop("Parameter 'data' must have at least 'data' and 'attrs' elements ", - "within the 's2dv_cube' structure.") - } - if (!inherits(data$attrs, 'list')) { - stop("Level 'attrs' must be a list with at least 'Dates' element.") - } - # metadata - if (!is.null(data$attrs$Variable$metadata)) { - if (!inherits(data$attrs$Variable$metadata, 'list')) { - stop("Element metadata from Variable element in attrs must be a list.") - } - } - # Dates - if (is.null(data$attrs$Dates)) { - stop("Element 'Dates' from 'attrs' level cannot be NULL.") - } - if (is.null(dim(data$attrs$Dates))) { - stop("Element 'Dates' from 'attrs' level must have time dimensions.") - } - # sdate_dim - if (!is.null(sdate_dim)) { - if (!is.character(sdate_dim)) { - stop("Parameter 'sdate_dim' must be a character string.") - } - } - # startdates - if (is.null(startdates)) { - if (is.character(data$coords[[sdate_dim]])) { - startdates <- data$coords[[sdate_dim]] - } - } - - SaveExp(data = data$data, - destination = destination, - coords = data$coords, - Dates = data$attrs$Dates, - time_bounds = data$attrs$time_bounds, - startdates = startdates, - varname = data$attrs$Variable$varName, - metadata = data$attrs$Variable$metadata, - Datasets = data$attrs$Datasets, - sdate_dim = sdate_dim, ftime_dim = ftime_dim, - memb_dim = memb_dim, - dat_dim = dat_dim, var_dim = var_dim, - drop_dims = drop_dims, - single_file = single_file, - extra_string = extra_string, - global_attrs = global_attrs, - units_hours_since = units_hours_since) -} -#'Save a multidimensional array with metadata to data in NetCDF format -#'@description This function allows to save a data array with metadata into a -#'NetCDF file, allowing to reload the saved data using \code{Start} function -#'from StartR package. If the original 's2dv_cube' object has been created from -#'\code{CST_Load()}, then it can be reloaded with \code{Load()}. -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#' -#'@param data A multi-dimensional array with named dimensions. -#'@param destination A character string indicating the path where to store the -#' NetCDF files. -#'@param coords A named list with elements of the coordinates corresponding to -#' the dimensions of the data parameter. The names and length of each element -#' must correspond to the names of the dimensions. If any coordinate is not -#' provided, it is set as an index vector with the values from 1 to the length -#' of the corresponding dimension. -#'@param Dates A named array of dates with the corresponding sdate and forecast -#' time dimension. If there is no sdate_dim, you can set it to NULL. -#' It must have ftime_dim dimension. -#'@param time_bounds (Optional) A list of two arrays of dates containing -#' the lower (first array) and the upper (second array) time bounds -#' corresponding to Dates. Each array must have the same dimensions as Dates. -#' If 'Dates' parameter is NULL, 'time_bounds' are not used. It is NULL by -#' default. -#'@param startdates A vector of dates that will be used for the filenames -#' when saving the data in multiple files (single_file = FALSE). It must be a -#' vector of the same length as the start date dimension of data. It must be a -#' vector of class \code{Dates}, \code{'POSIXct'} or character with lenghts -#' between 1 and 10. If it is NULL, the coordinate corresponding the the start -#' date dimension or the first Date of each time step will be used as the name -#' of the files. It is NULL by default. -#'@param varname A character string indicating the name of the variable to be -#' saved. -#'@param metadata A named list where each element is a variable containing the -#' corresponding information. The information must be contained in a list of -#' lists for each variable. -#'@param Datasets A vector of character string indicating the names of the -#' datasets. -#'@param sdate_dim A character string indicating the name of the start date -#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no -#' start date dimension. -#'@param ftime_dim A character string indicating the name of the forecast time -#' dimension. By default, it is set to 'time'. It can be NULL if there is no -#' forecast time dimension. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' By default, it is set to 'dataset'. It can be NULL if there is no dataset -#' dimension. -#'@param var_dim A character string indicating the name of variable dimension. -#' By default, it is set to 'var'. It can be NULL if there is no variable -#' dimension. -#'@param memb_dim A character string indicating the name of the member -#' dimension. By default, it is set to 'member'. It can be NULL if there is no -#' member dimension. -#'@param drop_dims (optional) A vector of character strings indicating the -#' dimension names of length 1 that need to be dropped in order that they don't -#' appear in the netCDF file. Only is allowed to drop dimensions that are not -#' used in the computation. The dimensions used in the computation are the ones -#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is -#' NULL by default. -#'@param single_file A logical value indicating if all object is saved in a -#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, -#' the array is separated for datasets, variable and start date. When there are -#' no specified time dimensions, the data will be saved in a single file by -#' default. The output file name when 'single_file' is TRUE is a character -#' string containing: '__.nc'; when it is FALSE, -#' it is '_.nc'. It is FALSE by default. -#'@param extra_string (Optional) A character string to be included as part of -#' the file name, for instance, to identify member or realization. When -#' single_file is TRUE, the 'extra_string' will substitute all the default -#' file name; when single_file is FALSE, the 'extra_string' will be added -#' in the file name as: '__.nc'. It is NULL by -#' default. -#'@param global_attrs (Optional) A list with elements containing the global -#' attributes to be saved in the NetCDF. -#'@param units_hours_since (Optional) A logical value only available for the -#' case: Dates have forecast time and start date dimension, single_file is -#' TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time -#' with units of 'hours since'; if it is FALSE, the time units will be a number -#' of time steps with its corresponding frequency (e.g. n days, n months or n -#' hours). It is FALSE by default. -#' -#'@return Multiple or single NetCDF files containing the data array.\cr -#'\item{\code{single_file is TRUE}}{ -#' All data is saved in a single file located in the specified destination -#' path with the following name (by default): -#' '__.nc'. Multiple variables -#' are saved separately in the same file. The forecast time units -#' are calculated from each start date (if sdate_dim is not NULL) or from -#' the time step. If 'units_hours_since' is TRUE, the forecast time units -#' will be 'hours since '. If 'units_hours_since' is FALSE, -#' the forecast time units are extracted from the frequency of the time steps -#' (hours, days, months); if no frequency is found, the units will be ’hours -#' since’. When the time units are 'hours since' the time ateps are assumed to -#' be equally spaced. -#'} -#'\item{\code{single_file is FALSE}}{ -#' The data array is subset and stored into multiple files. Each file -#' contains the data subset for each start date, variable and dataset. Files -#' with different variables and datasets are stored in separated directories -#' within the following directory tree: 'destination/Dataset/variable/'. -#' The name of each file will be by default: '_.nc'. -#' The forecast time units are calculated from each start date (if sdate_dim -#' is not NULL) or from the time step. The forecast time units will be 'hours -#' since '. -#'} -#' -#'@examples -#'data <- lonlat_temp_st$exp$data -#'lon <- lonlat_temp_st$exp$coords$lon -#'lat <- lonlat_temp_st$exp$coords$lat -#'coords <- list(lon = lon, lat = lat) -#'Datasets <- lonlat_temp_st$exp$attrs$Datasets -#'varname <- 'tas' -#'Dates <- lonlat_temp_st$exp$attrs$Dates -#'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata -#'SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname, -#' Dates = Dates, metadata = metadata, single_file = TRUE, -#' ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset') -#' -#'@import easyNCDF -#'@importFrom s2dv Reorder -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -SaveExp <- function(data, destination = "./", coords = NULL, - Dates = NULL, time_bounds = NULL, startdates = NULL, - varname = NULL, metadata = NULL, Datasets = NULL, - sdate_dim = 'sdate', ftime_dim = 'time', - memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var', - drop_dims = NULL, single_file = FALSE, extra_string = NULL, - global_attrs = NULL, units_hours_since = FALSE) { - ## Initial checks - # data - if (is.null(data)) { - stop("Parameter 'data' cannot be NULL.") - } - dimnames <- names(dim(data)) - if (is.null(dimnames)) { - stop("Parameter 'data' must be an array with named dimensions.") - } - if (!is.null(attributes(data)$dimensions)) { - attributes(data)$dimensions <- NULL - } - # destination - if (!is.character(destination) | length(destination) > 1) { - stop("Parameter 'destination' must be a character string of one element ", - "indicating the name of the file (including the folder if needed) ", - "where the data will be saved.") - } - # drop_dims - if (!is.null(drop_dims)) { - if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { - warning("Parameter 'drop_dims' must be character string containing ", - "the data dimension names to be dropped. It will not be used.") - } else if (!all(dim(data)[drop_dims] %in% 1)) { - warning("Parameter 'drop_dims' can only contain dimension names ", - "that are of length 1. It will not be used.") - } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { - warning("Parameter 'drop_dims' contains dimensions used in the computation. ", - "It will not be used.") - drop_dims <- NULL - } else { - data <- Subset(x = data, along = drop_dims, - indices = lapply(1:length(drop_dims), function(x) 1), - drop = 'selected') - dimnames <- names(dim(data)) - } - } - # coords - if (!is.null(coords)) { - if (!inherits(coords, 'list')) { - stop("Parameter 'coords' must be a named list of coordinates.") - } - if (is.null(names(coords))) { - stop("Parameter 'coords' must have names corresponding to coordinates.") - } - } else { - coords <- sapply(dimnames, function(x) 1:dim(data)[x]) - } - # varname - if (is.null(varname)) { - varname <- 'X' - } else if (length(varname) > 1) { - multiple_vars <- TRUE - } else { - multiple_vars <- FALSE - } - if (!all(sapply(varname, is.character))) { - stop("Parameter 'varname' must be a character string with the ", - "variable names.") - } - # single_file - if (!inherits(single_file, 'logical')) { - warning("Parameter 'single_file' must be a logical value. It will be ", - "set as FALSE.") - single_file <- FALSE - } - # extra_string - if (!is.null(extra_string)) { - if (!is.character(extra_string)) { - stop("Parameter 'extra_string' must be a character string.") - } - } - # global_attrs - if (!is.null(global_attrs)) { - if (!inherits(global_attrs, 'list')) { - stop("Parameter 'global_attrs' must be a list.") - } - } - - ## Dimensions checks - # Spatial coordinates - if (!any(dimnames %in% .KnownLonNames()) | - !any(dimnames %in% .KnownLatNames())) { - lon_dim <- NULL - lat_dim <- NULL - } else { - lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] - lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] - } - # ftime_dim - if (!is.null(ftime_dim)) { - if (!is.character(ftime_dim)) { - stop("Parameter 'ftime_dim' must be a character string.") - } - if (!all(ftime_dim %in% dimnames)) { - stop("Parameter 'ftime_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no forecast time dimension.") - } - } - # sdate_dim - if (!is.null(sdate_dim)) { - if (!is.character(sdate_dim)) { - stop("Parameter 'sdate_dim' must be a character string.") - } - if (!all(sdate_dim %in% dimnames)) { - stop("Parameter 'sdate_dim' is not found in 'data' dimension.") - } - } - # memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim)) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!all(memb_dim %in% dimnames)) { - stop("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no member dimension.") - } - } - # dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim)) { - stop("Parameter 'dat_dim' must be a character string.") - } - if (!all(dat_dim %in% dimnames)) { - stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no Datasets dimension.") - } - n_datasets <- dim(data)[dat_dim] - } else { - n_datasets <- 1 - } - # var_dim - if (!is.null(var_dim)) { - if (!is.character(var_dim)) { - stop("Parameter 'var_dim' must be a character string.") - } - if (!all(var_dim %in% dimnames)) { - stop("Parameter 'var_dim' is not found in 'data' dimension. Set it ", - "as NULL if there is no variable dimension.") - } - n_vars <- dim(data)[var_dim] - } else { - n_vars <- 1 - } - # minimum dimensions - if (all(dimnames %in% c(var_dim, dat_dim))) { - if (!single_file) { - warning("Parameter data has only ", - paste(c(var_dim, dat_dim), collapse = ' and '), " dimensions ", - "and it cannot be splitted in multiple files. All data will ", - "be saved in a single file.") - single_file <- TRUE - } - } - # Dates (1): initial checks - if (!is.null(Dates)) { - if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { - stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") - } - if (is.null(dim(Dates))) { - stop("Parameter 'Dates' must have dimension names.") - } - if (all(is.null(ftime_dim), is.null(sdate_dim))) { - warning("Parameters 'ftime_dim' and 'sdate_dim' can't both be NULL ", - "if 'Dates' are used. 'Dates' will not be used.") - Dates <- NULL - } - # sdate_dim in Dates - if (!is.null(sdate_dim)) { - if (!sdate_dim %in% names(dim(Dates))) { - warning("Parameter 'sdate_dim' is not found in 'Dates' dimension. ", - "Dates will not be used.") - Dates <- NULL - } - } - # ftime_dim in Dates - if (!is.null(ftime_dim)) { - if (!ftime_dim %in% names(dim(Dates))) { - warning("Parameter 'ftime_dim' is not found in 'Dates' dimension. ", - "Dates will not be used.") - Dates <- NULL - } - } - } - # time_bounds - if (!is.null(time_bounds)) { - if (!inherits(time_bounds, 'list')) { - stop("Parameter 'time_bounds' must be a list with two dates arrays.") - } - time_bounds_dims <- lapply(time_bounds, function(x) dim(x)) - if (!identical(time_bounds_dims[[1]], time_bounds_dims[[2]])) { - stop("Parameter 'time_bounds' must have 2 arrays with same dimensions.") - } - if (is.null(Dates)) { - time_bounds <- NULL - } else { - name_tb <- sort(names(time_bounds_dims[[1]])) - name_dt <- sort(names(dim(Dates))) - if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { - stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", - "of all dimensions.")) - } - } - } - # Dates (2): Check dimensions - if (!is.null(Dates)) { - if (any(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] != 1)) { - stop("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", - "dimensions of length greater than 1.") - } - # drop dimensions of length 1 different from sdate_dim and ftime_dim - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] - - # add ftime if needed - if (is.null(ftime_dim)) { - warning("A 'time' dimension of length 1 will be added to 'Dates'.") - dim(Dates) <- c(time = 1, dim(Dates)) - dim(data) <- c(time = 1, dim(data)) - dimnames <- names(dim(data)) - ftime_dim <- 'time' - if (!is.null(time_bounds)) { - time_bounds <- lapply(time_bounds, function(x) { - dim(x) <- c(time = 1, dim(x)) - return(x) - }) - } - units_hours_since <- TRUE - } - # add sdate if needed - if (is.null(sdate_dim)) { - if (!single_file) { - dim(Dates) <- c(dim(Dates), sdate = 1) - dim(data) <- c(dim(data), sdate = 1) - dimnames <- names(dim(data)) - sdate_dim <- 'sdate' - if (!is.null(time_bounds)) { - time_bounds <- lapply(time_bounds, function(x) { - dim(x) <- c(dim(x), sdate = 1) - return(x) - }) - } - if (!is.null(startdates)) { - if (length(startdates) != 1) { - warning("Parameter 'startdates' must be of length 1 if 'sdate_dim' is NULL.", - "They won't be used.") - startdates <- NULL - } - } - } - units_hours_since <- TRUE - } - } - # startdates - if (!is.null(Dates)) { - # check startdates - if (is.null(startdates)) { - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { - warning("Parameter 'startdates' should be a character string containing ", - "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", - "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } - } else if (!single_file) { - warning("Dates must be provided if 'data' must be saved in separated files. ", - "All data will be saved in a single file.") - single_file <- TRUE - } - # startdates - if (is.null(startdates)) { - if (is.null(sdate_dim)) { - startdates <- 'XXX' - } else { - startdates <- rep('XXX', dim(data)[sdate_dim]) - } - } else { - if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { - startdates <- format(startdates, "%Y%m%d") - } - if (!is.null(sdate_dim)) { - if (dim(data)[sdate_dim] != length(startdates)) { - warning(paste0("Parameter 'startdates' doesn't have the same length ", - "as dimension '", sdate_dim,"', it will not be used.")) - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - startdates <- format(startdates, "%Y%m%d") - } - } - } - - # Datasets - if (is.null(Datasets)) { - Datasets <- rep('XXX', n_datasets ) - } - if (inherits(Datasets, 'list')) { - Datasets <- names(Datasets) - } - if (n_datasets > length(Datasets)) { - warning("Dimension 'Datasets' in 'data' is greater than those listed in ", - "element 'Datasets' and the first element will be reused.") - Datasets <- c(Datasets, rep(Datasets[1], n_datasets - length(Datasets))) - } else if (n_datasets < length(Datasets)) { - warning("Dimension 'Datasets' in 'data' is smaller than those listed in ", - "element 'Datasets' and only the firsts elements will be used.") - Datasets <- Datasets[1:n_datasets] - } - - ## NetCDF dimensions definition - excluded_dims <- var_dim - if (!is.null(Dates)) { - excluded_dims <- c(excluded_dims, sdate_dim, ftime_dim) - } - if (!single_file) { - excluded_dims <- c(excluded_dims, dat_dim) - } - - ## Unknown dimensions check - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) - if (!all(dimnames %in% alldims)) { - unknown_dims <- dimnames[which(!dimnames %in% alldims)] - memb_dim <- c(memb_dim, unknown_dims) - } - - filedims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) - filedims <- filedims[which(!filedims %in% excluded_dims)] - - # Delete unneded coords - coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL - out_coords <- NULL - for (i_coord in filedims) { - # vals - if (i_coord %in% names(coords)) { - if (length(coords[[i_coord]]) != dim(data)[i_coord]) { - warning(paste0("Coordinate '", i_coord, "' has different lenght as ", - "its dimension and it will not be used.")) - out_coords[[i_coord]] <- 1:dim(data)[i_coord] - } else if (is.numeric(coords[[i_coord]])) { - out_coords[[i_coord]] <- as.vector(coords[[i_coord]]) - } else { - out_coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } else { - out_coords[[i_coord]] <- 1:dim(data)[i_coord] - } - dim(out_coords[[i_coord]]) <- dim(data)[i_coord] - - ## metadata - if (i_coord %in% names(metadata)) { - if ('variables' %in% names(attributes(metadata[[i_coord]]))) { - # from Start: 'lon' or 'lat' - attrs <- attributes(metadata[[i_coord]])[['variables']] - attrs[[i_coord]]$dim <- NULL - attr(out_coords[[i_coord]], 'variables') <- attrs - } else if (inherits(metadata[[i_coord]], 'list')) { - # from Start and Load: main var - attr(out_coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) - names(attributes(out_coords[[i_coord]])$variables) <- i_coord - } else if (!is.null(attributes(metadata[[i_coord]]))) { - # from Load - attrs <- attributes(metadata[[i_coord]]) - # We remove because some attributes can't be saved - attrs <- NULL - attr(out_coords[[i_coord]], 'variables') <- list(attrs) - names(attributes(out_coords[[i_coord]])$variables) <- i_coord - } - } - } - - if (!single_file) { - for (i in 1:n_datasets) { - path <- file.path(destination, Datasets[i], varname) - for (j in 1:n_vars) { - if (!dir.exists(path[j])) { - dir.create(path[j], recursive = TRUE) - } - startdates <- gsub("-", "", startdates) - dim(startdates) <- c(length(startdates)) - names(dim(startdates)) <- sdate_dim - if (is.null(dat_dim) & is.null(var_dim)) { - data_subset <- data - } else if (is.null(dat_dim)) { - data_subset <- Subset(data, c(var_dim), list(j), drop = 'selected') - } else if (is.null(var_dim)) { - data_subset <- Subset(data, along = c(dat_dim), list(i), drop = 'selected') - } else { - data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') - } - target <- names(dim(data_subset))[which(!names(dim(data_subset)) %in% c(sdate_dim, ftime_dim))] - target_dims_data <- c(target, ftime_dim) - if (is.null(Dates)) { - input_data <- list(data_subset, startdates) - target_dims <- list(target_dims_data, NULL) - } else if (!is.null(time_bounds)) { - input_data <- list(data_subset, startdates, Dates, - time_bounds[[1]], time_bounds[[2]]) - target_dims = list(target_dims_data, NULL, - ftime_dim, ftime_dim, ftime_dim) - } else { - input_data <- list(data_subset, startdates, Dates) - target_dims = list(target_dims_data, NULL, ftime_dim) - } - Apply(data = input_data, - target_dims = target_dims, - fun = .saveexp, - destination = path[j], - coords = out_coords, - ftime_dim = ftime_dim, - varname = varname[j], - metadata_var = metadata[[varname[j]]], - extra_string = extra_string, - global_attrs = global_attrs) - } - } - } else { - # time_bnds - if (!is.null(time_bounds)) { - time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) - } - # Dates - remove_metadata_dim <- TRUE - if (!is.null(Dates)) { - if (is.null(sdate_dim)) { - sdates <- Dates[1] - # ftime definition - leadtimes <- as.numeric(difftime(Dates, sdates, units = "hours")) - } else { - # sdate definition - sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) - dim(differ) <- dim(data)[sdate_dim] - differ <- list(differ) - names(differ) <- sdate_dim - out_coords <- c(differ, out_coords) - attrs <- list(units = paste('hours since', sdates[1]), - calendar = 'proleptic_gregorian', longname = sdate_dim) - attr(out_coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs - # ftime definition - Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) - differ_ftime <- array(dim = dim(Dates)) - for (i in 1:length(sdates)) { - differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], - units = "hours")) - } - dim(differ_ftime) <- dim(Dates) - leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') - if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { - warning("Time steps are not equal for all start dates. Only ", - "forecast time values for the first start date will be saved ", - "correctly.") - } - } - if (all(!units_hours_since, is.null(time_bounds))) { - if (all(diff(leadtimes/24) == 1)) { - # daily values - units <- 'days' - leadtimes_vals <- round(leadtimes/24) + 1 - } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { - # monthly values - units <- 'months' - leadtimes_vals <- round(leadtimes/(30.437*24)) + 1 - } else { - # other frequency - units <- 'hours' - leadtimes_vals <- leadtimes + 1 - } - } else { - units <- paste('hours since', paste(sdates, collapse = ', ')) - leadtimes_vals <- leadtimes - } - - # Add time_bnds - if (!is.null(time_bounds)) { - if (is.null(sdate_dim)) { - sdates <- Dates[1] - time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) - leadtimes_bnds <- as.numeric(difftime(time_bnds, sdates, units = "hours")) - dim(leadtimes_bnds) <- c(dim(Dates), bnds = 2) - } else { - # assuming they have sdate and ftime - time_bnds <- lapply(time_bounds, function(x) { - x <- Reorder(x, c(ftime_dim, sdate_dim)) - return(x) - }) - time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) - dim(time_bnds) <- c(dim(Dates), bnds = 2) - differ_bnds <- array(dim = c(dim(time_bnds))) - for (i in 1:length(sdates)) { - differ_bnds[, i, ] <- as.numeric(difftime(time_bnds[, i, ], Dates[1, i], - units = "hours")) - } - # NOTE (TODO): Add a warning when they are not equally spaced? - leadtimes_bnds <- Subset(differ_bnds, along = sdate_dim, 1, drop = 'selected') - } - # Add time_bnds - leadtimes_bnds <- Reorder(leadtimes_bnds, c('bnds', ftime_dim)) - leadtimes_bnds <- list(leadtimes_bnds) - names(leadtimes_bnds) <- 'time_bnds' - out_coords <- c(leadtimes_bnds, out_coords) - attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), - calendar = 'proleptic_gregorian', - long_name = 'time bounds', unlim = FALSE) - attr(out_coords[['time_bnds']], 'variables')$time_bnds <- attrs - } - # Add ftime var - dim(leadtimes_vals) <- dim(data)[ftime_dim] - leadtimes_vals <- list(leadtimes_vals) - names(leadtimes_vals) <- ftime_dim - out_coords <- c(leadtimes_vals, out_coords) - attrs <- list(units = units, calendar = 'proleptic_gregorian', - longname = ftime_dim, - dim = list(list(name = ftime_dim, unlim = TRUE))) - if (!is.null(time_bounds)) { - attrs$bounds = 'time_bnds' - } - attr(out_coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs - for (j in 1:n_vars) { - remove_metadata_dim <- FALSE - metadata[[varname[j]]]$dim <- list(list(name = ftime_dim, unlim = TRUE)) - } - # Reorder ftime_dim to last - if (length(dim(data)) != which(names(dim(data)) == ftime_dim)) { - order <- c(names(dim(data))[which(!names(dim(data)) %in% c(ftime_dim))], ftime_dim) - data <- Reorder(data, order) - } - } - # var definition - extra_info_var <- NULL - for (j in 1:n_vars) { - varname_j <- varname[j] - metadata_j <- metadata[[varname_j]] - if (is.null(var_dim)) { - out_coords[[varname_j]] <- data - } else { - out_coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') - } - if (!is.null(metadata_j)) { - if (remove_metadata_dim) metadata_j$dim <- NULL - attr(out_coords[[varname_j]], 'variables') <- list(metadata_j) - names(attributes(out_coords[[varname_j]])$variables) <- varname_j - } - # Add global attributes - if (!is.null(global_attrs)) { - attributes(out_coords[[varname_j]])$global_attrs <- global_attrs - } - } - if (is.null(extra_string)) { - first_sdate <- startdates[1] - last_sdate <- startdates[length(startdates)] - gsub("-", "", first_sdate) - file_name <- paste0(paste(c(varname, - gsub("-", "", first_sdate), - gsub("-", "", last_sdate)), - collapse = '_'), ".nc") - } else { - nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) - if (nc == ".nc") { - file_name <- extra_string - } else { - file_name <- paste0(extra_string, ".nc") - } - } - full_filename <- file.path(destination, file_name) - ArrayToNc(out_coords, full_filename) - } -} - -.saveexp <- function(data, coords, destination = "./", - startdates = NULL, dates = NULL, - time_bnds1 = NULL, time_bnds2 = NULL, - ftime_dim = 'time', varname = 'var', - metadata_var = NULL, extra_string = NULL, - global_attrs = NULL) { - remove_metadata_dim <- TRUE - if (!is.null(dates)) { - if (!any(is.null(time_bnds1), is.null(time_bnds2))) { - time_bnds <- c(time_bnds1, time_bnds2) - time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) - dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) - time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) - time_bnds <- list(time_bnds) - names(time_bnds) <- 'time_bnds' - coords <- c(time_bnds, coords) - attrs <- list(units = paste('hours since', dates[1]), - calendar = 'proleptic_gregorian', - longname = 'time bounds') - attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs - } - # Add ftime_dim - differ <- as.numeric(difftime(dates, dates[1], units = "hours")) - dim(differ) <- dim(data)[ftime_dim] - differ <- list(differ) - names(differ) <- ftime_dim - coords <- c(differ, coords) - attrs <- list(units = paste('hours since', dates[1]), - calendar = 'proleptic_gregorian', - longname = ftime_dim, - dim = list(list(name = ftime_dim, unlim = TRUE))) - if (!is.null(time_bnds1)) { - attrs$bounds = 'time_bnds' - } - attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs - metadata_var$dim <- list(list(name = ftime_dim, unlim = TRUE)) - remove_metadata_dim <- FALSE - } - # Add data - coords[[varname]] <- data - if (!is.null(metadata_var)) { - if (remove_metadata_dim) metadata_var$dim <- NULL - attr(coords[[varname]], 'variables') <- list(metadata_var) - names(attributes(coords[[varname]])$variables) <- varname - } - # Add global attributes - if (!is.null(global_attrs)) { - attributes(coords[[varname]])$global_attrs <- global_attrs - } - - if (is.null(extra_string)) { - file_name <- paste0(varname, "_", startdates, ".nc") - } else { - file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") - } - full_filename <- file.path(destination, file_name) - ArrayToNc(coords, full_filename) -} \ No newline at end of file -- GitLab From 43ca043842d37ea62935c221121280dccfa4ad51 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 24 May 2024 10:52:25 +0200 Subject: [PATCH 16/16] fix pipeline --- modules/Saving/Saving.R | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 1995c4d5..5ce98575 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -14,7 +14,6 @@ source("modules/Saving/R/get_time.R") source("modules/Saving/R/get_latlon.R") source("modules/Saving/R/get_global_attributes.R") source("modules/Saving/R/drop_dims.R") -source("modules/Saving/R/tmp/CST_SaveExp.R") Saving <- function(recipe, data, skill_metrics = NULL, -- GitLab